From 8b132385c63266b5a024a997ee10a03518ba2db5 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 13 Dec 2022 07:58:00 +0100 Subject: [PATCH 01/47] Adapt to new as.s2dv_cube() --- modules/Loading/Loading.R | 6 ++++-- modules/Loading/Loading_decadal.R | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 8d54d63d..93eadea7 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -4,7 +4,9 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") - +#TODO: remove these two lines when new as.s2dv_cube() is in CSTools +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') load_datasets <- function(recipe) { @@ -203,7 +205,7 @@ load_datasets <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. - dates <- hcst$Dates$start + dates <- hcst$attrs$Dates dim(dates) <- dim(Subset(hcst$data, along=c('dat', 'var', 'latitude', 'longitude', 'ensemble'), diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 8046344b..394d29ff 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -11,8 +11,9 @@ source("modules/Loading/helper_loading_decadal.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") -## TODO: Remove once the fun is included in CSTools -source("tools/tmp/as.s2dv_cube.R") +#TODO: remove these two lines when new as.s2dv_cube() is in CSTools +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') #==================================================================== @@ -275,7 +276,7 @@ load_datasets <- function(recipe) { # Get from startR_cube # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube - dates <- hcst$Dates$start + dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -358,7 +359,6 @@ load_datasets <- function(recipe) { obs <- as.s2dv_cube(obs) ) - #------------------------------------------- # Step 4. 
Verify the consistance between data #------------------------------------------- -- GitLab From cc0be2e9f75bd786a97b3aff0cdb0f48c5f5c3d7 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 13 Dec 2022 08:14:53 +0100 Subject: [PATCH 02/47] Correct time attributes --- tests/testthat/test-decadal_daily_1.R | 12 ++++++------ tests/testthat/test-decadal_monthly_1.R | 10 +++++----- tests/testthat/test-decadal_monthly_2.R | 10 +++++----- tests/testthat/test-decadal_monthly_3.R | 10 +++++----- tests/testthat/test-seasonal_daily.R | 10 +++++----- tests/testthat/test-seasonal_monthly.R | 10 +++++----- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index c9833d2b..264371f7 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -72,7 +72,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 3, time = 90) ) # hcst data @@ -111,23 +111,23 @@ tolerance = 0.0001 # time value expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[1, 1, 3, 90], +(data$hcst$attrs$Dates)[1, 1, 3, 90], as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[1, 1, 2, 90], +(data$hcst$attrs$Dates)[1, 1, 2, 90], as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') ) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 5cf1922e..249bcc6e 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -91,7 +91,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attr$Dates), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( @@ -110,19 +110,19 @@ c(281.7395, 294.2467), tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1991-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1992-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[10], +(data$hcst$attrs$Dates)[10], as.POSIXct("1993-01-16 12:00:00", tz = 'UTC') ) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 4dd72ebf..7e9f74b4 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -73,7 +73,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 3, time = 14) ) #expect_equal( @@ -109,19 +109,19 @@ tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1990-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], 
+(data$hcst$attrs$Dates)[2], as.POSIXct("1991-11-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[10], +(data$hcst$attrs$Dates)[10], as.POSIXct("1991-02-15", tz = 'UTC') ) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 7535e8dc..042f2055 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -66,7 +66,7 @@ dim(data$hcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16, ensemble = 3) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 4, time = 3) ) # hcst data @@ -87,19 +87,19 @@ tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("2016-04-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("2017-04-16", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("2016-05-16 12:00:00", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[12], +(data$hcst$attrs$Dates)[12], as.POSIXct("2019-06-16", tz = 'UTC') ) diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 5b771d77..89686ad3 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -61,7 +61,7 @@ dim(data$obs$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 1) ) expect_equal( -dim(data$obs$Dates$start), +dim(data$obs$attrs$Dates), c(sday = 1, sweek = 1, syear = 4, time = 31) ) expect_equal( @@ -80,19 +80,19 @@ c(280.1490, 298.2324), tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') ) expect_equal( -(data$obs$Dates$start)[10], +(data$obs$attrs$Dates)[10], as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') ) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 86feedfb..2052bfab 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -88,7 +88,7 @@ dim(data$fcst$data), c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) ) expect_equal( -dim(data$hcst$Dates$start), +dim(data$hcst$attrs$Dates), c(sday = 1, sweek = 1, syear = 4, time = 3) ) expect_equal( @@ -107,19 +107,19 @@ c(284.7413, 299.6219), tolerance = 0.0001 ) expect_equal( -(data$hcst$Dates$start)[1], +(data$hcst$attrs$Dates)[1], as.POSIXct("1993-12-01", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[2], +(data$hcst$attrs$Dates)[2], as.POSIXct("1994-12-01", tz = 'UTC') ) expect_equal( -(data$hcst$Dates$start)[5], +(data$hcst$attrs$Dates)[5], as.POSIXct("1994-01-01", tz = 'UTC') ) expect_equal( -(data$obs$Dates$start)[10], +(data$obs$attrs$Dates)[10], as.POSIXct("1995-02-14", tz = 'UTC') ) -- GitLab From 50a70879ce2171fd5c97f035f641e6c4d623e423 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 13 Dec 2022 08:20:45 +0100 Subject: [PATCH 03/47] Correct s2dv_cube list name --- 
tests/testthat/test-decadal_daily_1.R | 2 +- tests/testthat/test-decadal_monthly_1.R | 2 +- tests/testthat/test-decadal_monthly_2.R | 2 +- tests/testthat/test-decadal_monthly_3.R | 2 +- tests/testthat/test-seasonal_daily.R | 2 +- tests/testthat/test-seasonal_monthly.R | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 264371f7..400b864d 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -53,7 +53,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 249bcc6e..69b51b06 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -72,7 +72,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 7e9f74b4..c0860fd4 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -54,7 +54,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 042f2055..c1e9a8af 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -55,7 +55,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 89686ad3..cf403a47 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -46,7 +46,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 2052bfab..e8b661b6 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -69,7 +69,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "dims", "coords", "attrs") ) expect_equal( names(data$hcst), -- GitLab From 908dcf20f92d79372fa3ceaee5a849c758ba8f0a Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 13 Dec 2022 08:33:52 +0100 Subject: [PATCH 04/47] Correct time attributes --- modules/Loading/Loading_decadal.R | 4 ++-- modules/Saving/Saving.R | 20 ++++++++++---------- modules/Visualization/Visualization.R | 6 +++--- tools/data_summary.R | 6 +++--- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R 
index 394d29ff..d8715ba8 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -378,8 +378,8 @@ load_datasets <- function(recipe) { } # time attribute - if (!identical(format(hcst$Dates$start, '%Y%m'), - format(obs$Dates$start, '%Y%m'))) { + if (!identical(format(hcst$attrs$Dates, '%Y%m'), + format(obs$attrs$Dates, '%Y%m'))) { error(recipe$Run$logger, "hcst and obs don't share the same time.") stop() diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 961bacee..e49e9781 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -183,7 +183,7 @@ save_forecast <- function(data_cube, # } # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) @@ -216,7 +216,7 @@ save_forecast <- function(data_cube, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) # expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -318,7 +318,7 @@ save_observations <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { # Method 1: Use the first date as init_date. 
But it may be better to use the real initialized date (ask users) @@ -339,7 +339,7 @@ save_observations <- function(data_cube, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -390,7 +390,7 @@ save_observations <- function(data_cube, fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') } else { - fcst.sdate <- as.Date(data_cube$Dates$start[i]) + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) } } @@ -485,7 +485,7 @@ save_metrics <- function(skill, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { @@ -592,7 +592,7 @@ save_corr <- function(skill, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month @@ -691,7 +691,7 @@ save_percentiles <- function(percentiles, store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month @@ -775,7 +775,7 @@ save_probabilities <- function(probs, # Generate vector containing leadtimes ## TODO: Move to a separate function? 
- dates <- as.PCICt(ClimProjDiags::Subset(data_cube$Dates$start, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month @@ -793,7 +793,7 @@ save_probabilities <- function(probs, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) for (i in syears) { # Select year from array and rearrange dimensions probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 2d50a1a0..ff9b4981 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -168,7 +168,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, outfile <- paste0(outdir, name, ".png") toptitle <- paste(display_name, "-", data_cube$Variable$varName, "-", system_name, "-", init_month, hcst_period) - months <- unique(lubridate::month(data_cube$Dates$start, + months <- unique(lubridate::month(data_cube$attrs$Dates, label = T, abb = F)) titles <- as.vector(months) # Plot @@ -247,7 +247,7 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { } toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, "- Initialization:", i_syear) - months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) titles <- as.vector(months) # Plots @@ -326,7 +326,7 @@ plot_most_likely_terciles <- function(recipe, archive, } toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", "Initialization:", i_syear) - months <- lubridate::month(fcst$Dates$start[1, 1, which(start_date == i_syear), ], + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], label = T, abb = F) ## TODO: Ensure this works for daily and sub-daily cases titles <- as.vector(months) diff --git a/tools/data_summary.R b/tools/data_summary.R index 34b6bd6e..8fffd3b4 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -12,10 +12,10 @@ data_summary <- function(data_cube, recipe) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- '%b %d %Y' } - months <- unique(format(as.Date(data_cube$Dates[[1]]), format = '%B')) + months <- unique(format(as.Date(data_cube$attrs$Dates[[1]]), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(data_cube$Dates[[1]])), format = date_format) + sdate_min <- format(min(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) + sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) # Create log instance and sink output to logfile and terminal info(recipe$Run$logger, "DATA SUMMARY:") -- GitLab From 4bc28d1a46349d496fd3b53dd4f5cd1b924cdf67 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 14 Dec 2022 06:32:22 +0100 Subject: [PATCH 05/47] Correct varName attributes --- modules/Saving/Saving.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e49e9781..c7a26a57 100644 --- 
a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -170,7 +170,7 @@ save_forecast <- function(data_cube, lalo <- c('longitude', 'latitude') - variable <- data_cube$Variable$varName + variable <- data_cube$attrs$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -273,7 +273,7 @@ save_forecast <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "exp") # Get grid data and metadata and export to netCDF @@ -309,7 +309,7 @@ save_observations <- function(data_cube, lalo <- c('longitude', 'latitude') - variable <- data_cube$Variable$varName + variable <- data_cube$attrs$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -408,7 +408,7 @@ save_observations <- function(data_cube, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "obs") # Get grid data and metadata and export to netCDF @@ -525,7 +525,7 @@ save_metrics <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "skill") # Get grid data and metadata and export to netCDF @@ -631,7 +631,7 @@ save_corr <- function(skill, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "corr") # Get grid data and metadata and export to netCDF @@ -730,7 +730,7 @@ save_percentiles <- function(percentiles, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "percentiles") # Get grid data and metadata and export to netCDF @@ -766,7 +766,7 @@ save_probabilities <- function(probs, lalo <- c('longitude', 'latitude') - variable <- data_cube$Variable$varName + variable <- data_cube$attrs$Variable$varName var.longname <- attr(data_cube$Variable, 'variable')$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -837,7 +837,7 @@ save_probabilities <- function(probs, time <- times$time # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$Variable$varName, + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, fcst.sdate, agg, "probs") # Get grid data and metadata and export to netCDF -- GitLab From a483ea10d1c55eb3ee3c9de325ab4aca84aae315 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 14 Dec 2022 06:44:40 +0100 Subject: [PATCH 06/47] Change lat/lon attributes level --- modules/Saving/Saving.R | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index c7a26a57..77c6ad03 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -281,8 +281,8 
@@ save_forecast <- function(data_cube, country <- get_countries(grid) ArrayToNc(append(country, time, fcst), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -416,8 +416,8 @@ save_observations <- function(data_cube, country <- get_countries(grid) ArrayToNc(append(country, time, fcst), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -533,8 +533,8 @@ save_metrics <- function(skill, country <- get_countries(grid) ArrayToNc(append(country, time, skill), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -639,8 +639,8 @@ save_corr <- function(skill, country <- get_countries(grid) ArrayToNc(append(country, time, skill), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -738,8 +738,8 @@ save_percentiles <- function(percentiles, country <- get_countries(grid) ArrayToNc(append(country, time, percentiles), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -845,8 +845,8 @@ save_probabilities <- function(probs, country <- get_countries(grid) ArrayToNc(append(country, time, probs_syear), outfile) } else { - latitude <- data_cube$lat[1:length(data_cube$lat)] - longitude <- data_cube$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) -- GitLab From 95fa10cc86e47db4dcaf6d8fd17807cb7699a295 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 14 Dec 2022 07:17:57 +0100 Subject: [PATCH 07/47] Crrect attributes levels --- modules/Saving/Saving.R | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 77c6ad03..5246b320 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -171,7 +171,7 @@ save_forecast 
<- function(data_cube, lalo <- c('longitude', 'latitude') variable <- data_cube$attrs$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -241,7 +241,7 @@ save_forecast <- function(data_cube, dims <- c(lalo, 'ensemble', 'time') var.expname <- variable var.sdname <- var.sdname - var.units <- attr(data_cube$Variable, 'variable')$units + var.units <- data_cube$attrs$Variable$variables[[variable]]$units } metadata <- list(fcst = list(name = var.expname, @@ -265,7 +265,7 @@ save_forecast <- function(data_cube, fcst.sdate <- format(fcst.sdate, '%Y%m%d') } else { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] } # Get time dimension values and metadata @@ -281,8 +281,8 @@ save_forecast <- function(data_cube, country <- get_countries(grid) ArrayToNc(append(country, time, fcst), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -310,7 +310,7 @@ save_observations <- function(data_cube, lalo <- c('longitude', 'latitude') variable <- data_cube$attrs$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -359,11 +359,11 @@ save_observations <- function(data_cube, dims <- c('Country', 'time') var.expname <- paste0(variable, '_country') var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units + var.units <- data_cube$attrs$Variable$variables[[variable]]$units } else { dims <- c(lalo, 'time') var.expname <- variable - var.units <- attr(data_cube$Variable, 'variable')$units + var.units <- data_cube$attrs$Variable$variables[[variable]]$units } metadata <- list(fcst = list(name = var.expname, @@ -387,7 +387,7 @@ save_observations <- function(data_cube, } else { if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') } else { fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) @@ -416,8 +416,8 @@ save_observations <- function(data_cube, country <- get_countries(grid) ArrayToNc(append(country, time, fcst), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -533,8 +533,8 @@ save_metrics <- function(skill, country <- get_countries(grid) ArrayToNc(append(country, time, 
skill), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -639,8 +639,8 @@ save_corr <- function(skill, country <- get_countries(grid) ArrayToNc(append(country, time, skill), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -738,8 +738,8 @@ save_percentiles <- function(percentiles, country <- get_countries(grid) ArrayToNc(append(country, time, percentiles), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) @@ -767,7 +767,7 @@ save_probabilities <- function(probs, lalo <- c('longitude', 'latitude') variable <- data_cube$attrs$Variable$varName - var.longname <- attr(data_cube$Variable, 'variable')$long_name + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -829,7 +829,7 @@ save_probabilities <- function(probs, fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) fcst.sdate <- format(fcst.sdate, '%Y%m%d') } else { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] } # Get time dimension values and metadata @@ -845,8 +845,8 @@ save_probabilities <- function(probs, country <- get_countries(grid) ArrayToNc(append(country, time, probs_syear), outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$lon)] + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) -- GitLab From 2a5ca81b6a70ec6dc7283631980f5d783a2f4e44 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 14 Dec 2022 07:24:28 +0100 Subject: [PATCH 08/47] Correct lat/lon attribures level --- modules/Visualization/Visualization.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ff9b4981..f9498739 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -81,8 +81,8 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, stop("The element 'skill_metrics' must be a list of named arrays.") } - latitude <- data_cube$lat - 
longitude <- data_cube$lon + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", recipe$Analysis$Time$hcst_end) @@ -166,7 +166,7 @@ plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, } # Define output file name and titles outfile <- paste0(outdir, name, ".png") - toptitle <- paste(display_name, "-", data_cube$Variable$varName, + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, "-", system_name, "-", init_month, hcst_period) months <- unique(lubridate::month(data_cube$attrs$Dates, label = T, abb = F)) -- GitLab From 32c44e8846cdb6bb56ca5095bff97a8aedd8b8d1 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 14 Dec 2022 07:52:03 +0100 Subject: [PATCH 09/47] Correct attributes level --- modules/Visualization/Visualization.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index f9498739..b16fb4a7 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -201,8 +201,8 @@ plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { stop("Visualization functions not yet implemented for daily data.") } - latitude <- fcst$lat - longitude <- fcst$lon + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name units <- attr(fcst$Variable, "variable")$units @@ -278,8 +278,8 @@ plot_most_likely_terciles <- function(recipe, archive, stop("Visualization functions not yet implemented for daily data.") } - latitude <- fcst$lat - longitude <- fcst$lon + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name variable <- recipe$Analysis$Variables$name start_date <- paste0(recipe$Analysis$Time$fcst_year, -- GitLab From b6799116a95af6bac2afcc798af2a0493cf1553c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Dec 2022 16:08:45 +0100 Subject: [PATCH 10/47] Preliminary version of multivar loading (WIP) --- modules/Loading/Loading.R | 49 ++++++++++++------- .../testing_recipes/recipe_test_multivar.yml | 49 +++++++++++++++++++ 2 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 modules/Loading/testing_recipes/recipe_test_multivar.yml diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 66a53451..f621172c 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -5,6 +5,9 @@ source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") ## TODO: Move to prepare_outputs.R source("tools/libs.R") +## TODO: Source new s2dv_cube version +## TODO: Eliminate dim_var dimension (merge_across_dims?) 
+ load_datasets <- function(recipe) { @@ -20,7 +23,8 @@ load_datasets <- function(recipe) { ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- recipe$Analysis$Variables$name + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1] + vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq # get sdates array @@ -66,22 +70,22 @@ load_datasets <- function(recipe) { ##} else { ## accum <- FALSE ##} + + var_dir_obs <- reference_descrip[[store.freq]][vars] + var_dir_exp <- exp_descrip[[store.freq]][vars] # ----------- obs.path <- paste0(archive$src, - obs.dir, store.freq, "/$var$", - reference_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + obs.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") hcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], + hcst.dir, store.freq, "/$var$", "$var_dir$", "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, - hcst.dir, store.freq, "/$var$", - exp_descrip[[store.freq]][[variable]], - "$var$_$file_date$.nc") + hcst.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -100,9 +104,11 @@ load_datasets <- function(recipe) { # Load hindcast #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, - var = variable, + var = vars, + var_dir = var_dir_exp, file_date = sdates$hcst, time = idxs$hcst, + var_dir_depends = 'var', latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -115,6 +121,7 @@ load_datasets <- function(recipe) { longitude = c('lon', 'longitude'), ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -152,7 +159,9 @@ load_datasets <- function(recipe) { # multiple dims split fcst <- Start(dat = fcst.path, - var = variable, + var = vars, + var_dir = var_dir_exp, + var_dir_depends = 'var', file_date = sdates$fcst, time = idxs$fcst, latitude = values(list(lats.min, lats.max)), @@ -167,6 +176,7 @@ load_datasets <- function(recipe) { longitude = c('lon', 'longitude'), ensemble = c('member', 'ensemble')), ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -203,11 +213,8 @@ load_datasets <- function(recipe) { # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. 
- dates <- hcst$Dates$start - dim(dates) <- dim(Subset(hcst$data, - along=c('dat', 'var', - 'latitude', 'longitude', 'ensemble'), - list(1,1,1,1,1), drop="selected")) + dates <- hcst$attrs$Dates + # dim(dates) <- hcst_cube$dims[c("sday", "sweek", "syear", "time")] # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { @@ -216,7 +223,9 @@ load_datasets <- function(recipe) { dim(dates_file) <- dim(dates) obs <- Start(dat = obs.path, - var = variable, + var = vars, + var_dir = var_dir_obs, + var_dir_depends = 'var', file_date = dates_file, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), @@ -228,6 +237,7 @@ load_datasets <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), + metadata_dims = 'var', return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), @@ -246,7 +256,9 @@ load_datasets <- function(recipe) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = variable, + var = vars, + var_dir = var_dir_obs, + var_dir_depends = 'var', file_date = sort(unique(dates_file)), time = dates, time_var = 'time', @@ -263,6 +275,7 @@ load_datasets <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), + metadata_dims = 'var', return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), diff --git a/modules/Loading/testing_recipes/recipe_test_multivar.yml b/modules/Loading/testing_recipes/recipe_test_multivar.yml new file mode 100644 index 00000000..71fe93ba --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test_multivar.yml @@ -0,0 +1,49 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas prlr + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1999' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 546a82c15ec2757550d5a4309ee5789b94487e3d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 27 Dec 2022 17:19:09 +0100 Subject: [PATCH 11/47] add extra var to recipe --- modules/Loading/testing_recipes/recipe_seasonal-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml index e1857ac0..e7e92f45 100644 --- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -4,7 +4,7 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas + name: tas prlr freq: monthly_mean Datasets: System: -- GitLab From 248fa4dfddb4fddda3a988a1dad0b3777ede40aa Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 29 Dec 2022 11:01:33 +0100 Subject: [PATCH 12/47] Add a TODO to data_summary() --- tools/data_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 8fffd3b4..27eb8c6b 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -1,6 +1,5 @@ # Print a summary of the loaded data for the user, for each object. # object: hindcast, forecast or reference data in s2dv_cube format. 
-## TODO: Incorporate into logger ## TODO: Adapt to daily/subseasonal cases ## TODO: Add check for missing files/NAs by dimension @@ -18,6 +17,7 @@ data_summary <- function(data_cube, recipe) { sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) # Create log instance and sink output to logfile and terminal + ## TODO: Return variables and summary for each variable info(recipe$Run$logger, "DATA SUMMARY:") sink(recipe$Run$logfile, append = TRUE, split = TRUE) print(paste0(object_name, " months: ", months)) -- GitLab From 1138760de991fe154fdfe42155cb4253f358d8e6 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 27 Jan 2023 11:42:46 +0100 Subject: [PATCH 13/47] Bugfix: correct variable name --- tools/data_summary.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 1764000f..f9ad5979 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -15,8 +15,10 @@ data_summary <- function(data_cube, recipe) { } months <- unique(format(as.Date(data_cube$attrs$Dates[[1]]), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) + sdate_min <- format(min(as.Date(data_cube$attrs$Dates[[1]])), + format = date_format) + sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), + format = date_format) # Log the summary info(recipe$Run$logger, "DATA SUMMARY:") info(recipe$Run$logger, paste(object_name, "months:", months)) @@ -32,7 +34,8 @@ data_summary <- function(data_cube, recipe) { info(recipe$Run$logger, paste0("Statistical summary of the data in ", object_name, ":")) for (var_index in 1:data_cube$dims[['var']]) { - info(recipe$Run$logger, data$hcst$attrs$Variable$varName[var_index]) + info(recipe$Run$logger, + paste("Variable:", data_cube$attrs$Variable$varName[var_index])) output_string <- capture.output(summary(Subset(data_cube$data, along = "var", indices = var_index))) -- GitLab From 0c7d11629bffe7073acab2939629aada6b1f1ba1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 30 Jan 2023 16:40:15 +0100 Subject: [PATCH 14/47] Adapt Loading module to change prlr units in multivar case --- modules/Loading/Loading.R | 60 ++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index df6f9fe1..e092cb66 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -352,37 +352,45 @@ load_datasets <- function(recipe) { # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") - if (dictionary$vars[[variable]]$accum) { - info(recipe$Run$logger, - "Accumulated variable: setting negative values to zero.") - obs$data[obs$data < 0] <- 0 - hcst$data[hcst$data < 0] <- 0 - if (!is.null(fcst)) { - fcst$data[fcst$data < 0] <- 0 + for (var_idx in 1:length(vars)) { + var_name <- vars[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { 
+ fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } } - } - # Convert prlr from m/s to mm/day - ## TODO: Make a unit conversion function? - if (variable == "prlr") { - # Verify that the units are m/s and the same in obs and hcst - if (((attr(obs$Variable, "variable")$units == "m s-1") || - (attr(obs$Variable, "variable")$units == "m s**-1")) && - ((attr(hcst$Variable, "variable")$units == "m s-1") || - (attr(hcst$Variable, "variable")$units == "m s**-1"))) { - - info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") - obs$data <- obs$data*86400*1000 - attr(obs$Variable, "variable")$units <- "mm/day" - hcst$data <- hcst$data*86400*1000 - attr(hcst$Variable, "variable")$units <- "mm/day" - if (!is.null(fcst)) { - fcst$data <- fcst$data*86400*1000 - attr(fcst$Variable, "variable")$units <- "mm/day" + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function + if (vars[[var_idx]] == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if (((obs$attrs$Variable$variables[[var_name]]$units == "m s-1") || + (obs$attrs$Variable$variables[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$variables[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$variables[[var_name]]$units == "m s**-1"))) { + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data[, var_idx, , , , , , , ] <- + obs$data[, var_idx, , , , , , , ]*86400*1000 + obs$attrs$Variable$variables[[var_name]]$units <- "mm/day" + hcst$data[, var_idx, , , , , , , ] <- + hcst$data[, var_idx, , , , , , , ]*86400*1000 + hcst$attrs$Variable$variables[[var_name]]$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ] <- + fcst$data[, var_idx, , , , , , , ]*86400*1000 + fcst$attrs$Variable$variables[[var_name]]$units <- "mm/day" + } } } } - # Compute anomalies if requested # Print a summary of the loaded data for the user, for each object if (recipe$Run$logger$threshold <= 2) { -- GitLab From 086eb3ae63b98477c071579611eee55471d4684e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 30 Jan 2023 17:16:05 +0100 Subject: [PATCH 15/47] Adapt Anomalies module --- modules/Anomalies/Anomalies.R | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 552f895a..41fd286f 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -45,13 +45,14 @@ compute_anomalies <- function(recipe, data) { data$obs <- anom$obs remove(anom) # Change variable metadata - # data$hcst$Variable$varName <- paste0(data$hcst$Variable$varName, "anomaly") - attr(data$hcst$Variable, "variable")$long_name <- - paste(attr(data$hcst$Variable, "variable")$long_name, "anomaly") - # data$obs$Variable$varName <- paste0(data$obs$Variable$varName, "anomaly") - attr(data$obs$Variable, "variable")$long_name <- - paste(attr(data$obs$Variable, "variable")$long_name, "anomaly") - + for (var in data$hcst$attrs$Variable$varName) { + # Change hcst longname + data$hcst$attrs$Variable$variables[[var]]$long_name <- + paste(data$hcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + # Change obs longname + data$obs$attrs$Variable$variables[[var]]$long_name <- + paste(data$obs$attrs$Variable$variables[[var]]$long_name, "anomaly") + } # Compute forecast anomaly field if (!is.null(data$fcst)) { # Compute hindcast climatology ensemble mean @@ -70,9 +71,10 @@ compute_anomalies <- function(recipe, data) { # Get 
fcst anomalies data$fcst$data <- data$fcst$data - clim_hcst # Change metadata - # data$fcst$Variable$varName <- paste0(data$fcst$Variable$varName, "anomaly") - attr(data$fcst$Variable, "variable")$long_name <- - paste(attr(data$fcst$Variable, "variable")$long_name, "anomaly") + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$variables[[var]]$long_name <- + paste(data$fcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + } } info(recipe$Run$logger, -- GitLab From 4e6fb68a17754a31dad9d0d53b10c9b7d6dfcebc Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 31 Jan 2023 09:52:31 +0100 Subject: [PATCH 16/47] Adapt Anomalies to multivar case --- modules/Anomalies/Anomalies.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 41fd286f..adaa8037 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -20,7 +20,7 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- dim(data$hcst$data) + original_dims <- data$hcst$dim # Compute anomalies anom <- CST_Anomaly(data$hcst, data$obs, @@ -65,9 +65,13 @@ compute_anomalies <- function(recipe, data) { ncores = recipe$Analysis$ncores) clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, name = "syear") + # Store original dimensions dims <- dim(clim_hcst) - clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']]) - dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) + # Repeat the array as many times as ensemble members + clim_hcst <- rep(clim_hcst, data$fcst$dim[['ensemble']]) + # Rename and reorder dimensions + dim(clim_hcst) <- c(dims, ensemble = data$fcst$dim[['ensemble']]) + clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dim)) # Get fcst anomalies data$fcst$data <- data$fcst$data - clim_hcst # Change metadata -- GitLab From 6a63fd77b52c23217138fd8b3f4950034c8d2a12 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 1 Feb 2023 10:48:32 +0100 Subject: [PATCH 17/47] Add a TODO --- modules/Saving/Saving.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 319d5ba3..7ec90d05 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,4 +1,5 @@ ## TODO: Save obs percentiles +## TODO: Save data for multiple variables source("modules/Saving/paths2save.R") -- GitLab From f8988d7184d51f7ca5762a41705b558b3587e509 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 2 Feb 2023 14:50:44 +0100 Subject: [PATCH 18/47] Include package 'sticky' and add multivar Saving (WIP) --- modules/Saving/Saving.R | 147 +++++++++++++++++++----------------- modules/Saving/paths2save.R | 6 +- tools/libs.R | 2 + 3 files changed, 83 insertions(+), 72 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 7ec90d05..6ff9f1fd 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -40,7 +40,9 @@ save_data <- function(recipe, data, # Create output directory outdir <- get_dir(recipe) - dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + for (directory in outdir) { + dir.create(directory, showWarnings = FALSE, recursive = TRUE) + } # Export hindcast, forecast and observations onto outfile save_forecast(data$hcst, recipe, dict, outdir, archive = archive, @@ -173,8 +175,8 @@ save_forecast <- function(data_cube, lalo <- c('longitude', 'latitude') - variable <- data_cube$attrs$Variable$varName 
- var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + variables <- data_cube$attrs$Variable$varName + # var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -222,77 +224,84 @@ save_forecast <- function(data_cube, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) # expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } + # Split by variable + for (var_idx in 1:length(data_cube$attrs$Variable$varName)) { + sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- variables[var_idx] + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$variables[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + 
attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # Generate name of output file + outfile <- get_filename(outdir[var_idx], recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } } info(recipe$Run$logger, paste("#####", toupper(type), diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 2d6353fe..bb41f05f 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -63,7 +63,7 @@ get_dir <- function(recipe, agg = "global") { outdir <- paste0(recipe$Run$output_dir, "/outputs/") ## TODO: multivar case - variable <- recipe$Analysis$Variables$name + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] if (tolower(recipe$Analysis$Output_format) == 'scorecards') { # Define output dir name accordint to Scorecards format @@ -75,7 +75,7 @@ get_dir <- function(recipe, agg = "global") { # Default generic output format based on FOCUS if (!is.null(recipe$Analysis$Time$fcst_year)) { if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate + ## PROBLEM: decadal doesn't have sdate fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') } else { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, @@ -83,7 +83,7 @@ get_dir <- function(recipe, agg = "global") { } } else { if (tolower(recipe$Analysis$Horizon) == 'decadal') { - #PROBLEM: decadal doesn't have sdate + ## PROBLEM: decadal 
doesn't have sdate fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end, sep = '_')) diff --git a/tools/libs.R b/tools/libs.R index a0767f76..82b7166e 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -1,4 +1,5 @@ library(log4r) +library(sticky) library(startR) library(ClimProjDiags) library(multiApply) @@ -32,4 +33,5 @@ source("tools/check_recipe.R") source("tools/prepare_outputs.R") source("tools/divide_recipe.R") source("tools/data_summary.R") +source("/esarchive/scratch/vagudets/repos/victoria-personal-code/misc/R/CST_Subset.R") # source("tools/add_dims.R") # Not sure if necessary yet -- GitLab From 602c93f5f05e2417344d0251560c470aaaa35fb2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 3 Feb 2023 09:16:36 +0100 Subject: [PATCH 19/47] Save multivar obs --- modules/Saving/Saving.R | 161 +++++++++++++++++++++------------------- 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 6ff9f1fd..0cde6b2a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -324,8 +324,7 @@ save_observations <- function(data_cube, lalo <- c('longitude', 'latitude') - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + variables <- data_cube$attrs$Variable$varName global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -356,89 +355,95 @@ save_observations <- function(data_cube, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } + # Split by variable + for (var_idx in 1:length(variables)) { + sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- variables[var_idx] + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$variables[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$variables[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes + # 
Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { - - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } } - } - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to 
netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # Generate name of output file + outfile <- get_filename(outdir[var_idx], recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } } info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") -- GitLab From 23c60c70c300f06c1123fe00fd99039363057cc1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 6 Feb 2023 15:57:22 +0100 Subject: [PATCH 20/47] Update s2dv_cube structure --- modules/Anomalies/Anomalies.R | 12 +++++----- modules/Loading/Loading.R | 14 ++++++------ modules/Saving/Saving.R | 42 ++++++++++++++++++++++------------- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index adaa8037..3ee40f31 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -47,11 +47,11 @@ compute_anomalies <- function(recipe, data) { # Change variable metadata for (var in data$hcst$attrs$Variable$varName) { # Change hcst longname - data$hcst$attrs$Variable$variables[[var]]$long_name <- - paste(data$hcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + data$hcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") # Change obs longname - data$obs$attrs$Variable$variables[[var]]$long_name <- - paste(data$obs$attrs$Variable$variables[[var]]$long_name, "anomaly") + data$obs$attrs$Variable$metadata[[var]]$long_name <- + paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") } # Compute forecast anomaly field if (!is.null(data$fcst)) { @@ -76,8 +76,8 @@ compute_anomalies <- function(recipe, data) { data$fcst$data <- data$fcst$data - clim_hcst # Change metadata for (var in data$fcst$attrs$Variable$varName) { - data$fcst$attrs$Variable$variables[[var]]$long_name <- - paste(data$fcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") } } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index e092cb66..755650b7 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -372,21 +372,21 @@ load_datasets <- function(recipe) { ## TODO: Make a unit conversion function if (vars[[var_idx]] == "prlr") { # Verify that the units are m/s and the same in obs and hcst - if (((obs$attrs$Variable$variables[[var_name]]$units == "m s-1") || - (obs$attrs$Variable$variables[[var_name]]$units == "m s**-1")) && - ((hcst$attrs$Variable$variables[[var_name]]$units == "m s-1") || - 
(hcst$attrs$Variable$variables[[var_name]]$units == "m s**-1"))) { + if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data[, var_idx, , , , , , , ] <- obs$data[, var_idx, , , , , , , ]*86400*1000 - obs$attrs$Variable$variables[[var_name]]$units <- "mm/day" + obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" hcst$data[, var_idx, , , , , , , ] <- hcst$data[, var_idx, , , , , , , ]*86400*1000 - hcst$attrs$Variable$variables[[var_name]]$units <- "mm/day" + hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" if (!is.null(fcst)) { fcst$data[, var_idx, , , , , , , ] <- fcst$data[, var_idx, , , , , , , ]*86400*1000 - fcst$attrs$Variable$variables[[var_name]]$units <- "mm/day" + fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" } } } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 0cde6b2a..bdbb00ef 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -67,13 +67,25 @@ save_data <- function(recipe, data, } # Export skill metrics onto outfile - if (!is.null(skill_metrics)) { - save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, - archive = archive) - } - if (!is.null(corr_metrics)) { - save_corr(corr_metrics, recipe, dict, data$hcst, outdir, - archive = archive) + for (var in 1:data$hcst$dims[['var']]) { + subset_cube <- CST_Subset(data$hcst, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + if (!is.null(skill_metrics)) { + subset_skill <- lapply(skill_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_metrics(subset_skill, recipe, dict, subset_cube, outdir[var], + archive = archive) + } + if (!is.null(corr_metrics)) { + subset_corr <- lapply(corr_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_corr(subset_corr, recipe, dict, subset_cube, outdir[var], + archive = archive) + } } # Export probabilities onto outfile @@ -176,7 +188,7 @@ save_forecast <- function(data_cube, lalo <- c('longitude', 'latitude') variables <- data_cube$attrs$Variable$varName - # var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + # var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -229,7 +241,7 @@ save_forecast <- function(data_cube, sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), drop = F, var_dim = 'var', dat_dim = 'dat') variable <- variables[var_idx] - var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) @@ -249,12 +261,12 @@ save_forecast <- function(data_cube, dims <- c('Country', 'ensemble', 'time') var.expname <- paste0(variable, '_country') var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- attr(data_cube$Variable, 'variable')$units + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } else { dims <- c(lalo, 'ensemble', 
'time') var.expname <- variable var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$variables[[variable]]$units + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } metadata <- list(fcst = list(name = var.expname, @@ -360,7 +372,7 @@ save_observations <- function(data_cube, sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), drop = F, var_dim = 'var', dat_dim = 'dat') variable <- variables[var_idx] - var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) @@ -379,11 +391,11 @@ save_observations <- function(data_cube, dims <- c('Country', 'time') var.expname <- paste0(variable, '_country') var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$variables[[variable]]$units + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } else { dims <- c(lalo, 'time') var.expname <- variable - var.units <- data_cube$attrs$Variable$variables[[variable]]$units + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } metadata <- list(fcst = list(name = var.expname, @@ -817,7 +829,7 @@ save_probabilities <- function(probs, lalo <- c('longitude', 'latitude') variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) # Add anomaly computation to global attributes ## TODO: Sort out the logic once default behavior is decided -- GitLab From 75055a9f27a6d705c13bdf4869fb107110f367ec Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 6 Feb 2023 16:03:49 +0100 Subject: [PATCH 21/47] Mask sticky::append() --- tools/libs.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/libs.R b/tools/libs.R index 82b7166e..19b26e87 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -15,6 +15,7 @@ library(lubridate) library(PCICt) library(RColorBrewer) library(grDevices) +append <- base::append # # library(parallel) # library(pryr) # To check mem usage. 
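## [Editorial note as R comments, not part of the patch] Design choice behind the
## libs.R change above: library(sticky), added in an earlier patch, also provides
## an append(), so unqualified append() calls would stop resolving to base::append;
## re-binding append <- base::append restores the base behaviour. Hedged sketch of
## the masking problem, using a hypothetical stand-in for the package export:
append <- function(x, values, after = length(x)) stop("shadowed append() called")
res <- try(append(1:3, 4), silent = TRUE)  # picks up the shadowing definition
append <- base::append                     # explicit re-binding, as in libs.R
append(1:3, 4)                             # base behaviour again: 1 2 3 4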
-- GitLab From 1ff5ed3307a43f80beb47572e8cc9f8340365592 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 7 Feb 2023 11:04:37 +0100 Subject: [PATCH 22/47] Adapt Saving module to iterate over multiple variables in s2dv_cubes --- modules/Saving/Saving.R | 370 +++++++++++++++++++++------------------- 1 file changed, 190 insertions(+), 180 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index bdbb00ef..fcc78e6a 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -44,15 +44,6 @@ save_data <- function(recipe, data, dir.create(directory, showWarnings = FALSE, recursive = TRUE) } - # Export hindcast, forecast and observations onto outfile - save_forecast(data$hcst, recipe, dict, outdir, archive = archive, - type = 'hcst') - if (!is.null(data$fcst)) { - save_forecast(data$fcst, recipe, dict, outdir, - archive = archive, type = 'fcst') - } - save_observations(data$obs, recipe, dict, outdir, archive = archive) - # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file if ("corr" %in% names(skill_metrics)) { @@ -66,16 +57,33 @@ save_data <- function(recipe, data, corr_metrics <- NULL } - # Export skill metrics onto outfile + # Iterate over variables to subset s2dv_cubes and save outputs for (var in 1:data$hcst$dims[['var']]) { - subset_cube <- CST_Subset(data$hcst, along = 'var', indices = var, + info(recipe$Run$logger, + paste("Saving outputs for", data$hcst$attrs$Variable$varName[var])) + # Export hindcast, forecast and observations + subset_hcst <- CST_Subset(data$hcst, along = 'var', indices = var, drop = F, var_dim = 'var', dat_dim = 'dat') + + save_forecast(subset_hcst, recipe, dict, outdir[var], archive = archive, + type = 'hcst') + if (!is.null(data$fcst)) { + subset_fcst <- CST_Subset(data$fcst, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + save_forecast(subset_fcst, recipe, dict, outdir, + archive = archive, type = 'fcst') + } + subset_obs <- CST_Subset(data$obs, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + save_observations(subset_obs, recipe, dict, outdir, archive = archive) + + # Export skill metrics onto outfile if (!is.null(skill_metrics)) { subset_skill <- lapply(skill_metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) - save_metrics(subset_skill, recipe, dict, subset_cube, outdir[var], + save_metrics(subset_skill, recipe, dict, subset_hcst, outdir[var], archive = archive) } if (!is.null(corr_metrics)) { @@ -83,20 +91,32 @@ save_data <- function(recipe, data, ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) - save_corr(subset_corr, recipe, dict, subset_cube, outdir[var], + save_corr(subset_corr, recipe, dict, subset_hcst, outdir[var], archive = archive) } - } - # Export probabilities onto outfile - if (!is.null(probabilities)) { - save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, - archive = archive) - save_probabilities(probabilities$probs, recipe, data$hcst, outdir, - archive = archive, type = "hcst") - if (!is.null(probabilities$probs_fcst)) { - save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, - archive = archive, type = "fcst") + # Export probabilities onto outfile + if (!is.null(probabilities)) { + subset_percentiles <- lapply(probabilities$percentiles, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + 
save_percentiles(subset_percentiles, recipe, subset_hcst, outdir, + archive = archive) + subset_probs <- lapply(probabilities$probs, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_probabilities(subset_probs, recipe, subset_hcst, outdir, + archive = archive, type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + subset_probs_fcst <- lapply(probabilities$probs_fcst, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_probabilities(subset_probs_fcst, recipe, subset_fcst, outdir, + archive = archive, type = "fcst") + } } } } @@ -187,8 +207,8 @@ save_forecast <- function(data_cube, lalo <- c('longitude', 'latitude') - variables <- data_cube$attrs$Variable$varName - # var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq @@ -237,85 +257,79 @@ save_forecast <- function(data_cube, # expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) # Split by variable - for (var_idx in 1:length(data_cube$attrs$Variable$varName)) { - sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), - drop = F, var_dim = 'var', dat_dim = 'dat') - variable <- variables[var_idx] - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = 
var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } - # Generate name of output file - outfile <- get_filename(outdir[var_idx], recipe, variable, fcst.sdate, - agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) } - } + } info(recipe$Run$logger, paste("#####", toupper(type), "SAVED TO NETCDF FILE #####")) } @@ -336,7 +350,8 @@ save_observations <- function(data_cube, lalo <- c('longitude', 'latitude') - variables <- data_cube$attrs$Variable$varName + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- 
recipe$Analysis$Variables$freq @@ -368,96 +383,91 @@ save_observations <- function(data_cube, ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) # Split by variable - for (var_idx in 1:length(variables)) { - sub_cube <- CST_Subset(data_cube, along = 'var', indices = list(var_idx), - drop = F, var_dim = 'var', dat_dim = 'dat') - variable <- variables[var_idx] - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(sub_cube$data, 'syear', i, drop = T) - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
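## [Editorial sketch, not part of the patch] The decadal branch just below derives
## the observation file date by shifting the initialisation date to each hindcast
## year; with the example init_date quoted in the code ("1990-11-01") and a made-up
## hindcast year, the computation works out as:
library(lubridate)
init_date <- as.POSIXct("1990-11-01", tz = "UTC")
syear_val <- 1993                                    # one value of syears_val[i]
fcst.sdate <- init_date + lubridate::years(syear_val - lubridate::year(init_date))
format(fcst.sdate, "%Y%m%d")                         # "19931101"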
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') - } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) - } + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) } + } - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time - # Generate name of output file - outfile <- get_filename(outdir[var_idx], recipe, variable, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) } } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } -- GitLab From 805ec2c76274cfbca990234911c115c794dd4ef4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 7 Feb 2023 11:59:10 +0100 Subject: [PATCH 23/47] fix pipeline (save single variable) --- modules/Saving/Saving.R | 87 +++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/modules/Saving/Saving.R 
b/modules/Saving/Saving.R index fcc78e6a..9e94b5e3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -79,43 +79,64 @@ save_data <- function(recipe, data, # Export skill metrics onto outfile if (!is.null(skill_metrics)) { - subset_skill <- lapply(skill_metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_metrics(subset_skill, recipe, dict, subset_hcst, outdir[var], - archive = archive) - } - if (!is.null(corr_metrics)) { - subset_corr <- lapply(corr_metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_corr(subset_corr, recipe, dict, subset_hcst, outdir[var], - archive = archive) + if (data$hcst$dims[['var']] == 1) { + save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, + archive = archive) + if (!is.null(corr_metrics)) { + save_corr(corr_metrics, recipe, dict, data$hcst, outdir, + archive = archive) + } + } else { + + subset_skill <- lapply(skill_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_metrics(subset_skill, recipe, dict, subset_hcst, outdir[var], + archive = archive) + if (!is.null(corr_metrics)) { + subset_corr <- lapply(corr_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_corr(subset_corr, recipe, dict, subset_hcst, outdir[var], + archive = archive) + } + } } # Export probabilities onto outfile if (!is.null(probabilities)) { - subset_percentiles <- lapply(probabilities$percentiles, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_percentiles(subset_percentiles, recipe, subset_hcst, outdir, - archive = archive) - subset_probs <- lapply(probabilities$probs, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_probabilities(subset_probs, recipe, subset_hcst, outdir, - archive = archive, type = "hcst") - if (!is.null(probabilities$probs_fcst)) { - subset_probs_fcst <- lapply(probabilities$probs_fcst, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_probabilities(subset_probs_fcst, recipe, subset_fcst, outdir, - archive = archive, type = "fcst") + if (data$hcst$dims[['var']] == 1) { + save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, + archive = archive) + save_probabilities(probabilities$probs, recipe, data$hcst, outdir, + archive = archive) + if (!is.null(probabilities$probs_fcst)) { + save_probabilities(probabilities$probs_fcst, recipe, subset_fcst, + outdir, archive = archive, type = "fcst") + } + } else { + subset_percentiles <- lapply(probabilities$percentiles, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_percentiles(subset_percentiles, recipe, subset_hcst, outdir[var], + archive = archive) + subset_probs <- lapply(probabilities$probs, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_probabilities(subset_probs, recipe, subset_hcst, outdir[var], + archive = archive, type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + subset_probs_fcst <- lapply(probabilities$probs_fcst, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + save_probabilities(subset_probs_fcst, recipe, subset_fcst, + outdir[var], archive = archive, type = "fcst") + } } } } -- GitLab From 
1de035722573a7fbc9eb01131bb2a267390d8459 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 09:28:07 +0100 Subject: [PATCH 24/47] Fix bug in data_summary --- tools/data_summary.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index f9ad5979..98cfe46e 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -13,11 +13,11 @@ data_summary <- function(data_cube, recipe) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- '%b %d %Y' } - months <- unique(format(as.Date(data_cube$attrs$Dates[[1]]), format = '%B')) + months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$attrs$Dates[[1]])), + sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), format = date_format) - sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), + sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format) # Log the summary info(recipe$Run$logger, "DATA SUMMARY:") -- GitLab From 7c2c90817f319d359e8bd9da1cd9ea0e1d6b7abf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 09:50:27 +0100 Subject: [PATCH 25/47] Add a TODO --- modules/Saving/Saving.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 9e94b5e3..72755652 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,5 +1,5 @@ ## TODO: Save obs percentiles -## TODO: Save data for multiple variables +## TODO: Insert vardim to simplify the code? source("modules/Saving/paths2save.R") @@ -77,7 +77,7 @@ save_data <- function(recipe, data, drop = F, var_dim = 'var', dat_dim = 'dat') save_observations(subset_obs, recipe, dict, outdir, archive = archive) - # Export skill metrics onto outfile + # Export skill metrics if (!is.null(skill_metrics)) { if (data$hcst$dims[['var']] == 1) { save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, -- GitLab From c235b4932a91ee4d051de89b3196371002aba56d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 09:51:36 +0100 Subject: [PATCH 26/47] Fix bug in data summary --- tools/data_summary.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 49f3c8ea..597f42cb 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -11,10 +11,10 @@ data_summary <- function(data_cube, recipe) { } else if (recipe$Analysis$Variables$freq == "daily_mean") { date_format <- '%b %d %Y' } - months <- unique(format(as.Date(data_cube$attrs$Dates[[1]]), format = '%B')) + months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B')) months <- paste(as.character(months), collapse=", ") - sdate_min <- format(min(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) - sdate_max <- format(max(as.Date(data_cube$attrs$Dates[[1]])), format = date_format) + sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), format = date_format) + sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format) # Create log instance and sink output to logfile and terminal info(recipe$Run$logger, "DATA SUMMARY:") -- GitLab From 7b354ed27346a534121c5bf070a54684a434ccb0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 14:39:21 +0100 Subject: [PATCH 27/47] Verification Suite CERISE --- .gitignore | 7 + .gitlab-ci.yml | 22 + MODULES | 30 + 
NEWS.md | 17 + OperationalCS.R | 33 + README.md | 31 + conf/archive.yml | 150 +++ conf/grid_description/griddes_eccc1.txt | 19 + conf/grid_description/griddes_ncep-cfsv2.txt | 18 + conf/grid_description/griddes_system21_m1.txt | 17 + conf/grid_description/griddes_system2c3s.txt | 17 + conf/grid_description/griddes_system35c3s.txt | 19 + conf/grid_description/griddes_system7c3s.txt | 19 + conf/grid_description/griddes_ukmo600.txt | 18 + conf/indicators_table.yml | 19 + conf/output_dictionaries/scorecards.yml | 37 + conf/variable-dictionary.yml | 268 ++++++ modules/Anomalies/Anomalies.R | 101 ++ modules/Anomalies/tmp/CST_Anomaly.R | 246 +++++ modules/Loading/Loading.R | 422 ++++++++ modules/Loading/check_latlon.R | 90 ++ modules/Loading/dates2load.R | 104 ++ .../testing_recipes/recipe_seasonal-tests.yml | 49 + .../recipe_system5c3s-rsds.yml | 49 + .../testing_recipes/recipe_system5c3s-tas.yml | 47 + .../recipe_system7c3s-prlr.yml | 49 + .../testing_recipes/recipe_system7c3s-tas.yml | 49 + .../recipe_tas-daily-regrid-to-reference.yml | 50 + .../recipe_tas-daily-regrid-to-system.yml | 47 + .../testing_recipes/recipe_test-logging.yml | 47 + .../recipe_test-new-metrics.yml | 46 + .../testing_recipes/recipe_test_anomalies.yml | 49 + modules/Saving/Saving.R | 906 ++++++++++++++++++ modules/Saving/paths2save.R | 108 +++ modules/Skill/Skill.R | 408 ++++++++ modules/Skill/compute_probs.R | 38 + modules/Skill/compute_quants.R | 33 + modules/Skill/s2s.metrics.R | 279 ++++++ modules/Skill/tmp/AbsBiasSS.R | 281 ++++++ modules/Skill/tmp/Bias.R | 189 ++++ modules/Skill/tmp/Corr.R | 463 +++++++++ modules/Skill/tmp/RMSSS.R | 448 +++++++++ modules/Skill/tmp/RandomWalkTest.R | 82 ++ modules/Visualization/Visualization.R | 359 +++++++ modules/Visualization/tmp/PlotCombinedMap.R | 608 ++++++++++++ .../tmp/PlotMostLikelyQuantileMap.R | 196 ++++ modules/test_seasonal.R | 25 + modules/verifications.R | 100 ++ recipes/seasonal_complex.yml-OUTDATED | 46 + recipes/seasonal_oper.yml | 68 ++ recipes/seasonal_oper_atomic.yml-OUTDATED | 73 ++ recipes/tests/execute_tests.R | 44 + recipes/tests/seasonal_testWorkflow1.yml | 53 + recipes/tests/seasonal_testWorkflow2.yml | 54 ++ recipes/tests/seasonal_testWorkflow3.yml | 52 + recipes/tests/seasonal_testWorkflow4.yml | 53 + recipes/tests/seasonal_testWorkflow5.yml | 51 + recipes/tests/seasonal_testWorkflow6.yml | 53 + recipes/tests/seasonal_testWorkflow7.yml | 53 + recipes/tests/seasonal_testWorkflow8.yml | 51 + tests/recipes/recipe-decadal_daily_1.yml | 51 + tests/recipes/recipe-decadal_monthly_1.yml | 51 + tests/recipes/recipe-decadal_monthly_1b.yml | 51 + tests/recipes/recipe-decadal_monthly_2.yml | 51 + tests/recipes/recipe-decadal_monthly_3.yml | 51 + tests/recipes/recipe-seasonal_daily_1.yml | 45 + tests/recipes/recipe-seasonal_monthly_1.yml | 47 + tests/test_decadal.R | 16 + tests/test_seasonal.R | 9 + tests/testthat/test-decadal_daily_1.R | 222 +++++ tests/testthat/test-decadal_monthly_1.R | 338 +++++++ tests/testthat/test-decadal_monthly_2.R | 275 ++++++ tests/testthat/test-decadal_monthly_3.R | 199 ++++ tests/testthat/test-seasonal_daily.R | 167 ++++ tests/testthat/test-seasonal_monthly.R | 238 +++++ tools/add_dims.R | 9 + tools/check_recipe.R | 324 +++++++ tools/data_summary.R | 38 + tools/divide_recipe.R | 113 +++ tools/libs.R | 35 + tools/prepare_outputs.R | 81 ++ ...heck_number_of_independent_verifications.R | 161 ++++ tools/tmp/as.s2dv_cube.R | 184 ++++ 83 files changed, 10116 insertions(+) create mode 100644 .gitignore create mode 100644 .gitlab-ci.yml create 
mode 100644 MODULES create mode 100644 NEWS.md create mode 100644 OperationalCS.R create mode 100644 README.md create mode 100644 conf/archive.yml create mode 100644 conf/grid_description/griddes_eccc1.txt create mode 100644 conf/grid_description/griddes_ncep-cfsv2.txt create mode 100644 conf/grid_description/griddes_system21_m1.txt create mode 100644 conf/grid_description/griddes_system2c3s.txt create mode 100644 conf/grid_description/griddes_system35c3s.txt create mode 100644 conf/grid_description/griddes_system7c3s.txt create mode 100644 conf/grid_description/griddes_ukmo600.txt create mode 100644 conf/indicators_table.yml create mode 100644 conf/output_dictionaries/scorecards.yml create mode 100644 conf/variable-dictionary.yml create mode 100644 modules/Anomalies/Anomalies.R create mode 100644 modules/Anomalies/tmp/CST_Anomaly.R create mode 100644 modules/Loading/Loading.R create mode 100644 modules/Loading/check_latlon.R create mode 100644 modules/Loading/dates2load.R create mode 100644 modules/Loading/testing_recipes/recipe_seasonal-tests.yml create mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml create mode 100644 modules/Loading/testing_recipes/recipe_system5c3s-tas.yml create mode 100644 modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml create mode 100644 modules/Loading/testing_recipes/recipe_system7c3s-tas.yml create mode 100644 modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml create mode 100644 modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml create mode 100644 modules/Loading/testing_recipes/recipe_test-logging.yml create mode 100644 modules/Loading/testing_recipes/recipe_test-new-metrics.yml create mode 100644 modules/Loading/testing_recipes/recipe_test_anomalies.yml create mode 100644 modules/Saving/Saving.R create mode 100644 modules/Saving/paths2save.R create mode 100644 modules/Skill/Skill.R create mode 100644 modules/Skill/compute_probs.R create mode 100644 modules/Skill/compute_quants.R create mode 100644 modules/Skill/s2s.metrics.R create mode 100644 modules/Skill/tmp/AbsBiasSS.R create mode 100644 modules/Skill/tmp/Bias.R create mode 100644 modules/Skill/tmp/Corr.R create mode 100644 modules/Skill/tmp/RMSSS.R create mode 100644 modules/Skill/tmp/RandomWalkTest.R create mode 100644 modules/Visualization/Visualization.R create mode 100644 modules/Visualization/tmp/PlotCombinedMap.R create mode 100644 modules/Visualization/tmp/PlotMostLikelyQuantileMap.R create mode 100644 modules/test_seasonal.R create mode 100644 modules/verifications.R create mode 100644 recipes/seasonal_complex.yml-OUTDATED create mode 100644 recipes/seasonal_oper.yml create mode 100644 recipes/seasonal_oper_atomic.yml-OUTDATED create mode 100644 recipes/tests/execute_tests.R create mode 100644 recipes/tests/seasonal_testWorkflow1.yml create mode 100644 recipes/tests/seasonal_testWorkflow2.yml create mode 100644 recipes/tests/seasonal_testWorkflow3.yml create mode 100644 recipes/tests/seasonal_testWorkflow4.yml create mode 100644 recipes/tests/seasonal_testWorkflow5.yml create mode 100644 recipes/tests/seasonal_testWorkflow6.yml create mode 100644 recipes/tests/seasonal_testWorkflow7.yml create mode 100644 recipes/tests/seasonal_testWorkflow8.yml create mode 100644 tests/recipes/recipe-decadal_daily_1.yml create mode 100644 tests/recipes/recipe-decadal_monthly_1.yml create mode 100644 tests/recipes/recipe-decadal_monthly_1b.yml create mode 100644 tests/recipes/recipe-decadal_monthly_2.yml create mode 100644 
tests/recipes/recipe-decadal_monthly_3.yml create mode 100644 tests/recipes/recipe-seasonal_daily_1.yml create mode 100644 tests/recipes/recipe-seasonal_monthly_1.yml create mode 100644 tests/test_decadal.R create mode 100644 tests/test_seasonal.R create mode 100644 tests/testthat/test-decadal_daily_1.R create mode 100644 tests/testthat/test-decadal_monthly_1.R create mode 100644 tests/testthat/test-decadal_monthly_2.R create mode 100644 tests/testthat/test-decadal_monthly_3.R create mode 100644 tests/testthat/test-seasonal_daily.R create mode 100644 tests/testthat/test-seasonal_monthly.R create mode 100644 tools/add_dims.R create mode 100644 tools/check_recipe.R create mode 100644 tools/data_summary.R create mode 100644 tools/divide_recipe.R create mode 100644 tools/libs.R create mode 100644 tools/prepare_outputs.R create mode 100644 tools/test_check_number_of_independent_verifications.R create mode 100644 tools/tmp/as.s2dv_cube.R diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d17d7634 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +out-logs/ +*.swp +*.swo +/modules/Calibration/test_victoria.R +modules/Loading/testing_recipes/recipe_decadal_calendartest.yml +modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml +conf/vitigeoss-vars-dict.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..8c720372 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,22 @@ +stages: # List of stages for jobs, and their order of execution + - test + +unit-test-seasonal: # This job runs in the test stage. + stage: test + script: + - echo "Loading modules..." + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - module list + - echo "Running seasonal unit tests..." + - Rscript ./tests/test_seasonal.R + +unit-test-decadal: # This job runs in the test stage. + stage: test + script: + - echo "Loading modules..." + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - module list + - echo "Running decadal unit tests..." + - Rscript ./tests/test_decadal.R diff --git a/MODULES b/MODULES new file mode 100644 index 00000000..0a01a979 --- /dev/null +++ b/MODULES @@ -0,0 +1,30 @@ +#!/bin/bash + + # WARNING: CDO HAS TO BE ON VERSION 1.9.4 + # (If not, conflicts with weekly means computation could appear) + +if [ $BSC_MACHINE == "power" ]; then + + module unuse /apps/modules/modulefiles/applications + module use /gpfs/projects/bsc32/software/rhel/7.4/ppc64le/POWER9/modules/all/ + + module load CDO/1.9.4-foss-2018b + module load R/3.6.1-foss-2018b + +elif [ $BSC_MACHINE == "nord3v2" ]; then + + module use /gpfs/projects/bsc32/software/suselinux/11/modules/all + module unuse /apps/modules/modulefiles/applications /apps/modules/modulefiles/compilers /apps/modules/modulefiles/tools /apps/modules/modulefiles/libraries /apps/modules/modulefiles/environment + + + module load CDO/1.9.8-foss-2019b + module load R/4.1.2-foss-2019b + module load OpenMPI/4.0.5-GCC-8.3.0-nord3-v2 + +else + + module load CDO/1.9.8-foss-2015a + module load R/4.1.2-foss-2015a-bare + +fi + diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..fe7ea6e3 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,17 @@ +ESS Verification Suite v1.0.0 +============================= + +Modules: Loading, Calibration, Skill, Saving, Visualization + +New features: +- New function prepare_outputs(), reads the path to the recipe and prepares a new directory with a unique identifier inside `output_dir`. 
Returns the recipe with in the form of a list that can be passed to the modules. +- New output structure: The netCDF files and plots are now stored inside the directory mentioned above. +- Log files: stored inside the output directory. Users can specify the logging threshold in the recipe, under Run:Loglevel. The default is `INFO`, which showsm essages of level INFO and higher. They can also indicate if they want the messages displayed in the terminal as well, under Run:Terminal. The default is `TRUE`. + +Fixes/improvements: +- The `qmap` calibration method has changed with the new version of CST_QuantileMapping(). +- The order of the parameters of the input functions has changed. +- `archive` is no longer a mandatory parameter. +- More informative error message when experiment and reference grids do not match. +- Success messages, loading summaries and warnings are now written to the log file. + diff --git a/OperationalCS.R b/OperationalCS.R new file mode 100644 index 00000000..1e662d1b --- /dev/null +++ b/OperationalCS.R @@ -0,0 +1,33 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly = TRUE) +# To test: +# args <- NULL; args[1] <- "recipes/seasonal_oper.yml" + + +# execution: Rscript OperationalCS.R recipe.yml +# This code checks the recipe and builds and executes the workflow +print(args) +library(yaml) + +recipe <- read_yaml(args[1]) +recipe$filename <- args[1] + +# Load required libraries +source("tools/libs.R") + +# Create output folder and log: +logger <- prepare_outputs(recipe = recipe) +folder <- logger$foldername +log_file <- logger$logname +logger <- logger$logger + +# Checks: +verifications <- check_recipe(recipe, logger) +# Divide recipe into single verifications recipes: +total_recipes <- divide_recipe(recipe, verifications, folder, logger) +# Go to verification code: +capture.output(source("modules/verifications.R"), + file = log_file, type ='message', + append = TRUE) + + diff --git a/README.md b/README.md new file mode 100644 index 00000000..4df05ecc --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ + +ESS Verification Suite +====================== + +This is the Git project for the ESS Verification Suite, which will serve as a tool for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification. + +The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@aho), Lluís Palma (@lpalma) and Núria Pérez-Zanón (@nperez). + +Resources +--------- + +You can access the documentation of the Verification Suite through the wiki: +[Auto-s2s Wiki](https://earth.bsc.es/gitlab/es/auto-s2s/-/wikis/home?target=_blank) + +You may also find useful information in the slides from past user meetings: + +[User meeting September 2022](https://docs.google.com/presentation/d/14-qq__fblMt7xvJDaqS5UqfQMXWCf3Ju/edit#slide=id.p1?target=_blank) + +[User meeting June 2022](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) + +Branching strategy +------------------ + +Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. + +Users that wish to incorporate their own developments into the core of the tool are encouraged to create a personal fork of the Auto-S2S repository to work on their projects. 
Please contact Victòria Agudetse at victoria.agudetse@bsc.es to discuss the first steps. + +Mailing list +------------ + +User meetings, internal releases and news are announced through the mailing list. You can send an email to victoria.agudetse@bsc.es or an.ho@bsc.es to request subscription. diff --git a/conf/archive.yml b/conf/archive.yml new file mode 100644 index 00000000..0251beaf --- /dev/null +++ b/conf/archive.yml @@ -0,0 +1,150 @@ + + +archive: + src: "/esarchive/" + System: + system5c3s: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/system5c3s/" + daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", + "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/"} + monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", + "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/", + "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", + "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/"} + nmember: + fcst: 51 + hcst: 25 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" + system7c3s: + name: "Meteo-France System 7" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/meteofrance/system7c3s/" + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f6h/", "tasmin": "_f6h/"} + nmember: + fcst: 51 + hcst: 25 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + system21_m1: + name: "DWD GCFS 2.1" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f24h/", + "g500":"_f12h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} + nmember: + fcst: 50 + hcst: 30 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system21_m1.txt" + system35c3s: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35c3s/" + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f24h/", "tasmin":"_f24h"} + nmember: + fcst: 50 + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + system2c3s: + name: "JMA System 2" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/jma/system2c3s/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system2c3s.txt" + eccc1: + name: "ECCC CanCM4i" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + glosea6_system600-c3s: + name: "UKMO GloSea 6 6.0" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system600-c3s/" + monthly_mean: {"tas":"_f6h/", "tasmin":"_f24h/", + "tasmax":"_f24h/", "prlr":"_f24h/"} + nmember: + fcst: 
62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo600.txt" + ncep-cfsv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 20 + hcst: 20 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" + Reference: + era5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "g300":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", "g850":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/", "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", "ta850":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "g300":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", "g850":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/", "ta300":"_f1h-r1440x721cds/", + "ta500":"_f1h-r1440x721cds/", "ta850":"_f1h-r1440x721cds/"} + calendar: "standard" + reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" + era5land: + name: "ERA5-Land" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5land/" + daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", + "prlr":"_f1h/", "sfcWind":"_f1h/"} + monthly_mean: {"tas":"_f1h/","tasmin":"_f24h/", + "tasmax":"_f24h/", "prlr":"_f1h/", + "sfcWind":"_f1h/", "rsds":"_f1h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" + uerra: + name: "ECMWF UERRA" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/uerra_mescan/" + daily_mean: {"tas":"_f6h/"} + monthly_mean: {"tas":"_f6h/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/recon/ecmwf/uerra_mescan/daily_mean/tas_f6h/tas_201805.nc" + + diff --git a/conf/grid_description/griddes_eccc1.txt b/conf/grid_description/griddes_eccc1.txt new file mode 100644 index 00000000..58a9810c --- /dev/null +++ b/conf/grid_description/griddes_eccc1.txt @@ -0,0 +1,19 @@ +# +# Grid description file for ECCC1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt new file mode 100644 index 00000000..6d8abe86 --- /dev/null +++ b/conf/grid_description/griddes_ncep-cfsv2.txt @@ -0,0 +1,18 @@ +# +# Grid description file for NCEP CFSv2 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = Longitude +xunits = degrees_east +yname = lat +ylongname = Latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 +# diff --git a/conf/grid_description/griddes_system21_m1.txt b/conf/grid_description/griddes_system21_m1.txt new file mode 100644 index 00000000..954438f8 --- /dev/null +++ b/conf/grid_description/griddes_system21_m1.txt @@ -0,0 +1,17 @@ +# +# Grid description file for DWD 
GCFS2.1 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt new file mode 100644 index 00000000..0827b7c0 --- /dev/null +++ b/conf/grid_description/griddes_system2c3s.txt @@ -0,0 +1,17 @@ +# +# Grid description file for JMA System 2 C3S +# +gridtype = lonlat +gridsize = 10512 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 144 +ysize = 73 +xfirst = 0 +xinc = 2.5 +yfirst = 90 +yinc = -2.5 diff --git a/conf/grid_description/griddes_system35c3s.txt b/conf/grid_description/griddes_system35c3s.txt new file mode 100644 index 00000000..a1248680 --- /dev/null +++ b/conf/grid_description/griddes_system35c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for CMCC SPSv3.5 (C3S) +# Serves as reference_grid for archive.yml +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system7c3s.txt b/conf/grid_description/griddes_system7c3s.txt new file mode 100644 index 00000000..b6f18478 --- /dev/null +++ b/conf/grid_description/griddes_system7c3s.txt @@ -0,0 +1,19 @@ +# Grid description file for Meteofrance System 7 (C3S) +# Serves as reference_grid for archive.ym +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ukmo600.txt b/conf/grid_description/griddes_ukmo600.txt new file mode 100644 index 00000000..31376072 --- /dev/null +++ b/conf/grid_description/griddes_ukmo600.txt @@ -0,0 +1,18 @@ +# Grid description file for UKMO600 (CDS) +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + diff --git a/conf/indicators_table.yml b/conf/indicators_table.yml new file mode 100644 index 00000000..8e8ce0fd --- /dev/null +++ b/conf/indicators_table.yml @@ -0,0 +1,19 @@ + +gdd: + longname: 'Growing Degree Days' + ECVs: tas + freq: daily_mean + fun: AccumulationExceedingThreshold + +gst: + longname: 'Growing Season Temperature' + ECVs: tas + freq: daily_mean + fun: PeriodMean + +spr32: + longname: 'Spring Heat Stress Days - 32°C' + ECVs: tasmax + freq: daily_mean + fun: TotalTimeExceedingThreshold + diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml new file mode 100644 index 00000000..c5071987 --- /dev/null +++ b/conf/output_dictionaries/scorecards.yml @@ -0,0 +1,37 @@ +System: + system5c3s: + short_name: "ecmwfs5" + display_name: "ECMWF System 5" + system7c3s: + short_name: "meteofrances7" + display_name: "Meteo-France System 7" + system21_m1: + short_name: "dwds21" + display_name: "DWD System 21" + system35c3s: + short_name: "cmccs35" + display_name: "CMCC System 35" + system2c3s: + short_name: "jmas2" + display_name: 
"JMA System 2" + eccc1: + short_name: "ecccs1" + display_name: "ECCC System 1" + glosea6_system600-c3s: + short_name: "ukmos600" + display_name: "UK Met Office System 600" + ncep-cfsv2: + short_name: "nceps2" + display_name: "NCEP System 2" +Reference: + era5: + short_name: "era5" + display_name: "ERA5" + era5land: + short_name: "era5land" + display_name: "ERA5-Land" + uerra: + short_name: "uerra_mescan" + display_name: "UERRA MESCAN" + + diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml new file mode 100644 index 00000000..994256ce --- /dev/null +++ b/conf/variable-dictionary.yml @@ -0,0 +1,268 @@ + +vars: +## NOTE: The units field in this file corresponds to CMOR standards. +## Some variables in esarchive may have different units than stated here. +## Use with caution. + +# ECVs + ta300: + units: "K" + long_name: "Air Temperature at 300 hPa" + standard_name: "air_temperature" + accum: no + ta500: + units: "K" + long_name: "Air Temperature at 500 hPa" + standard_name: "air_temperature" + accum: no + ta850: + units: "K" + long_name: "Air Temperature at 850 hPa" + standard_name: "air_temperature" + accum: no + tas: + units: "K" + long_name: "Near-Surface Air Temperature" + standard_name: "air_temperature" + accum: no + tos: + units: "degC" + long_name: "Sea Surface Temperature" + standard_name: "sea_surface_temperature" + accum: no +# outname: "t2" + tasmax: + units: "K" + long_name: "Maximum Near-Surface Air Temperature" + standard_name: "air_temperature" + accum: no + tasmin: + units: "K" + long_name: "Minimum Near-Surface Air Temperature" + standard_name: "air_temperature" + accum: no + ts: + units: "K" + long_name: "Surface Temperature" + standard_name: "surface_temperature" + accum: no + sfcWind: + units: "m s-1" + long_name: "Near-Surface Wind Speed" + standard_name: "wind_speed" + accum: no + sfcWindmax: + units: "m s-1" + long_name: "Daily Maximum Near-Surface Wind Speed" + standard_name: "wind_speed" + accum: no +# outname: "wind" + rsds: + units: "W m-2" + long_name: "Surface Downwelling Shortwave Radiation" + standard_name: "surface_downwelling_shortwave_flux_in_air" + positive: "down" + accum: yes +# outname: "rswin" + prlr: + units: "mm/day" + long_name: "Total precipitation" + standard_name: "total_precipitation_flux" #? 
Not in CF + accum: yes +# outname: "acprec" + g300: + units: "m2 s-2" + long_name: "Geopotential" + standard_name: "geopotential" + accum: no + g500: + units: "m2 s-2" + long_name: "Geopotential" + standard_name: "geopotential" + accum: no + g850: + units: "m2 s-2" + long_name: "Geopotential" + standard_name: "geopotential" + accum: no + pr: + units: "kg m-2 s-1" + long_name: "Precipitation" + standard_name: "precipitation_flux" + accum: yes + prc: + units: "kg m-2 s-1" + long_name: "Convective Precipitation" + standard_name: "convective_precipitation_flux" + accum: yes + psl: + units: "Pa" + long_name: "Sea Level Pressure" + standard_name: "air_pressure_at_mean_sea_level" + accum: no + clt: + units: "%" + long_name: "Total Cloud Cover Percentage" + standard_name: "cloud_area_fraction" + accum: no + hurs: + units: "%" + long_name: "Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hursmin: + units: "%" + long_name: "Daily Minimum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hursmax: + units: "%" + long_name: "Daily Maximum Near-Surface Relative Humidity" + standard_name: "relative_humidity" + accum: no + hfls: + units: "W m-2" + long_name: "Surface Upward Latent Heat Flux" + standard_name: "surface_upward_latent_heat_flux" + accum: no + huss: + units: "1" + long_name: "Near-Surface Specific Humidity" + standard_name: "specific_humidity" + accum: no + rsut: + units: "W m-2" + long_name: "TOA Outgoing Shortwave Radiation" + standard_name: "toa_outgoing_shortwave_flux" + accum: no + rlut: + units: "W m-2" + long_name: "TOA Outgoing Longwave Radiation" + standard_name: "toa_outgoing_longwave_flux" + accum: no + rsdt: + units: "W m-2" + long_name: "TOA Incident Shortwave Radiation" + standard_name: "toa_incoming_shortwave_flux" + accum: no + ta: + units: "K" + long_name: "Air Temperature" + standard_name: "air_temperature" + accum: no + ua: + units: "m s-1" + long_name: "Eastward Wind" + standard_name: "eastward_wind" + accum: no + uas: + units: "m s-1" + long_name: "Eastward Near-Surface Wind" + standard_name: "eastward_wind" + accum: no + va: + units: "m s-1" + long_name: "Northward Wind" + standard_name: "northward_wind" + accum: no + vas: + units: "m s-1" + long_name: "Northward Near-Surface Wind" + standard_name: "northward wind" + accum: no + zg: + units: "m" + long_name: "Geopotential Height" + standard_name: "geopotential_height" + accum: no + evspsbl: + units: "kg m-2 s-1" + long_name: "Evaporation Including Sublimation and Transpiration" + standard_name: "water_evapotranspiration_flux" + accum: no + hfss: + units: "W m-2" + long_name: "Surface Upward Sensible Heat Flux" + standard_name: "surface_upward_sensible_heat_flux" + accum: no + +# Coordinates +coords: + longitude: + units: "degrees_east" + standard_name: "longitude" + long_name: "Longitude" + axis: "X" + latitude: + units: "degrees_north" + standard_name: "latitude" + long_name: "Latitude" + axis: "Y" +## TODO: Add plevels + +# Skill metrics +metrics: + enscorr: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_specs: + long_name: "Ensemble Mean Correlation Coefficient" + enscorr_p.value: + long_name: "Ensemble Mean Correlation p-value" + enscorr_conf.low: + long_name: "Ensemble Mean Correlation Lower Confidence Interval" + enscorr_conf.up: + long_name: "Ensemble Mean Correlation Upper Confidence Interval" + enscorr_significance: + long_name: "Ensemble Mean Correlation Statistical Significance" + corr: + long_name: "Ensemble Correlation 
Coefficient" + corr_specs: + long_name: "Ensemble Correlation Coefficient" + corr_p.value: + long_name: "Ensemble Correlation p-value" + corr_conf.low: + long_name: "Ensemble Correlation Lower Confidence Interval" + corr_conf.up: + long_name: "Ensemble Correlation Upper Confidence Interval" + corr_significance: + long_name: "Ensemble Correlation Statistical Significance" + rps: + long_name: "Ranked Probability Score" + frps: + long_name: "Fair Ranked Probability Score" + rpss: + long_name: "Ranked Probability Skill Score" + rpss_significance: + long_name: "Ranked Probability Skill Score Statistical Significance" + rpss_specs: + long_name: "Ranked Probability Skill Score" + frpss: + long_name: "Fair Ranked Probability Skill Score" + frpss_significance: + long_name: "Fair Ranked Probability Skill Score Statistical Significance" + frpss_specs: + long_name: "Fair Ranked Probability Skill Score" + bss10: + long_name: "Brier Skill Score Lower Extreme" + bss10_specs: + long_name: "Brier Skill Score Lower Extreme" + bss10_significance: + long_name: "Brier Score Lower Extreme Statistical Significance" + bss90: + long_name: "Brier Skill Score Upper Extreme" + bss90_significance: + long_name: "Brier Skill Score Upper Extreme Statistical Significance" + crps: + long_name: "Continuous Ranked Probability Score" + crpss: + long_name: "Continuous Ranked Probability Skill Score" + mean_bias: + long_name: "Mean Bias" + mean_bias_ss: + long_name: "Mean Bias Skill Score" + mean_bias_ss_significance: + long_name: "Mean Bias Skill Score Statistical Significance" + enssprerr: + long_name: "Ensemble Spread-To-Error Ratio" + rmsss: + long_name: "Root Mean Square Skill Score" diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R new file mode 100644 index 00000000..41fd286f --- /dev/null +++ b/modules/Anomalies/Anomalies.R @@ -0,0 +1,101 @@ +source("modules/Anomalies/tmp/CST_Anomaly.R") + +# Compute the hcst, obs and fcst anomalies with or without cross-validation +# and return them, along with the hcst and obs climatologies. 
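+#
+# Illustrative usage sketch (assumes `recipe` is a prepared recipe list with
+# Run$logger set up, and `data` is the list returned by load_datasets() in
+# modules/Loading/Loading.R):
+#   data <- load_datasets(recipe)
+#   data <- compute_anomalies(recipe, data)
+#   # data$hcst, data$obs and data$fcst now hold anomalies; the original full
+#   # fields are kept in data$hcst.full_val and data$obs.full_val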
+ +compute_anomalies <- function(recipe, data) { + + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() + } + + if (recipe$Analysis$Workflow$Anomalies$compute) { + if (recipe$Analysis$Workflow$Anomalies$cross_validation) { + cross <- TRUE + cross_msg <- "with" + } else { + cross <- FALSE + cross_msg <- "without" + } + original_dims <- dim(data$hcst$data) + + # Compute anomalies + anom <- CST_Anomaly(data$hcst, data$obs, + cross = cross, + memb = TRUE, + memb_dim = 'ensemble', + dim_anom = 'syear', + dat_dim = c('dat', 'ensemble'), + ftime_dim = 'time', + ncores = recipe$Analysis$ncores) + # Reorder dims + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + + # Save full fields + hcst_fullvalue <- data$hcst + obs_fullvalue <- data$obs + + # Hindcast climatology + + data$hcst <- anom$exp + data$obs <- anom$obs + remove(anom) + # Change variable metadata + for (var in data$hcst$attrs$Variable$varName) { + # Change hcst longname + data$hcst$attrs$Variable$variables[[var]]$long_name <- + paste(data$hcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + # Change obs longname + data$obs$attrs$Variable$variables[[var]]$long_name <- + paste(data$obs$attrs$Variable$variables[[var]]$long_name, "anomaly") + } + # Compute forecast anomaly field + if (!is.null(data$fcst)) { + # Compute hindcast climatology ensemble mean + clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, + time_dim = "syear", + dat_dim = c("dat", "ensemble"), + memb = FALSE, + memb_dim = "ensemble", + ftime_dim = "time", + ncores = recipe$Analysis$ncores) + clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") + dims <- dim(clim_hcst) + clim_hcst <- rep(clim_hcst, dim(data$fcst$data)[['ensemble']]) + dim(clim_hcst) <- c(dims, ensemble = dim(data$fcst$data)[['ensemble']]) + # Get fcst anomalies + data$fcst$data <- data$fcst$data - clim_hcst + # Change metadata + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$variables[[var]]$long_name <- + paste(data$fcst$attrs$Variable$variables[[var]]$long_name, "anomaly") + } + } + + info(recipe$Run$logger, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) + + info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + + } else { + warn(recipe$Run$logger, paste("The Anomalies module has been called, but", + "recipe parameter Analysis:Variables:anomaly is set to FALSE.", + "The full fields will be returned.")) + hcst_fullvalue <- NULL + obs_fullvalue <- NULL + info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") + } + + ## TODO: Return fcst full value? 
+ + return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, + hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) + +} diff --git a/modules/Anomalies/tmp/CST_Anomaly.R b/modules/Anomalies/tmp/CST_Anomaly.R new file mode 100644 index 00000000..f38e39b0 --- /dev/null +++ b/modules/Anomalies/tmp/CST_Anomaly.R @@ -0,0 +1,246 @@ +#'Anomalies relative to a climatology along selected dimension with or without cross-validation +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Pena Jesus, \email{jesus.pena@bsc.es} +#'@description This function computes the anomalies relative to a climatology +#'computed along the selected dimension (usually starting dates or forecast +#'time) allowing the application or not of crossvalidated climatologies. The +#'computation is carried out independently for experimental and observational +#'data products. +#' +#'@param exp An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the seasonal forecast experiment data in the element +#' named \code{$data}. +#'@param obs An object of class \code{s2dv_cube} as returned by \code{CST_Load} +#' function, containing the observed data in the element named \code{$data}. +#'@param dim_anom A character string indicating the name of the dimension +#' along which the climatology will be computed. The default value is 'sdate'. +#'@param cross A logical value indicating whether cross-validation should be +#' applied or not. Default = FALSE. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is 'member'. +#'@param memb A logical value indicating whether to subtract the climatology +#' based on the individual members (TRUE) or the ensemble mean over all +#' members (FALSE) when calculating the anomalies. The default value is TRUE. +#'@param dat_dim A character vector indicating the name of the dataset and +#' member dimensions. If there is no dataset dimension, it can be NULL. +#' The default value is "c('dataset', 'member')". +#'@param filter_span A numeric value indicating the degree of smoothing. This +#' option is only available if parameter \code{cross} is set to FALSE. +#'@param ftime_dim A character string indicating the name of the temporal +#' dimension where the smoothing with 'filter_span' will be applied. It cannot +#' be NULL if 'filter_span' is provided. The default value is 'ftime'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. It will be used only when +#' 'filter_span' is not NULL. +#' +#'@return A list with two S3 objects, 'exp' and 'obs', of the class +#''s2dv_cube', containing experimental and date-corresponding observational +#'anomalies, respectively. These 's2dv_cube's can be ingested by other functions +#'in CSTools. 
+#' +#'@examples +#'# Example 1: +#'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) +#'dim(mod) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'obs <- 1 : (1 * 1 * 4 * 5 * 6 * 7) +#'dim(obs) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'lon <- seq(0, 30, 5) +#'lat <- seq(0, 25, 5) +#'exp <- list(data = mod, lat = lat, lon = lon) +#'obs <- list(data = obs, lat = lat, lon = lon) +#'attr(exp, 'class') <- 's2dv_cube' +#'attr(obs, 'class') <- 's2dv_cube' +#' +#'anom <- CST_Anomaly(exp = exp, obs = obs, cross = FALSE, memb = TRUE) +#' +#'@seealso \code{\link[s2dv]{Ano_CrossValid}}, \code{\link[s2dv]{Clim}} and \code{\link{CST_Load}} +#' +#'@import multiApply +#'@importFrom s2dv InsertDim Clim Ano_CrossValid Reorder +#'@export +CST_Anomaly <- function(exp = NULL, obs = NULL, dim_anom = 'sdate', cross = FALSE, + memb_dim = 'member', memb = TRUE, dat_dim = c('dataset', 'member'), + filter_span = NULL, ftime_dim = 'ftime', ncores = NULL) { + # s2dv_cube + if (!inherits(exp, 's2dv_cube') & !is.null(exp) || + !inherits(obs, 's2dv_cube') & !is.null(obs)) { + stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + # exp and obs + if (is.null(exp$data) & is.null(obs$data)) { + stop("One of the parameter 'exp' or 'obs' cannot be NULL.") + } + case_exp = case_obs = 0 + if (is.null(exp)) { + exp <- obs + case_obs = 1 + warning("Parameter 'exp' is not provided and 'obs' will be used instead.") + } + if (is.null(obs)) { + obs <- exp + case_exp = 1 + warning("Parameter 'obs' is not provided and 'exp' will be used instead.") + } + if(any(is.null(names(dim(exp$data))))| any(nchar(names(dim(exp$data))) == 0) | + any(is.null(names(dim(obs$data))))| any(nchar(names(dim(obs$data))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names in element 'data'.") + } + if(!all(names(dim(exp$data)) %in% names(dim(obs$data))) | + !all(names(dim(obs$data)) %in% names(dim(exp$data)))) { + stop("Parameter 'exp' and 'obs' must have same dimension names in element 'data'.") + } + dim_exp <- dim(exp$data) + dim_obs <- dim(obs$data) + dimnames_data <- names(dim_exp) + # dim_anom + if (is.numeric(dim_anom) & length(dim_anom) == 1) { + warning("Parameter 'dim_anom' must be a character string and a numeric value will not be ", + "accepted in the next release. 
The corresponding dimension name is assigned.") + dim_anom <- dimnames_data[dim_anom] + } + if (!is.character(dim_anom)) { + stop("Parameter 'dim_anom' must be a character string.") + } + if (!dim_anom %in% names(dim_exp) | !dim_anom %in% names(dim_obs)) { + stop("Parameter 'dim_anom' is not found in 'exp' or in 'obs' dimension in element 'data'.") + } + if (dim_exp[dim_anom] <= 1 | dim_obs[dim_anom] <= 1) { + stop("The length of dimension 'dim_anom' in label 'data' of the parameter ", + "'exp' and 'obs' must be greater than 1.") + } + # cross + if (!is.logical(cross) | !is.logical(memb) ) { + stop("Parameters 'cross' and 'memb' must be logical.") + } + if (length(cross) > 1 | length(memb) > 1 ) { + cross <- cross[1] + warning("Parameter 'cross' has length greater than 1 and only the first element", + "will be used.") + } + # memb + if (length(memb) > 1) { + memb <- memb[1] + warning("Parameter 'memb' has length greater than 1 and only the first element", + "will be used.") + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { + stop("Parameter 'memb_dim' is not found in 'exp' or in 'obs' dimension.") + } + } + # dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + if (!all(dat_dim %in% names(dim_exp)) | !all(dat_dim %in% names(dim_obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension in element 'data'.", + " Set it as NULL if there is no dataset dimension.") + } + } + # filter_span + if (!is.null(filter_span)) { + if (!is.numeric(filter_span)) { + warning("Paramater 'filter_span' is not numeric and any filter", + " is being applied.") + filter_span <- NULL + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + # ftime_dim + if (!is.character(ftime_dim)) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim_exp) | !memb_dim %in% names(dim_obs)) { + stop("Parameter 'ftime_dim' is not found in 'exp' or in 'obs' dimension in element 'data'.") + } + } + + # Computating anomalies + #---------------------- + + # With cross-validation + if (cross) { + ano <- Ano_CrossValid(exp = exp$data, obs = obs$data, + time_dim = dim_anom, + memb_dim = memb_dim, + memb = memb, + dat_dim = dat_dim, + ncores = ncores) + + # Without cross-validation + } else { + tmp <- Clim(exp = exp$data, obs = obs$data, + time_dim = dim_anom, + memb_dim = memb_dim, + memb = memb, + dat_dim = dat_dim, + ncores = ncores) + if (!is.null(filter_span)) { + tmp$clim_exp <- Apply(tmp$clim_exp, + target_dims = c(ftime_dim), + output_dims = c(ftime_dim), + fun = .Loess, + loess_span = filter_span, + ncores = ncores)$output1 + tmp$clim_obs <- Apply(tmp$clim_obs, + target_dims = c(ftime_dim), + output_dims = c(ftime_dim), + fun = .Loess, + loess_span = filter_span, + ncores = ncores)$output1 + } + if (memb) { + clim_exp <- tmp$clim_exp + clim_obs <- tmp$clim_obs + } else { + clim_exp <- InsertDim(tmp$clim_exp, 1, dim_exp[memb_dim]) + clim_obs <- InsertDim(tmp$clim_obs, 1, dim_obs[memb_dim]) + } + clim_exp <- InsertDim(clim_exp, 1, dim_exp[dim_anom]) + clim_obs <- InsertDim(clim_obs, 1, dim_obs[dim_anom]) + ano <- NULL + + # Permuting back dimensions to original order + clim_exp <- 
Reorder(clim_exp, dimnames_data) + clim_obs <- Reorder(clim_obs, dimnames_data) + + ano$exp <- exp$data - clim_exp + ano$obs <- obs$data - clim_obs + } + + exp$data <- ano$exp + obs$data <- ano$obs + + # Outputs + # ~~~~~~~~~ + if (case_obs == 1) { + return(obs) + } + else if (case_exp == 1) { + return(exp) + } + else { + return(list(exp = exp, obs = obs)) + } +} + +.Loess <- function(clim, loess_span) { + data <- data.frame(ensmean = clim, day = 1 : length(clim)) + loess_filt <- loess(ensmean ~ day, data, span = loess_span) + output <- predict(loess_filt) + return(output) +} + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R new file mode 100644 index 00000000..ca79cb7d --- /dev/null +++ b/modules/Loading/Loading.R @@ -0,0 +1,422 @@ +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("modules/Loading/dates2load.R") +source("modules/Loading/check_latlon.R") +## TODO: Move to prepare_outputs.R +source("tools/libs.R") +#TODO: remove these two lines when new as.s2dv_cube() is in CSTools +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') + +load_datasets <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- recipe$Analysis$Variables$name + store.freq <- recipe$Analysis$Variables$freq + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get esarchive datasets dict: + archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + ## TODO: it is necessary? 
+ ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + # ----------- + obs.path <- paste0(archive$src, + obs.dir, store.freq, "/$var$", + reference_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", + exp_descrip[[store.freq]][[variable]], + "$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + time = idxs$hcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + time = 
idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
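+  # (Illustrative) After the Subset() call below, `dates` keeps only the sday,
+  # sweek, syear and time dimensions of the hindcast; e.g. a monthly hindcast
+  # spanning 1993-2016 with ftime_min = 1 and ftime_max = 3 would give
+  # dim(dates) of c(sday = 1, sweek = 1, syear = 24, time = 3).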
+ dates <- hcst$attrs$Dates + dim(dates) <- dim(Subset(hcst$data, + along=c('dat', 'var', + 'latitude', 'longitude', 'ensemble'), + list(1,1,1,1,1), drop="selected")) + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq == "daily_mean") { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!identical(as.vector(hcst$lat), as.vector(obs$lat))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!identical(as.vector(hcst$lon), as.vector(obs$lon))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", 
hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + + } + } + + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + if (dictionary$vars[[variable]]$accum) { + info(recipe$Run$logger, + "Accumulated variable: setting negative values to zero.") + obs$data[obs$data < 0] <- 0 + hcst$data[hcst$data < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[fcst$data < 0] <- 0 + } + } + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function? + if (variable == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if (((attr(obs$Variable, "variable")$units == "m s-1") || + (attr(obs$Variable, "variable")$units == "m s**-1")) && + ((attr(hcst$Variable, "variable")$units == "m s-1") || + (attr(hcst$Variable, "variable")$units == "m s**-1"))) { + + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data <- obs$data*86400*1000 + attr(obs$Variable, "variable")$units <- "mm/day" + hcst$data <- hcst$data*86400*1000 + attr(hcst$Variable, "variable")$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data <- fcst$data*86400*1000 + attr(fcst$Variable, "variable")$units <- "mm/day" + } + } + } + + # Compute anomalies if requested + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(recipe$Run$logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(recipe$logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(recipe$logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + 
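+  # The list returned below is what the downstream modules consume. An
+  # illustrative call from a driver script (assuming `recipe` is a recipe list
+  # read with read_yaml(), e.g. one of the YAML files under
+  # modules/Loading/testing_recipes/, with its Run$logger already set up):
+  #   data <- load_datasets(recipe)
+  #   dim(data$hcst$data)        # hindcast array dimensions
+  #   data$hcst$attrs$Dates      # corresponding forecast times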
############################################################################ + + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} diff --git a/modules/Loading/check_latlon.R b/modules/Loading/check_latlon.R new file mode 100644 index 00000000..c317acd2 --- /dev/null +++ b/modules/Loading/check_latlon.R @@ -0,0 +1,90 @@ +# Check if the lat and lon inputs abide the regulation. +# +# (x) ←|-------------|-------------|-------------|-------------|-------------| →(x) +# -360 -180 0 180 360 540 +# |___________________________|___________________________| +# [0, 360] [0, 360] +# +# |___________________________|___________________________| +# [-180, 180] [-180, 180] +# Premise: lonmin < lonmax +# [Case 1: lonmin >= 0] +# 1-0: lonmax <360 →[0, 360] +# 1-1:lonmin >= 180 & lonmax > 360 →[-180, 180] +# 1-2:lonmin < 180 & lonmax > 360 →[0, 360] + warning* +# +# [Case 2: lonmax < 0] +# 2-0: lonmin >= -180 & lonmax < 180 →[-180, 180] +# 2-1: lonmin < -180 & lonmax < 0 →[0, 360] +# 2-2: lonmin < -180 & lonmax > 0 →[-180, 180] + warning* +# 2-3 lonmax > 180 →[-180, 180] + warning* +# +# *warning: The region is not one integrated box. + +check_latlon <- function(latmin, latmax, lonmin, lonmax) { + + if (any(!sapply(c(latmin, latmax, lonmin, lonmax), is.numeric)) | + any(sapply(c(latmin, latmax, lonmin, lonmax), length) != 1)) + stop("latmin, latmax, lonmin, and lonmax must be a number.") + # lat + if (any(c(latmin, latmax) < -90) | any(c(latmin, latmax) > 90)) + stop("latmin must be within [-90, 90].") + # lon + if (lonmin > lonmax) + stop("lonmax must not be smaller than lonmin.") + # Adjust lon if it is way out of scope + while (lonmin > 360 & lonmax > 360) { + lonmin <- lonmin - 360 + lonmax <- lonmax - 360 + } + while (lonmin < -180 & lonmax < -180) { + lonmin <- lonmin + 360 + lonmax <- lonmax + 360 + } + + warning_discrete_lon <- FALSE + warning_unconsidered_case <- FALSE + ## case 1 + if (lonmin >= 0) { + if (lonmax < 360) { ## 1-0 + circularsort <- CircularSort(0, 360) + } else if (lonmin >= 180 & lonmax > 360) { ## 1-1 + circularsort <- CircularSort(-180, 180) + if (lonmax >= 540) + warning_discrete_lon <- TRUE + } else if (lonmin < 180 & lonmax > 360) { ## 1-2 + circularsort <- CircularSort(0, 360) + warning_discrete_lon <- TRUE + } else { + circularsort <- CircularSort(0, 360) + warning_unconsidered_case <- TRUE + } + + } + ## case 2 + else { + if (lonmin >= -180 & lonmax < 180) { ## 2-0 + circularsort <- CircularSort(-180, 180) + } else if (lonmin < -180 & lonmax < 0) { ## 2-1 + circularsort <- CircularSort(0, 360) + if (lonmin < -360) + warning_discrete_lon <- TRUE + } else if (lonmin < -180 & lonmax > 0) { ## 2-2 + circularsort <- CircularSort(-180, 180) + warning_discrete_lon <- TRUE + } else if (lonmax > 180) { ## 2-3 + circularsort <- CircularSort(-180, 180) + warning_discrete_lon <- TRUE + } else { + circularsort <- CircularSort(-180, 180) + warning_unconsidered_case <- TRUE + } + } + + if (warning_discrete_lon) + warning("The longitude of returned data won't be continuous. Check the metadata to see the longitude range.") + if (warning_unconsidered_case) + warning("The longitude case is not being considered yet. 
Please report this case to maintainers and check the longitude of returned data.") + + return(circularsort) +} diff --git a/modules/Loading/dates2load.R b/modules/Loading/dates2load.R new file mode 100644 index 00000000..0e3613f3 --- /dev/null +++ b/modules/Loading/dates2load.R @@ -0,0 +1,104 @@ +#'Read requested dates from recipe and return array of file dates to be loaded +#' +#'The purpose of this function is to read the recipe configuration data for +#'Auto-S2S workflows, retrieve the start and end dates for the hindcast and +#'the forecast dates, and return two arrays containing the requested dates, to +#'be passed to the Start() call. If no fcst date is provided, it returns an +#'empty object. +#' +#'@param recipe Auto-S2S configuration recipe as returned by read_yaml() +#'@param logger object of class logger containing log output file information +#' +#'@return a list of two arrays containing file dates for hcst and fcst +#' +#'@export + +library(lubridate) + +dates2load <- function(recipe, logger) { + + temp_freq <- recipe$Analysis$Variables$freq + recipe <- recipe$Analysis$Time + # hcst dates + file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), + recipe$sdate) + + if (temp_freq == "monthly_mean") { + file_dates <- .add_dims(file_dates) + } + # fcst dates (if fcst_year empty it creates an empty object) + if (! is.null(recipe$fcst_year)) { + file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) + if (temp_freq == "monthly_mean") { + file_dates.fcst <- .add_dims(file_dates.fcst) + } + } else { + file_dates.fcst <- NULL + info(logger, + paste("fcst_year empty in the recipe, creating empty fcst object...")) + } + return(list(hcst = file_dates, fcst = file_dates.fcst)) + ## TODO: document header of fun +} + +# adds the correspondent dims to each sdate array +.add_dims <- function(data) { + + default_dims <- c(sday = 1, sweek = 1, syear = length(data)) + default_dims[names(dim(data))] <- dim(data) + dim(data) <- default_dims + return(data) +} + +# Gets the corresponding dates or indices according +# to the sdate/leadtimes requested in the recipe +# +# The leadtimes are defined by months +# Ex. 
20201101 with leadtimes 1-4 corresponds to +# the forecasting times covering December to March + +get_timeidx <- function(sdates, ltmin, ltmax, + time_freq="monthly_mean") { + + if (time_freq == "daily_mean") { + + sdates <- ymd(sdates) + idx_min <- sdates + months(ltmin - 1) + idx_max <- sdates + months(ltmax) - days(1) + + day_seq <- seq(idx_min[1], idx_max[1], by = 'days') + if (any("0229" %in% (format(day_seq, "%m%d")))) { + time_length <- as.integer(idx_max[1]-idx_min[1]) + } else { + time_length <- as.integer(idx_max[1]-idx_min[1]+1) + } + indxs <- array(numeric(), c(file_date = length(sdates), + time = time_length)) + #syear = length(sdates), + #sday = 1, sweek = 1, + + for (sdate in 1:length(sdates)) { + day_seq <- seq(idx_min[sdate], idx_max[sdate], by='days') + indxs[sdate,] <- day_seq[!(format(day_seq, "%m%d") == "0229")] + } + indxs <- as.POSIXct(indxs*86400, + tz = 'UTC', origin = '1970-01-01') + lubridate::hour(indxs) <- 12 + lubridate::minute(indxs) <- 00 + dim(indxs) <- c(file_date = length(sdates), + time = time_length) + + } else if (time_freq == "monthly_mean") { + + idx_min <- ltmin + idx_max <- ltmax + indxs <- indices(idx_min:idx_max) + + } + + # TODO: 6 hourly case + #idx1 <- (sdates + months(ltmin-1) - sdates)*4 + #idx2 <- idx1 + ndays*4 - 1 + + return(indxs) +} diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml new file mode 100644 index 00000000..61177b71 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '2000' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml new file mode 100644 index 00000000..94fc716c --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-rsds.yml @@ -0,0 +1,49 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: rsds + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml new file mode 100644 index 00000000..3a2bc72e --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system5c3s-tas.yml @@ -0,0 +1,47 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: '0601' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: raw + Skill: + metric: RPSS_specs BSS90_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml new file mode 100644 index 00000000..23b630b5 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system7c3s-prlr.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: prlr + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml new file mode 100644 index 00000000..df82c349 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_system7c3s-tas.yml @@ -0,0 +1,49 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: yes # yes/no, default yes + cross_validation: yes # yes/no, default yes + Calibration: + method: mse_min + Skill: + metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml new file mode 100644 index 00000000..364d3dd6 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-reference.yml @@ -0,0 +1,50 @@ +Description: + Author: V. Agudetse + Info: ECMWF System5 Seasonal Forecast Example recipe (daily mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas # Mandatory, str: variable name in /esarchive/ + freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: system5c3s # Mandatory, str: System codename. See docu. + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: era5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1101' + fcst_year: '2020' # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -10 # Mandatory, int: minimum latitude + latmax: 10 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 20 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no # Whether to compute the anomalies and use them for skill metrics + cross_validation: # whether they should be computed in cross-validation + Calibration: + method: qmap # Mandatory, str: Calibration method. See docu. + Skill: + metric: RPSS FRPSS # str: Skill metric or list of skill metrics. See docu. + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
+ Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml new file mode 100644 index 00000000..244a5654 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_tas-daily-regrid-to-system.yml @@ -0,0 +1,47 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2003' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: qmap + Skill: + metric: FRPS RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_test-logging.yml b/modules/Loading/testing_recipes/recipe_test-logging.yml new file mode 100644 index 00000000..372f6d83 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test-logging.yml @@ -0,0 +1,47 @@ +Description: + Author: V. Agudetse + Info: Light recipe to raise some errors/warnings and test the logging system + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: qmap + Skill: + metric: mean_bias bias_SS + Probabilities: + percentiles: + Indicators: + index: no + ncores: 1 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_test-new-metrics.yml b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml new file mode 100644 index 00000000..df84138d --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test-new-metrics.yml @@ -0,0 +1,46 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1998' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: RMSSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Loading/testing_recipes/recipe_test_anomalies.yml b/modules/Loading/testing_recipes/recipe_test_anomalies.yml new file mode 100644 index 00000000..cdf5e3ca --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_test_anomalies.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1999' + hcst_end: '2010' + ftime_min: 1 + ftime_max: 2 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R new file mode 100644 index 00000000..319d5ba3 --- /dev/null +++ b/modules/Saving/Saving.R @@ -0,0 +1,906 @@ +## TODO: Save obs percentiles + +source("modules/Saving/paths2save.R") + +save_data <- function(recipe, data, + skill_metrics = NULL, + probabilities = NULL, + archive = NULL) { + # Wrapper for the saving functions. + # recipe: The auto-s2s recipe + # archive: The auto-s2s archive + # data: output of load_datasets() + # data: output of calibrate_datasets() + # skill_metrics: output of compute_skill_metrics() + # probabilities: output of compute_probabilities() + # mean_bias: output of compute_mean_bias() + + if (is.null(recipe)) { + error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + stop() + } + + if (is.null(data)) { + error(recupe$Run$logger, + paste("The 'data' parameter is mandatory. 
It should be a list", + "of at least two s2dv_cubes containing the hcst and obs.")) + stop() + } + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } + dict <- read_yaml("conf/variable-dictionary.yml") + + # Create output directory + outdir <- get_dir(recipe) + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + # Export hindcast, forecast and observations onto outfile + save_forecast(data$hcst, recipe, dict, outdir, archive = archive, + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(data$fcst, recipe, dict, outdir, + archive = archive, type = 'fcst') + } + save_observations(data$obs, recipe, dict, outdir, archive = archive) + + # Separate ensemble correlation from the rest of the metrics, as it has one + # extra dimension "ensemble" and must be saved to a different file + if ("corr" %in% names(skill_metrics)) { + corr_metric_names <- grep("^corr", names(skill_metrics)) + corr_metrics <- skill_metrics[corr_metric_names] + skill_metrics <- skill_metrics[-corr_metric_names] + if (length(skill_metrics) == 0) { + skill_metrics <- NULL + } + } else { + corr_metrics <- NULL + } + + # Export skill metrics onto outfile + if (!is.null(skill_metrics)) { + save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, + archive = archive) + } + if (!is.null(corr_metrics)) { + save_corr(corr_metrics, recipe, dict, data$hcst, outdir, + archive = archive) + } + + # Export probabilities onto outfile + if (!is.null(probabilities)) { + save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, + archive = archive) + save_probabilities(probabilities$probs, recipe, data$hcst, outdir, + archive = archive, type = "hcst") + if (!is.null(probabilities$probs_fcst)) { + save_probabilities(probabilities$probs_fcst, recipe, data$fcst, outdir, + archive = archive, type = "fcst") + } + } +} + +get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. + parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. 
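+  # Illustrative sketch, not from the original code (values and the calendar
+  # are hypothetical, wrapped in if (FALSE) so it never runs): the returned
+  # time axis stores leadtimes as hours since the start date, with CF-style
+  # units metadata attached for ArrayToNc().
+  if (FALSE) {
+    example_leadtimes <- c(360, 1104)   # e.g. two monthly leads, in hours
+    example_sdate <- as.Date("20201101", format = "%Y%m%d")
+    example_time <- example_leadtimes
+    dim(example_time) <- c(time = length(example_time))
+    attr(example_time, "variables") <-
+      list(time = list(units = paste0("hours since ", example_sdate,
+                                      "T00:00:00"),
+                       calendar = "standard"))
+  }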
+ ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} + +save_forecast <- function(data_cube, + recipe, + dictionary, + outdir, + agg = "global", + archive = NULL, + type = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + global_attributes <- get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + +# if (fcst.horizon == "seasonal") { +# calendar <- attr(data_cube$Variable, "variable")$dim$time$calendar +# } else { +# calendar <- attr(data_cube$Variable, "variable")$dim[[3]]$calendar +# } + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { + ## PROBLEM for fcst!!!!!!!!!!!! 
+ init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } + } else { + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- attr(data_cube$Variable, 'variable')$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) +} + + +save_observations <- function(data_cube, + recipe, + 
dictionary, + outdir, + agg = "global", + archive = NULL) { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + global_attributes <- get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$Reference[[global_attributes$reference]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. But it may be better to use + ## the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$variables[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
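+    # Illustrative sketch, not from the original code (dates are hypothetical,
+    # wrapped in if (FALSE)): monthly-mean obs carry a "YYYYMM" file_date that
+    # is padded to a full date, while daily obs take the date straight from
+    # the Dates array.
+    if (FALSE) {
+      as.Date(paste0("199311", "01"), "%Y%m%d")                # monthly case
+      as.Date(as.POSIXct("1993-11-01 12:00:00", tz = "UTC"))   # daily case
+    }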
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } + + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") +} + +## TODO: Place inside a function somewhere +# if (tolower(agg) == "country") { +# load(mask.path) +# grid <- europe.countries.iso +# } else { +# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, +# lat=attr(var.obs, 'Variables')$dat1$latitude) +# } + +save_metrics <- function(skill, + recipe, + dictionary, + data_cube, + outdir, + agg = "global", + archive = NULL) { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. 
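+  # Illustrative sketch, not from the original code (shapes are hypothetical,
+  # wrapped in if (FALSE)): 'skill' is expected to be a named list of metric
+  # arrays with longitude, latitude and time dimensions, e.g. as returned by
+  # compute_skill_metrics().
+  if (FALSE) {
+    example_skill <- list(
+      rpss = array(rnorm(24), dim = c(longitude = 4, latitude = 3, time = 2)),
+      rpss_significance = array(FALSE,
+                                dim = c(longitude = 4, latitude = 3, time = 2)))
+  }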
+ + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
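+  # Illustrative sketch, not from the original code (dates are hypothetical,
+  # wrapped in if (FALSE)): with a November start date this yields e.g.
+  # "20201101" when a forecast year is requested, and the placeholder
+  # "19701101" otherwise.
+  if (FALSE) {
+    paste0("2020", "1101")   # fcst_year provided
+    paste0("1970", "1101")   # no fcst_year: placeholder reference year
+  }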
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "skill") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") +} + +save_corr <- function(skill, + recipe, + dictionary, + data_cube, + outdir, + agg = "global", + archive = NULL) { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- 
as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} + +save_percentiles <- function(percentiles, + recipe, + data_cube, + outdir, + agg = "global", + archive = NULL) { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
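+  # Illustrative sketch, not from the original code (shapes are hypothetical,
+  # wrapped in if (FALSE)): 'percentiles' is expected to be a named list like
+  # the one returned in compute_probabilities()$percentiles, e.g.
+  # percentile_33, percentile_66.
+  if (FALSE) {
+    example_percentiles <- list(
+      percentile_33 = array(rnorm(24),
+                            dim = c(longitude = 4, latitude = 3, time = 2)),
+      percentile_66 = array(rnorm(24),
+                            dim = c(longitude = 4, latitude = 3, time = 2)))
+  }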
+ + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + percentiles <- lapply(percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "percentiles") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, percentiles), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") +} + +save_probabilities <- function(probs, + recipe, + data_cube, + outdir, + agg = "global", + archive = NULL, + type = "hcst") { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' + + lalo <- c('longitude', 'latitude') + + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$variables[[variable]]$long_name + global_attributes <- get_global_attributes(recipe, archive) + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
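+  # Illustrative sketch, not from the original code (dates are hypothetical,
+  # wrapped in if (FALSE)): leadtimes are the hours elapsed between each
+  # verification date and the initialization date. The code below uses PCICt
+  # presumably so that non-standard model calendars (e.g. 360-day) are
+  # handled correctly.
+  if (FALSE) {
+    example_init  <- as.POSIXct("1993-11-01", tz = "UTC")
+    example_dates <- as.POSIXct(c("1993-11-16", "1993-12-16"), tz = "UTC")
+    as.numeric(difftime(example_dates, example_init, units = "hours"))  # 360 1080
+  }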
+ dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R new file mode 100644 index 00000000..2d6353fe --- /dev/null +++ b/modules/Saving/paths2save.R @@ -0,0 +1,108 @@ +## TODO: Separate by time aggregation + +get_filename <- function(dir, recipe, var, date, agg, file.type) { + # This function builds the path of the output file based on directory, + # variable, forecast date, startdate, aggregation, forecast horizon and + # type of metric/forecast/probability. 
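+  # Illustrative sketch, not from the original code (paths and recipe values
+  # are hypothetical, wrapped in if (FALSE)): with the default
+  # (non-Scorecards) output format and monthly data, the resulting file names
+  # look like
+  #   <outdir>/tas-skill_month11.nc   (skill metrics, November start date)
+  #   <outdir>/tas_19931101.nc        (hindcast for the 1993 start date)
+  if (FALSE) {
+    # 'recipe' as read from a seasonal recipe yaml
+    get_filename("/path/to/outdir/", recipe, "tas", "19931101", "global", "skill")
+    get_filename("/path/to/outdir/", recipe, "tas", "19931101", "global", "exp")
+  }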
+
+  if (recipe$Analysis$Horizon == "subseasonal") {
+    shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V")
+    dd <- "week"
+  } else {
+    shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m")
+    dd <- "month"
+  }
+
+  switch(tolower(agg),
+         "country" = {gg <- "-country"},
+         "global" = {gg <- ""})
+
+  if (tolower(recipe$Analysis$Output_format) == 'scorecards') {
+    # Define output dir name according to Scorecards format
+    dict <- read_yaml("conf/output_dictionaries/scorecards.yml")
+    # Get necessary names
+    system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name
+    reference <- dict$Reference[[recipe$Analysis$Datasets$Reference$name]]$short_name
+    hcst_start <- recipe$Analysis$Time$hcst_start
+    hcst_end <- recipe$Analysis$Time$hcst_end
+
+    switch(file.type,
+           "skill" = {type_info <- "-skill_"},
+           "corr" = {type_info <- "-corr_"},
+           "exp" = {type_info <- paste0("_", date, "_")},
+           "obs" = {type_info <- paste0("-obs_", date, "_")},
+           "percentiles" = {type_info <- "-percentiles_"},
+           "probs" = {type_info <- paste0("-probs_", date, "_")},
+           "bias" = {type_info <- paste0("-bias_", date, "_")})
+
+    # Build file name
+    file <- paste0("scorecards_", system, "_", reference, "_",
+                   var, type_info, hcst_start, "-", hcst_end, "_s", shortdate)
+  } else {
+    switch(file.type,
+           "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)},
+           "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)},
+           "exp" = {file <- paste0(var, gg, "_", date)},
+           "obs" = {file <- paste0(var, gg, "-obs_", date)},
+           "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd,
+                                           shortdate)},
+           "probs" = {file <- paste0(var, gg, "-probs_", date)},
+           "bias" = {file <- paste0(var, gg, "-bias_", date)})
+  }
+
+  return(paste0(dir, file, ".nc"))
+
+}
+
+get_dir <- function(recipe, agg = "global") {
+  # This function builds the path for the output directory. The output
+  # directories will be subdirectories within outdir, organized by variable,
+  # startdate, and aggregation.
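+  # Illustrative sketch, not from the original code (paths and recipe values
+  # are hypothetical, wrapped in if (FALSE)): with the default output format,
+  # an mse_min-calibrated monthly tas run for start date 1101 and fcst_year
+  # 2020 would be written under
+  #   <output_dir>/outputs/mse_min-monthly_mean/tas/20201101/
+  if (FALSE) {
+    get_dir(recipe)   # 'recipe' as read from a seasonal recipe yaml
+  }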
+ + ## TODO: Get aggregation from recipe + + outdir <- paste0(recipe$Run$output_dir, "/outputs/") + ## TODO: multivar case + variable <- recipe$Analysis$Variables$name + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") + + } else { + # Default generic output format based on FOCUS + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } + } else { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } + } + + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", calib.method, "-", + store.freq, "/", variable, "/", + fcst.sdate, "/")}) + } + return(dir) + +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R new file mode 100644 index 00000000..9f97e688 --- /dev/null +++ b/modules/Skill/Skill.R @@ -0,0 +1,408 @@ +# This module should calculate verification metrics at any time of the workflow +# It should implement different verification metrics: +# - FRPSS and RPSS +# - FCRPSS and CRPSS +# - enscorr +# - bias +# - reliability diagram +# - ask Carlos which decadal metrics he is currently using + +source("modules/Skill/compute_quants.R") +source("modules/Skill/compute_probs.R") +source("modules/Skill/s2s.metrics.R") +## TODO: Remove when new version of s2dv is released +source("modules/Skill/tmp/RandomWalkTest.R") +source("modules/Skill/tmp/Bias.R") +source("modules/Skill/tmp/AbsBiasSS.R") +source("modules/Skill/tmp/RMSSS.R") +source("modules/Skill/tmp/Corr.R") + +## TODO: Implement this in the future +## Which parameter are required? +# if (!("obs" %in% ls()) || is.null(obs)) { +# error(logger, +# "There is no object 'obs' in the global environment or it is NULL") +# } +# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { +# error(logger, +# "There is no object 'fcst' in the global environment or it is NULL") +# } +# if (!("hcst" %in% ls()) || is.null(hcst)) { +# error(logger, +# "There is no object 'hcst' in the global environment or it is NULL") +# } +# if (!("metric" %in% ls()) || is.null(metric)) { +# warn(logger, +# "Verification metric not found and it is set as 'EnsCorr'.") +# metric <- 'EnsCorr' +# } +# if (metric %in% c('FRPSS', 'RPSS')) { +# metric_fun <- "veriApply" +# metric_method <- "FairRpss" +# } else if (metric %in% c("FCRPSS", "CRPSS")) { +# metric_fun <- "veriApply" +# } else if (metric %in% c("EnsCorr", "EnsCor")) { +# metric_fun <- "veriApply" +# metric_method <- "EnsCorr" +# #... 
+# } else { +# error(logger, "Unknown verification metric defined in the recipe.") +# metric_fun <- 'NotFound' +# } +# info(logger, paste("#-------------------------- ", "\n", +# " running Skill module ", "\n", +# " it can call ", metric_fun )) + +# compute_skill_metrics <- function(recipe, data$hcst, obs, +# clim_data$hcst = NULL, +# clim_obs = NULL) { +compute_skill_metrics <- function(recipe, data) { + + # data$hcst: s2dv_cube containing the hindcast + # obs: s2dv_cube containing the observations + # recipe: auto-s2s recipe as provided by read_yaml + + ## TODO: Adapt time_dims to subseasonal case + ## TODO: Add dat_dim + ## TODO: Refine error messages + ## TODO: Add check to see if anomalies are provided (info inside s2dv_cube) + +# if (recipe$Analysis$Workflow$Anomalies$compute) { +# if (is.null(clim_data$hcst) || is.null(clim_obs)) { +# warn(recipe$Run$logger, "Anomalies have been requested in the recipe, +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") +# } +# } else { +# warn(recipe$Run$logger, "Anomaly computation was not requested in the +# recipe. Be aware that some metrics, such as the CRPSS may not be +# correct.") +# } + time_dim <- 'syear' + memb_dim <- 'ensemble' + metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + skill_metrics <- list() + for (metric in strsplit(metrics, ", | |,")[[1]]) { + # Whether the fair version of the metric is to be computed + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', + 'fcrps', 'fcrpss')) { + Fair <- T + } else { + Fair <- F + } + # Whether to compute correlation for the ensemble mean or for each member + if (metric == 'corr') { + memb <- T + } else if (metric == 'enscorr') { + memb <- F + } + # Ranked Probability Score and Fair version + if (metric %in% c('rps', 'frps')) { + skill <- RPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # Ranked Probability Skill Score and Fair version + } else if (metric %in% c('rpss', 'frpss')) { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Brier Skill Score - 10th percentile + } else if (metric == 'bss10') { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Brier Skill Score - 90th percentile + } else if (metric == 'bss90') { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # CRPS and FCRPS + } else if (metric %in% c('crps', 'fcrps')) { + skill <- CRPS(data$hcst$data, 
data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPSS and FCRPSS + } else if (metric %in% c('crpss', 'fcrpss')) { + skill <- CRPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$crpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Mean bias (climatology) + } else if (metric == 'mean_bias') { + ## TODO: Eliminate option to compute from anomalies + # Compute from full field + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- Bias(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # Mean bias skill score + } else if (metric == 'mean_bias_ss') { + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- AbsBiasSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$biasSS + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Ensemble mean correlation + } else if (metric %in% c('enscorr', 'corr')) { + ## TODO: Return significance + ## TODO: Implement option for Kendall and Spearman methods? 
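+      # Note: in Corr() (taken from s2dv), memb = TRUE (set above for 'corr')
+      # keeps the member dimension, giving one correlation per ensemble
+      # member, while memb = FALSE ('enscorr') first takes the ensemble mean
+      # and correlates it with the observations.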
+ skill <- Corr(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$corr + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'rmsss') { + # Compute RMSS + skill <- RMSSS(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) + # Compute ensemble mean and modify dimensions + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rmsss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'enssprerr') { + # Remove ensemble dim from obs to avoid veriApply warning + obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, + drop = "selected") + capture.output( + skill <- easyVerification::veriApply(verifun = 'EnsSprErr', + fcst = data$hcst$data, + obs = obs_noensdim, + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), + na.rm = na.rm, + ncpus = ncores) + ) + remove(obs_noensdim) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # SpecsVerification metrics + } else if (grepl("specs", metric, fixed = TRUE)) { + # Compute SpecsVerification version of the metrics + ## Retain _specs in metric name for clarity + metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name + if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', + 'rpss'))) { + warn(recipe$Run$logger, + "Some of the requested SpecsVerification metrics are not available.") + } + capture.output( + skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + ) + skill <- .drop_dims(skill) + if (metric_name == "frps") { + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) + } + skill_metrics[[ metric ]] <- skill + } + } + info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") + return(skill_metrics) +} + +compute_probabilities <- function(recipe, data) { + ## TODO: Do hcst and fcst at the same time + + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + named_probs <- list() + named_probs_fcst <- list() + named_quantiles <- list() + + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, "Quantiles and probability bins have been + requested, but no thresholds are provided in the recipe.") + stop() + } else { + for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { + # Parse thresholds in recipe + thresholds <- sapply(element, function (x) eval(parse(text = x))) + quants <- compute_quants(data$hcst$data, thresholds, + ncores = ncores, + na.rm = na.rm) + probs <- compute_probs(data$hcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(quants)['bin'][[1]])) { + named_quantiles <- append(named_quantiles, + list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) + } + for (i in 
seq(1:dim(probs)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i + } + + # Compute fcst probability bins + if (!is.null(data$fcst)) { + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs_fcst)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs_fcst <- append(named_probs_fcst, + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) + names(named_probs_fcst)[length(named_probs_fcst)] <- name_i + } + } + } + + # Rearrange dimensions and return probabilities + named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) + named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) + if (!is.null(data$fcst)) { + fcst_years <- dim(data$fcst$data)[['syear']] + named_probs_fcst <- lapply(named_probs_fcst, + function(x) {Subset(x, + along = 'syear', + indices = 1:fcst_years, + drop = 'non-selected')}) + results <- list(probs = named_probs, + probs_fcst = named_probs_fcst, + percentiles = named_quantiles) + } else { + results <- list(probs = named_probs, + percentiles = named_quantiles) + } + + info(recipe$Run$logger, + "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") + return(results) + } +} + +## TODO: Replace with ClimProjDiags::Subset +.drop_dims <- function(metric_array) { + # Drop all singleton dimensions + metric_array <- drop(metric_array) + # If time happened to be a singleton dimension, add it back in the array + if (!("time" %in% names(dim(metric_array)))) { + dim(metric_array) <- c("time" = 1, dim(metric_array)) + } + # If array has memb dim (Corr case), change name to 'ensemble' + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + # } else { + # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) + } + return(metric_array) +} diff --git a/modules/Skill/compute_probs.R b/modules/Skill/compute_probs.R new file mode 100644 index 00000000..1c17b358 --- /dev/null +++ b/modules/Skill/compute_probs.R @@ -0,0 +1,38 @@ +## TODO: Document header +compute_probs <- function(data, quantiles, + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { + + # Define na.rm behavior + if (na.rm) { + .c2p <- function(x, t) { + if (any(!is.na(x))) { + # If the array contains any non-NA values, call convert2prob + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } else { + # If the array only contains NAs, return NA vector + rep(NA, dim(t)[['bin']] + 1) # vector with as many NAs as prob bins. 
+ } + } + } else { + .c2p <- function(x, t) { + if (any(is.na(x))) { + # If the array contains any NA values, return NA vector + rep(NA, dim(t)[['bin']] + 1) + } else { + # If there are no NAs, call convert2prob + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } + } + } + + probs <- Apply(data = list(x = data, t = quantiles), + target_dims = probs_dims, + .c2p, + output_dims = "bin", + split_factor = split_factor, + ncores = ncores)[[1]] + + return(probs) +} diff --git a/modules/Skill/compute_quants.R b/modules/Skill/compute_quants.R new file mode 100644 index 00000000..8c89e87e --- /dev/null +++ b/modules/Skill/compute_quants.R @@ -0,0 +1,33 @@ +## TODO: Document header + +compute_quants <- function(data, thresholds, + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), + split_factor=1, na.rm=FALSE) { + + # Define na.rm behavior + if (na.rm) { + .get_quantiles <- function(x, t) { + quantile(as.vector(x), t, na.rm = TRUE) + } + } else { + .get_quantiles <- function(x, t) { + if (any(is.na(x))) { + # If the array contains any NA values, return NA vector + rep(NA, length(t)) + } else { + # If there are no NAs, call quantile() + quantile(as.vector(x), t, na.rm = FALSE) + } + } + } + + quantiles <- Apply(data, + target_dims = quantile_dims, + function(x, t) {.get_quantiles(as.vector(x), thresholds)}, + output_dims = "bin", + ncores = ncores, + split_factor = split_factor)[[1]] + + return(quantiles) +} diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R new file mode 100644 index 00000000..04e9d801 --- /dev/null +++ b/modules/Skill/s2s.metrics.R @@ -0,0 +1,279 @@ + +# MERGES verification dimns int single sdate dim along which the +# verification metrics will be computed +mergedims <- function(data, indims, outdim) { + + old.dims <- names(dim(data)) + new.dims <- c(indims, old.dims[! old.dims %in% indims]) + data <- Reorder(data, new.dims) + + for (step in 1:(length(indims) - 1)) { + if (step == 1) { + data <- MergeDims(data, + merge_dims=c(indims[step], indims[step+1]), + rename_dim=outdim) + } else { + data <- MergeDims(data, + merge_dims=c(outdim, indims[step+1]), + rename_dim=outdim) + } + } + + return(data) + +} + +## TODO: New function to split sdate dim back into 'sday', 'sweek' and 'syear'? 
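+
+## Illustrative usage sketch (not part of the workflow itself): mergedims()
+## collapses the verification dimensions into a single 'sdate' dimension.
+## Assuming s2dv's Reorder() and MergeDims() are attached (as the function
+## above already requires), a hypothetical input would behave like this:
+##   dat <- array(rnorm(12), dim = c(syear = 3, sday = 1, sweek = 1, time = 4))
+##   merged <- mergedims(dat, indims = c('syear', 'sday', 'sweek'), outdim = 'sdate')
+##   dim(merged)  # expected: c(sdate = 3, time = 4)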
+ +Compute_verif_metrics <- function(exp, obs, skill_metrics, + verif.dims=c("syear", "sday", "sweek"), + merge.dims=F, + na.rm=T, ncores=1) { + + if (merge.dims) { + exp <- mergedims(exp, verif.dims, 'sdate') + obs <- mergedims(obs, verif.dims, 'sdate') + time.dim <- 'sdate' + } else { + time.dim <- 'syear' + } + ## obs already contains ensemble dimension + # obs <- InsertDim(obs, 2, 1, name='member') + + ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE + if (na.rm) { + nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][] + not_missing_dates <- c() + for (sdate in 1:nsdates) { + if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))) { + not_missing_dates <- c(not_missing_dates, sdate) + } + } + obs <- Subset(obs, along=time.dim, not_missing_dates, drop=F) + exp <- Subset(exp, along=time.dim, not_missing_dates, drop=F) + } + + return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm)) + +} + +.compute_metrics <- function(exp, obs, metrics, + ncores=1, split_factor=1, + merge.dims=FALSE, na.rm=FALSE) { + if (merge.dims) { + time.dim <- 'sdate' + } else { + time.dim <- 'syear' + } + + veriUnwrap <- easyVerification:::veriUnwrap + + ## SPECS VERIFICATION PARAMETERS + # Assign function name and probability bins for each metric + SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") + + FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", + bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr") + + PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), + bss10=c(1/10), bss90=c(9/10), enscorr=NULL) + + metrics_data <- list() + for (metric in metrics) { + + if (metric %in% SPECSVER_METRICS) { + + data <- Apply(data=list(exp, obs), + target_dims=list(c(time.dim, 'ensemble'), + c(time.dim, 'ensemble')), + fun="veriApply", + verifun=FUN[[metric]], + prob=PROB[[metric]], + ensdim=2, + ncores=ncores, + split_factor=split_factor, + tdim=1, + na.rm=na.rm)[[1]] #* 100 + + data <- Subset(data, c('ensemble'), list(1), drop='selected') + data[!is.finite(data)] <- NaN + metric <- paste0(metric, "_specs") + metrics_data <- data + # metrics_data[[ metric ]] <- data ## previously: list(data) + + } else if (metric == "corr_eno") { + # computes ensemble mean + data <- multiApply::Apply(data = exp, + target_dims = 'ensemble', + fun = mean, ncores = ncores)$output1 + + data <- multiApply::Apply(data = list(exp = data, + obs = Subset(obs, c('ensemble'), + list(1), drop='selected')), + target_dims = time.dim, fun = .correlation_eno, + time_dim = time.dim, ncores = ncores) + + # append both corr_r and corr_sign to metrics list + for (i in 1:length(data)) { + coeff <- data[[i]] + coeff[!is.finite(coeff)] <- NaN + if (names(data)[i] %in% c("r", "sign")){ + metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff) + } + } + + } else if (metric == "frpss_sign") { + + terciles_obs <- compute_quants(obs, c(1/3, 2/3), + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + terciles_exp <- compute_quants(exp, c(1/3, 2/3), + quantile_dims=c(time.dim, 'ensemble'), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_obs <- compute_probs(obs, terciles_obs, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_exp <- compute_probs(exp, terciles_exp, + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_clim = array(data = 1/3, dim = dim(probs_obs)) + + frps <- NULL + n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] + 
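+      # The block below builds a Fair RPSS by hand: the Fair RPS of the
+      # forecast and of a climatological forecast (constant 1/3 probability
+      # per tercile) are computed from the tercile probabilities with
+      # .rps_from_probs(), averaged over the time dimension, and combined as
+      # FRPSS = 1 - FRPS_exp / FRPS_clim; the significance is assessed with
+      # the DelSole and Tippett (2016) Random Walk test.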
frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, + probs_obs = probs_obs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 + + frps$exp <- multiApply::Apply(data = list(probs_exp = probs_exp, + probs_obs = probs_obs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 + + frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim, + fun=mean, ncores=ncores)$output1 + frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim, + fun=mean, ncores=ncores)$output1 + + frpss_respect2clim <- NULL + frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean + frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp, + skill_B=frps$clim, + time_dim=time.dim, + ncores=ncores)$signif + + frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN + frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN + + frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, + c('ensemble'), list(1), + drop='selected') + frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign, + c('ensemble'), list(1), + drop='selected') + + metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss) + metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign) + + } + + } + + return(metrics_data) + +} + +.correlation_eno = function(exp, obs, + time_dim = 'syear', + alpha = 0.05, + ncores = 1) { + + cor = NULL + cor$r = cor(exp, obs) # Correlation coefficient + + n_eff = s2dv::Eno(data = obs, time_dim = time_dim, + na.action = na.pass, ncores = ncores) + + t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE) + t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2) + + if (anyNA(c(t, t_alpha2_n2)) == FALSE & t >= t_alpha2_n2) { + cor$sign = TRUE + } else { + cor$sign = FALSE + } + + z_a2 = qnorm(p = alpha/2, lower.tail = FALSE) + conf_int = c() + conf_int[1] = tanh(atanh(cor$r) - z_a2 / sqrt(n_eff-3)) + conf_int[2] = tanh(atanh(cor$r) + z_a2 / sqrt(n_eff-3)) + cor$conf_int = conf_int + + cor$n_eff <- n_eff + + return(cor) +} + + +.rps_from_probs <- function(probs_exp, + probs_obs, + n_categories, + Fair, + n_members = NULL) { + + ## Checkings + if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){ + stop('The number of probabilities has to be the same as the number of categories') + } + if (!is.numeric(n_categories)){ + stop('n_categories must be an integer') + } + if (!is.numeric(n_members) & Fair == TRUE){ + stop('n_members must be an integer') + } + + ## RPS (Wilks 2011, pp.348-350) + rps <- NULL + for (i in 1:n_categories) { + rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 + } + rps <- sum(rps) + + ## FairRPS + if (Fair == TRUE) { + + ## formula of SpecsVerification::EnsRps + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + R <- n_members + R_new <- Inf + probs_cum <- cumsum(probs_exp) + adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum) + adjustment <- sum(adjustment) + + rps <- rps + adjustment + + } + + return(rps) + +} diff --git a/modules/Skill/tmp/AbsBiasSS.R b/modules/Skill/tmp/AbsBiasSS.R new file mode 100644 index 00000000..0ceb009c --- /dev/null +++ b/modules/Skill/tmp/AbsBiasSS.R @@ -0,0 +1,281 @@ +#'Compute the Absolute Mean Bias Skill Score +#' +#'The Absolute Mean Bias Skill Score is based on the Absolute Mean Error (Wilks, +#' 2011) between the ensemble mean forecast and the 
observations. It measures
+#'the accuracy of the forecast in comparison with a reference forecast to assess
+#'whether the forecast presents an improvement or a worsening with respect to
+#'that reference. The Absolute Mean Bias Skill Score ranges between minus infinity and 1.
+#'Positive values indicate that the forecast has higher skill than the reference
+#'forecast, while negative values indicate that it has a lower skill. Examples
+#'of reference forecasts are the climatological forecast (average of the
+#'observations), a previous model version, or another model. It is computed as
+#'\code{AbsBiasSS = 1 - AbsBias_exp / AbsBias_ref}. The statistical significance
+#'is obtained based on a Random Walk test at the 95% confidence level (DelSole
+#'and Tippett, 2016). If there is more than one dataset, the result will be
+#'computed for each pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#' dimension.
+#'@param obs A named numerical array of the observation with at least time
+#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#' 'dat_dim'.
+#'@param ref A named numerical array of the reference forecast data with at
+#' least time dimension. The dimensions must be the same as 'exp' except
+#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should
+#' not have dataset dimension. If there is a corresponding reference for each
+#' experiment, the dataset dimension must have the same length as in 'exp'. If
+#' 'ref' is NULL, the climatological forecast is used as reference forecast.
+#' The default value is NULL.
+#'@param time_dim A character string indicating the name of the time dimension.
+#' The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp'
+#' and 'ref' are already the ensemble mean. The default value is NULL.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#' The length of this dimension can be different between 'exp' and 'obs'.
+#' The default value is NULL.
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
+#' kept (FALSE) for computation. The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'\item{$biasSS}{
+#' A numerical array of BiasSS with dimensions nexp, nobs and the rest
+#' dimensions of 'exp' except 'time_dim' and 'memb_dim'.
+#'}
+#'\item{$sign}{
+#' A logical array of the statistical significance of the BiasSS
+#' with the same dimensions as $biasSS. nexp is the number of
+#' experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation
+#' (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted.
+#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'biasSS1 <- AbsBiasSS(exp = exp, obs = obs, ref = ref, memb_dim = 'member') +#'biasSS2 <- AbsBiasSS(exp = exp, obs = obs, ref = NULL, memb_dim = 'member') +#' +#'@import multiApply +#'@export +AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, + dat_dim = NULL, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } 
+ if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################ + + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = na.rm) + } + } + + ## Mean bias skill score + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { + target_dims_ref <- c(time_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim)) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .AbsBiasSS, + dat_dim = dat_dim, + na.rm = na.rm, + ncores = ncores) + + return(output) +} + +.AbsBiasSS <- function(exp, obs, ref = NULL, dat_dim = NULL, na.rm = FALSE) { + # exp and obs: [sdate, (dat_dim)] + # ref: [sdate, (dat_dim)] or NULL + + # Adjust exp, obs, ref to have dat_dim temporarily + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') + if (!is.null(ref)) { + ref <- InsertDim(ref, posdim = 2, lendim = 1, name = 'dataset') + } + ref_dat_dim <- FALSE + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + if (length(dim(ref)) == 1) { # ref: [sdate] + ref_dat_dim <- FALSE + } else { + ref_dat_dim <- TRUE + } + } + + biasSS <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + exp_data <- exp[, i] + if (isTRUE(ref_dat_dim)) { + ref_data <- ref[, i] + } else { + ref_data <- ref + } + for (j in 1:nobs) { + obs_data <- obs[, j] + + if (isTRUE(na.rm)) { + if (is.null(ref)) { + good_values <- !is.na(exp_data) & !is.na(obs_data) + exp_data <- exp_data[good_values] + obs_data <- obs_data[good_values] + } else { + good_values <- !is.na(exp_data) & !is.na(ref_data) & !is.na(obs_data) + exp_data <- exp_data[good_values] + ref_data <- ref_data[good_values] + obs_data <- obs_data[good_values] + } + } + + ## Bias of the exp + bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + ## Bias of the ref + if (is.null(ref)) { ## Climatological forecast + ref_data <- rep(mean(obs_data, na.rm = na.rm), length(obs_data)) + } + bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + ## 
Skill score and significance + biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) + sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref)$signif + } + } + + if (is.null(dat_dim)) { + dim(biasSS) <- NULL + dim(sign) <- NULL + } + + + return(list(biasSS = biasSS, sign = sign)) +} diff --git a/modules/Skill/tmp/Bias.R b/modules/Skill/tmp/Bias.R new file mode 100644 index 00000000..0319a0f0 --- /dev/null +++ b/modules/Skill/tmp/Bias.R @@ -0,0 +1,189 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. The default value is FALSE. +#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. 
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.")) + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## absolute + if (!is.logical(absolute) | length(absolute) > 1) { + stop("Parameter 'absolute' must be one logical value.") + } + ## time_mean + if (!is.logical(time_mean) | length(time_mean) > 1) { + stop("Parameter 'time_mean' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + } + + ## (Mean) Bias + bias <- Apply(data = list(exp, obs), + 
target_dims = c(time_dim, dat_dim), + fun = .Bias, + time_dim = time_dim, + dat_dim = dat_dim, + na.rm = na.rm, + absolute = absolute, + time_mean = time_mean, + ncores = ncores)$output1 + + return(bias) +} + + +.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE) { + # exp and obs: [sdate, (dat)] + + if (is.null(dat_dim)) { + bias <- exp - obs + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- mean(bias, na.rm = na.rm) + } + + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + bias[, i, j] <- exp[, i] - obs[, j] + } + } + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- MeanDims(bias, time_dim, na.rm = na.rm) + } + } + + return(bias) +} diff --git a/modules/Skill/tmp/Corr.R b/modules/Skill/tmp/Corr.R new file mode 100644 index 00000000..c95b1034 --- /dev/null +++ b/modules/Skill/tmp/Corr.R @@ -0,0 +1,463 @@ +#'Compute the correlation coefficient between an array of forecast and their corresponding observation +#' +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. The correlations are +#'computed along 'time_dim' that usually refers to the start date dimension. If +#''comp_dim' is given, the correlations are computed only if obs along comp_dim +#'dimension are complete between limits[1] and limits[2], i.e., there is no NA +#'between limits[1] and limits[2]. This option can be activated if the user +#'wants to account only for the forecasts which the corresponding observations +#'are available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr +#'The function can calculate ensemble mean before correlation by 'memb_dim' +#'specified and 'memb = F'. If ensemble mean is not calculated, correlation will +#'be calculated for each member. +#'If there is only one dataset for exp and obs, you can simply use cor() to +#'compute the correlation. +#' +#'@param exp A named numeric array of experimental data, with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. If there is no dataset +#' dimension, set NULL. +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is NULL. 
+#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when +#' 'memb_dim' is not NULL. The default value is TRUE. +#'@param pval A logical value indicating whether to return or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the +#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of member in experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. 
+#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE) +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## method + if (!(method %in% c("kendall", "spearman", "pearson"))) { 
+ stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + if (is.null(memb_dim)) { + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] 
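+      # Simplest case: 'exp' and 'obs' are single time series. The coefficient
+      # is computed directly with cor() on pairwise-complete observations,
+      # provided 'exp' has at least one non-NA value and 'obs' has more than
+      # two; otherwise the result is left as NA.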
+ nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + nexp <- 1 + nobs <- 1 + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif 
<- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim) & !is.null(memb_dim)) { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} diff --git a/modules/Skill/tmp/RMSSS.R b/modules/Skill/tmp/RMSSS.R new file mode 100644 index 00000000..d2ff4861 --- /dev/null +++ b/modules/Skill/tmp/RMSSS.R @@ -0,0 +1,448 @@ +#'Compute root mean square error skill score +#' +#'Compute the root mean square error skill score (RMSSS) between an array of +#'forecast 'exp' and an array of observation 'obs'. The two arrays should +#'have the same dimensions except along dat_dim, where the length can be +#'different, with the number of experiments/models (nexp) and the number of +#'observational datasets (nobs).\cr +#'RMSSS computes the root mean square error skill score of each jexp in 1:nexp +#'against each job in 1:nobs which gives nexp * nobs RMSSS for each grid point +#'of the array.\cr +#'The RMSSS are computed along the time_dim dimension which should correspond +#'to the start date dimension.\cr +#'The p-value and significance test are optionally provided by an one-sided +#'Fisher test or Random Walk test.\cr +#' +#'@param exp A named numeric array of experimental data which contains at least +#' two dimensions for dat_dim and time_dim. It can also be a vector with the +#' same length as 'obs', then the vector will automatically be 'time_dim' and +#' 'dat_dim' will be 1. +#'@param obs A named numeric array of observational data which contains at least +#' two dimensions for dat_dim and time_dim. The dimensions should be the same +#' as paramter 'exp' except the length of 'dat_dim' dimension. The order of +#' dimension can be different. It can also be a vector with the same length as +#' 'exp', then the vector will automatically be 'time_dim' and 'dat_dim' will +#' be 1. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension, or 0 (typical climatological forecast) or 1 +#' (normalized climatological forecast). If it is an array, the dimensions must +#' be the same as 'exp' except 'memb_dim' and 'dat_dim'. If there is only one +#' reference dataset, it should not have dataset dimension. If there is +#' corresponding reference for each experiment, the dataset dimension must +#' have the same length as in 'exp'. If 'ref' is NULL, the typical +#' climatological forecast is used as reference forecast (equivelant to 0.) +#' The default value is NULL. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is 'dataset'. +#'@param time_dim A character string indicating the name of dimension along +#' which the RMSSS are computed. The default value is 'sdate'. 
+#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter 'exp' +#' and 'ref' are already the ensemble mean. The default value is NULL. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho: RMSSS = 0. The default value is TRUE. +#'@param sign A logical value indicating whether to compute or not the +#' statistical significance of the test Ho: RMSSS = 0. The default value is +#' FALSE. +#'@param alpha A numeric of the significance level to be used in the +#' statistical significance test. The default value is 0.05. +#'@param sig_method A character string indicating the significance method. The +#' options are "one-sided Fisher" (default) and "Random Walk". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, all other dimensions of exp except time_dim).\cr +#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the +#'number of observation (i.e., dat_dim in obs). If dat_dim is NULL, nexp and +#'nobs are omitted.\cr +#'\item{$rmsss}{ +#' A numerical array of the root mean square error skill score. +#'} +#'\item{$p.val}{ +#' A numerical array of the p-value with the same dimensions as $rmsss. +#' Only present if \code{pval = TRUE}. +#'} +#'\item{sign}{ +#' A logical array of the statistical significance of the RMSSS with the same +#' dimensions as $rmsss. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#' set.seed(1) +#' exp <- array(rnorm(30), dim = c(dataset = 2, time = 3, memb = 5)) +#' set.seed(2) +#' obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) +#' res <- RMSSS(exp, obs, time_dim = 'time', dat_dim = 'dataset') +#' +#'@rdname RMSSS +#'@import multiApply +#'@importFrom stats pf +#'@export +RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + sig_method = 'one-sided Fisher', ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) & is.null(dim(obs))) { #is vector + if (length(exp) == length(obs)) { + exp <- array(exp, dim = c(length(exp), 1)) + names(dim(exp)) <- c(time_dim, dat_dim) + obs <- array(obs, dim = c(length(obs), 1)) + names(dim(obs)) <- c(time_dim, dat_dim) + } else { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + } else if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") + } + if (!is.null(ref)) { + if (!is.numeric(ref)) { + stop("Parameter 'ref' must be numeric.") + } + if (is.array(ref)) { + if 
(any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + stop("Parameter 'ref' must be a numeric array or number 0 or 1.") + } + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | length(alpha) > 1) { + stop("Parameter 'alpha' must be one numeric value.") + } + ## sig_method + if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { + stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") + } + if (sig_method == "Random Walk" & pval == T) { + warning("p-value cannot be calculated by significance method 'Random Walk'.") + pval <- FALSE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'memb_dim' and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim) && memb_dim %in% name_ref) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions 
except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.")) + } + } + + if (dim(exp)[time_dim] <= 2) { + stop("The length of time_dim must be more than 2 to compute RMSSS.") + } + + + ############################### +# # Sort dimension +# name_exp <- names(dim(exp)) +# name_obs <- names(dim(obs)) +# order_obs <- match(name_exp, name_obs) +# obs <- Reorder(obs, order_obs) + + + ############################### + # Create ref array if needed + if (is.null(ref)) ref <- 0 + if (!is.array(ref)) { + ref <- array(data = ref, dim = dim(exp)) + } + + ############################### + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = T) + if (!is.null(ref) & memb_dim %in% names(dim(ref))) { + ref <- MeanDims(ref, memb_dim, na.rm = T) + } + } + + ############################### + # Calculate RMSSS + +# if (!is.null(ref)) { # use "ref" as reference forecast +# if (!is.null(dat_dim) && dat_dim %in% names(dim(ref))) { +# target_dims_ref <- c(time_dim, dat_dim) +# } else { +# target_dims_ref <- c(time_dim) +# } +# data <- list(exp = exp, obs = obs, ref = ref) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim), +# ref = target_dims_ref) +# } else { +# data <- list(exp = exp, obs = obs) +# target_dims = list(exp = c(time_dim, dat_dim), +# obs = c(time_dim, dat_dim)) +# } + data <- list(exp = exp, obs = obs, ref = ref) + if (!is.null(dat_dim)) { + if (dat_dim %in% names(dim(ref))) { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim, dat_dim)) + } else { + target_dims <- list(exp = c(time_dim, dat_dim), + obs = c(time_dim, dat_dim), + ref = c(time_dim)) + } + } else { + target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) + } + + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, + ncores = ncores) + + return(res) +} + +.RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE, + sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher') { + # exp: [sdate, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, (dat)] or NULL + + if (is.null(ref)) { + ref <- array(data = 0, dim = dim(obs)) + } else if (identical(ref, 0) | identical(ref, 1)) { + ref <- array(ref, dim = dim(exp)) + } + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + nexp <- 1 + nobs <- 1 + nref <- 1 + # Add dat dim back temporarily + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + dim(ref) <- c(dim(ref), dat = 1) + + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + nexp <- as.numeric(dim(exp)[2]) + nobs <- as.numeric(dim(obs)[2]) + if (dat_dim %in% names(dim(ref))) { + nref <- as.numeric(dim(ref)[2]) + } else { + dim(ref) <- c(dim(ref), dat = 1) + nref <- 1 + } + } + + nsdate <- as.numeric(dim(exp)[1]) + + # RMS of forecast + dif1 <- array(dim = c(nsdate, nexp, nobs)) + names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') + + for (i in 1:nobs) { + dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + } + + rms_exp <- apply(dif1^2, c(2, 3), mean, na.rm = TRUE)^0.5 #array(dim = c(nexp, nobs)) + + # RMS of reference +# if (!is.null(ref)) { + dif2 <- array(dim = c(nsdate, nref, nobs)) + names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') + for (i in 1:nobs) { + dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + } + rms_ref <- apply(dif2^2, c(2, 3), mean, na.rm = TRUE)^0.5 
#array(dim = c(nref, nobs)) + if (nexp != nref) { + # expand rms_ref to nexp (nref is 1) + rms_ref <- array(rms_ref, dim = c(nobs = nobs, nexp = nexp)) + rms_ref <- Reorder(rms_ref, c(2, 1)) + } +# } else { +# rms_ref <- array(colMeans(obs^2, na.rm = TRUE)^0.5, dim = c(nobs = nobs, nexp = nexp)) +## rms_ref[which(abs(rms_ref) <= (max(abs(rms_ref), na.rm = TRUE) / 1000))] <- max(abs( +## rms_ref), na.rm = TRUE) / 1000 +# rms_ref <- Reorder(rms_ref, c(2, 1)) +# #rms_ref above: [nexp, nobs] +# } + + rmsss <- 1 - rms_exp / rms_ref + +################################################# + +# if (conf) { +# conflow <- (1 - conf.lev) / 2 +# confhigh <- 1 - conflow +# conf_low <- array(dim = c(nexp = nexp, nobs = nobs)) +# conf_high <- array(dim = c(nexp = nexp, nobs = nobs)) +# } + + if (sig_method == 'one-sided Fisher') { + p_val <- array(dim = c(nexp = nexp, nobs = nobs)) + ## pval and sign + if (pval || sign) { + eno1 <- Eno(dif1, time_dim) + if (is.null(ref)) { + eno2 <- Eno(obs, time_dim) + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } else { + eno2 <- Eno(dif2, time_dim) + if (nref != nexp) { + eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp)) + eno2 <- Reorder(eno2, c(2, 1)) + } + } + + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 + p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) + if (sign) signif <- p_val <= alpha + # If there isn't enough valid data, return NA + p_val[which(!tmp)] <- NA + if (sign) signif[which(!tmp)] <- NA + + # change not enough valid data rmsss to NA + rmsss[which(!tmp)] <- NA + } + + } else if (sig_method == "Random Walk") { + signif <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + + # Error + error_exp <- array(data = abs(exp[, i] - obs[, j]), dim = c(time = nsdate)) + if (nref == nexp) { + error_ref <- array(data = abs(ref[, i] - obs[, j]), dim = c(time = nsdate)) + } else { + # nref = 1 + error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) + } + signif[i, j] <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref)$signif + } + } + } + + ################################### + # Remove extra dimensions if dat_dim = NULL + if (is.null(dat_dim)) { + dim(rmsss) <- NULL + dim(p_val) <- NULL + if (sign) dim(signif) <- NULL + } + ################################### + + # output + res <- list(rmsss = rmsss) + if (pval) { + p.val <- list(p.val = p_val) + res <- c(res, p.val) + } + if (sign) { + signif <- list(sign = signif) + res <- c(res, signif) + } + + return(res) +} diff --git a/modules/Skill/tmp/RandomWalkTest.R b/modules/Skill/tmp/RandomWalkTest.R new file mode 100644 index 00000000..adeadc1e --- /dev/null +++ b/modules/Skill/tmp/RandomWalkTest.R @@ -0,0 +1,82 @@ +#'Random walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). +#' +#'@param skill_A A numerical array of the time series of the skill with the +#' forecaster A's. +#'@param skill_B A numerical array of the time series of the skill with the +#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. 
+#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill positively oriented). If $score is positive +#' forecaster A is better than forecaster B, and if $score is negative +#' forecaster B is better than forecaster B. +#'} +#'\item{$signif}{ +#' A logical array with the same dimensions as the input arrays except +#' 'time_dim'. Whether the difference is significant or not at the 5% +#' significance level. +#'} +#' +#'@examples +#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' skill_A <- abs(fcst_A - reference) +#' skill_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' +#'@import multiApply +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ + + ## Check inputs + if (is.null(skill_A) | is.null(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A),dim(skill_B))){ + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + if(!is.character(time_dim)){ + stop("Parameter 'time_dim' must be a character string.") + } + if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- multiApply::Apply(data = list(skill_A, skill_B), + target_dims = time_dim, + fun = .RandomWalkTest, + ncores = ncores) + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B){ + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), + yes = TRUE, no = FALSE) + + return(list("score"=score[length(skill_A)],"signif"=signif)) +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R new file mode 100644 index 00000000..d9f95e7c --- /dev/null +++ b/modules/Visualization/Visualization.R @@ -0,0 +1,359 @@ +#G# TODO: Remove once released in s2dv/CSTools +source("modules/Visualization/tmp/PlotMostLikelyQuantileMap.R") +source("modules/Visualization/tmp/PlotCombinedMap.R") + +## TODO: Add the possibility to read the data directly from netCDF +## TODO: Adapt to multi-model case +## TODO: Add param 'raw'? +## TODO: Reduce colorbar size and increase colorbar label size +## Param: bar_label_scale and ???? + +plot_data <- function(recipe, + data, + skill_metrics = NULL, + probabilities = NULL, + archive = NULL, + significance = F) { + # Try to produce and save several basic plots. 
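  # Example call, following the usage sketched in modules/test_seasonal.R
  # further down in this patch (object names are illustrative):
  #   plot_data(recipe, calibrated_data, skill_metrics, probabilities,
  #             significance = TRUE)
  # Arguments: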
+ # recipe: the auto-s2s recipe as read by read_yaml() + # archive: the auto-s2s archive as read by read_yaml() + # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects + # calibrated_data: list containing the calibrated hcst and (optional) fcst + # s2dv_cube objects + # skill_metrics: list of arrays containing the computed skill metrics + # significance: Bool. Whether to include significance dots where applicable + + outdir <- paste0(get_dir(recipe), "/plots/") + dir.create(outdir, showWarnings = FALSE, recursive = TRUE) + + if ((is.null(skill_metrics)) && (is.null(data$fcst))) { + error(recipe$Run$logger, "The Visualization module has been called, + but there is no fcst in 'data', and 'skill_metrics' is NULL + so there is no data that can be plotted.") + stop() + } + + if (is.null(archive)) { + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive.yml"))$archive + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + archive <- read_yaml(paste0(recipe$Run$code_dir, + "conf/archive_decadal.yml"))$archive + } + } + + # Plot skill metrics + if (!is.null(skill_metrics)) { + plot_skill_metrics(recipe, archive, data$hcst, skill_metrics, outdir, + significance) + } + + # Plot forecast ensemble mean + if (!is.null(data$fcst)) { + plot_ensemble_mean(recipe, archive, data$fcst, outdir) + } + + # Plot Most Likely Terciles + if ((!is.null(probabilities)) && (!is.null(data$fcst))) { + plot_most_likely_terciles(recipe, archive, data$fcst, + probabilities, outdir) + } +} + +plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, + outdir, significance = F) { + + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- lubridate::month(as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)), + label = T, abb = T) + + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss") + scores <- c("rps", "frps", "crps", "frps_specs") + + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 
0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + col2 <- grDevices::hcl.colors(length(brks) - 1, "Reds") + + } else if (name == "enssprerr") { + ## TODO: Adjust colorbar parameters + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- pretty(0:max(skill, na.rm = T), n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(skill)) + ugly_intervals <- seq(-max_value, max_value, (max_value*2)/10) + brks <- pretty(ugly_intervals, n = 20, min.n = 10) + col2 <- grDevices::hcl.colors(length(brks) - 1, "RdBu", rev = TRUE) + } + + options(bitmapType = "cairo") + + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + significance_name <- paste0(name, "_significance") + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + outfile <- paste0(outdir, name, ".png") + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, + "-", system_name, "-", init_month, hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = col2, + col_inf = col2[1], + col_sup = col2[length(col2)], + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) + } + } + + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} + +plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + units <- attr(fcst$Variable, "variable")$units + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + + # Compute ensemble mean + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + # Drop extra dims, add time dim if missing: + ensemble_mean <- drop(ensemble_mean) + + if (!("time" %in% names(dim(ensemble_mean)))) { + dim(ensemble_mean) <- c("time" = 1, 
dim(ensemble_mean)) + } + if (!'syear' %in% names(dim(ensemble_mean))) { + ensemble_mean <- Reorder(ensemble_mean, c("time", "longitude", "latitude")) + } else { + ensemble_mean <- Reorder(ensemble_mean, c("syear", "time", "longitude", "latitude")) + } + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + } else { + palette = "RdBu" + rev = T + } + + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + col2 <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + # color <- colorRampPalette(col2)(length(brks) - 1) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_ensemble_mean <- ensemble_mean + outfile <- paste0(outdir, "forecast_ensemble_mean.png") + } else { + i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir, "forecast_ensemble_mean_", i_syear, ".png") + } + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = col2, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + } + + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") +} + +plot_most_likely_terciles <- function(recipe, archive, + fcst, + probabilities, + outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") + } + + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + + ## TODO: Improve this section + # Drop extra dims, add time dim if missing: + probs_fcst <- drop(probs_fcst) + if (!("time" %in% names(dim(probs_fcst)))) { + dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) + } + if (!'syear' %in% names(dim(probs_fcst))) { + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + } else { + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) + } + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_probs_fcst <- probs_fcst + outfile <- paste0(outdir, "forecast_most_likely_tercile.png") + } else { + 
i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir, "forecast_most_likely_tercile_", i_syear, ".png") + } + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. + suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) + } + + info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/tmp/PlotCombinedMap.R b/modules/Visualization/tmp/PlotCombinedMap.R new file mode 100644 index 00000000..a7b5fc97 --- /dev/null +++ b/modules/Visualization/tmp/PlotCombinedMap.R @@ -0,0 +1,608 @@ +#'Plot Multiple Lon-Lat Variables In a Single Map According to a Decision Function +#'@description Plot a number a two dimensional matrices with (longitude, latitude) dimensions on a single map with the cylindrical equidistant latitude and longitude projection. +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es} +#' +#'@param maps List of matrices to plot, each with (longitude, latitude) dimensions, or 3-dimensional array with the dimensions (longitude, latitude, map). Dimension names are required. +#'@param lon Vector of longitudes. Must match the length of the corresponding dimension in 'maps'. +#'@param lat Vector of latitudes. Must match the length of the corresponding dimension in 'maps'. +#'@param map_select_fun Function that selects, for each grid point, which value to take among all the provided maps. This function receives as input a vector of values for a same grid point for all the provided maps, and must return a single selected value (not its index!) or NA. For example, the \code{min} and \code{max} functions are accepted. +#'@param display_range Range of values to be displayed for all the maps. This must be a numeric vector c(range min, range max). The values in the parameter 'maps' can go beyond the limits specified in this range. If the selected value for a given grid point (according to 'map_select_fun') falls outside the range, it will be coloured with 'col_unknown_map'. +#'@param map_dim Optional name for the dimension of 'maps' along which the multiple maps are arranged. Only applies when 'maps' is provided as a 3-dimensional array. Takes the value 'map' by default. +#'@param brks Colour levels to be sent to PlotEquiMap. This parameter is optional and adjusted automatically by the function. +#'@param cols List of vectors of colours to be sent to PlotEquiMap for the colour bar of each map. This parameter is optional and adjusted automatically by the function (up to 5 maps). The colours provided for each colour bar will be automatically interpolated to match the number of breaks. 
Each item in this list can be named, and the name will be used as title for the corresponding colour bar (equivalent to the parameter 'bar_titles'). +#'@param col_unknown_map Colour to use to paint the grid cells for which a map is not possible to be chosen according to 'map_select_fun' or for those values that go beyond 'display_range'. Takes the value 'white' by default. +#'@param mask Optional numeric array with dimensions (latitude, longitude), with values in the range [0, 1], indicating the opacity of the mask over each grid point. Cells with a 0 will result in no mask, whereas cells with a 1 will result in a totally opaque superimposed pixel coloured in 'col_mask'. +#'@param col_mask Colour to be used for the superimposed mask (if specified in 'mask'). Takes the value 'grey' by default. +#'@param dots Array of same dimensions as 'var' or with dimensions +#' c(n, dim(var)), where n is the number of dot/symbol layers to add to the +#' plot. A value of TRUE at a grid cell will draw a dot/symbol on the +#' corresponding square of the plot. By default all layers provided in 'dots' +#' are plotted with dots, but a symbol can be specified for each of the +#' layers via the parameter 'dot_symbol'. +#'@param bar_titles Optional vector of character strings providing the titles to be shown on top of each of the colour bars. +#'@param legend_scale Scale factor for the size of the colour bar labels. Takes 1 by default. +#'@param cex_bar_titles Scale factor for the sizes of the bar titles. Takes 1.5 by default. +#'@param fileout File where to save the plot. If not specified (default) a graphics device will pop up. Extensions allowed: eps/ps, jpeg, png, pdf, bmp and tiff +#'@param width File width, in the units specified in the parameter size_units (inches by default). Takes 8 by default. +#'@param height File height, in the units specified in the parameter size_units (inches by default). Takes 5 by default. +#'@param size_units Units of the size of the device (file or window) to plot in. Inches ('in') by default. See ?Devices and the creator function of the corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See ?Devices and the creator function of the corresponding device. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param ... Additional parameters to be passed on to \code{PlotEquiMap}. 
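#'@details For each grid point, 'map_select_fun' is applied to the vector of
#'  values taken from all maps, and the point is coloured with the colour bar
#'  of the map that provided the selected value, using a shade that reflects
#'  where that value falls within 'display_range'. Grid points for which no
#'  map can be selected, or whose selected value lies outside 'display_range',
#'  are painted in 'col_unknown_map'.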
+ +#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom s2dv PlotEquiMap ColorBar +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'PlotCombinedMap(list(a, b, c), lons, lats, +#' toptitle = 'Maximum map', +#' map_select_fun = max, +#' display_range = c(0, 1), +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 10, height = 8) +#' +#'Lon <- c(0:40, 350:359) +#'Lat <- 51:26 +#'data <- rnorm(51 * 26 * 3) +#'dim(data) <- c(map = 3, lon = 51, lat = 26) +#'mask <- sample(c(0,1), replace = TRUE, size = 51 * 26) +#'dim(mask) <- c(lat = 26, lon = 51) +#'PlotCombinedMap(data, lon = Lon, lat = Lat, map_select_fun = max, +#' display_range = range(data), mask = mask, +#' width = 12, height = 8) +#' +#'@export +PlotCombinedMap <- function(maps, lon, lat, + map_select_fun, display_range, + map_dim = 'map', + brks = NULL, cols = NULL, + col_unknown_map = 'white', + mask = NULL, col_mask = 'grey', + dots = NULL, + bar_titles = NULL, legend_scale = 1, + cex_bar_titles = 1.5, + plot_margin = NULL, bar_margin = rep(0, 4), + fileout = NULL, width = 8, height = 5, + size_units = 'in', res = 100, drawleg = T, + ...) { + args <- list(...) + + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, + units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Check probs + error <- FALSE + if (is.list(maps)) { + if (length(maps) < 1) { + stop("Parameter 'maps' must be of length >= 1 if provided as a list.") + } + check_fun <- function(x) { + is.numeric(x) && (length(dim(x)) == 2) + } + if (!all(sapply(maps, check_fun))) { + error <- TRUE + } + ref_dims <- dim(maps[[1]]) + equal_dims <- all(sapply(maps, function(x) identical(dim(x), ref_dims))) + if (!equal_dims) { + stop("All arrays in parameter 'maps' must have the same dimension ", + "sizes and names when 'maps' is provided as a list of arrays.") + } + num_maps <- length(maps) + maps <- unlist(maps) + dim(maps) <- c(ref_dims, map = num_maps) + map_dim <- 'map' + } + if (!is.numeric(maps)) { + error <- TRUE + } + if (is.null(dim(maps))) { + error <- TRUE + } + if (length(dim(maps)) != 3) { + error <- TRUE + } + if (error) { + stop("Parameter 'maps' must be either a numeric array with 3 dimensions ", + " or a list of numeric arrays of the same size with the 'lon' and ", + "'lat' dimensions.") + } + dimnames <- names(dim(maps)) + + # Check map_dim + if (is.character(map_dim)) { + if (is.null(dimnames)) { + stop("Specified a dimension name in 'map_dim' but no dimension names provided ", + "in 'maps'.") + } + map_dim <- which(dimnames == map_dim) + if (length(map_dim) < 1) { + stop("Dimension 'map_dim' not found in 'maps'.") + } else { + map_dim <- map_dim[1] + } + } else if (!is.numeric(map_dim)) { + stop("Parameter 'map_dim' must be either a numeric value or a ", + "dimension name.") + } + if (length(map_dim) != 1) { + stop("Parameter 'map_dim' must be of length 1.") + } + map_dim <- round(map_dim) + + # Work out lon_dim and lat_dim + lon_dim 
<- NULL + if (!is.null(dimnames)) { + lon_dim <- which(dimnames %in% c('lon', 'longitude'))[1] + } + if (length(lon_dim) < 1) { + lon_dim <- (1:3)[-map_dim][1] + } + lon_dim <- round(lon_dim) + + lat_dim <- NULL + if (!is.null(dimnames)) { + lat_dim <- which(dimnames %in% c('lat', 'latitude'))[1] + } + if (length(lat_dim) < 1) { + lat_dim <- (1:3)[-map_dim][2] + } + lat_dim <- round(lat_dim) + + # Check lon + if (!is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector.") + } + if (length(lon) != dim(maps)[lon_dim]) { + stop("Parameter 'lon' does not match the longitude dimension in 'maps'.") + } + + # Check lat + if (!is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector.") + } + if (length(lat) != dim(maps)[lat_dim]) { + stop("Parameter 'lat' does not match the longitude dimension in 'maps'.") + } + + # Check map_select_fun + if (is.numeric(map_select_fun)) { + if (length(dim(map_select_fun)) != 2) { + stop("Parameter 'map_select_fun' must be an array with dimensions ", + "'lon' and 'lat' if provided as an array.") + } + if (!identical(dim(map_select_fun), dim(maps)[-map_dim])) { + stop("The dimensions 'lon' and 'lat' in the 'map_select_fun' array must ", + "have the same size, name and order as in the 'maps' parameter.") + } + } + if (!is.function(map_select_fun)) { + stop("The parameter 'map_select_fun' must be a function or a numeric array.") + } + + # Check display_range + if (!is.numeric(display_range) || length(display_range) != 2) { + stop("Parameter 'display_range' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= dim(maps)[map_dim]) { + chosen_sets <- 1:(dim(maps)[map_dim]) + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), dim(maps)[map_dim]) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != dim(maps)[map_dim]) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (!is.null(names(cols))) { + bar_titles <- names(cols) + } else { + bar_titles <- paste0("Map ", 1:length(cols)) + } + } else { + if (!is.character(bar_titles)) { + stop("Parameter 'bar_titles' must be a character vector.") + } + if (length(bar_titles) != length(cols)) { + stop("Parameter 'bar_titles' must be of the same length as the number of ", + "maps in 'maps'.") + } + } + + # Check legend_scale + if (!is.numeric(legend_scale)) { + stop("Parameter 'legend_scale' must be 
numeric.") + } + + # Check col_unknown_map + if (!is.character(col_unknown_map)) { + stop("Parameter 'col_unknown_map' must be a character string.") + } + + # Check col_mask + if (!is.character(col_mask)) { + stop("Parameter 'col_mask' must be a character string.") + } + + # Check mask + if (!is.null(mask)) { + if (!is.numeric(mask)) { + stop("Parameter 'mask' must be numeric.") + } + if (length(dim(mask)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(mask)[1] != dim(maps)[lat_dim]) || + (dim(mask)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + # Check dots + if (!is.null(dots)) { + if (length(dim(dots)) != 2) { + stop("Parameter 'mask' must have two dimensions.") + } + if ((dim(dots)[1] != dim(maps)[lat_dim]) || + (dim(dots)[2] != dim(maps)[lon_dim])) { + stop("Parameter 'mask' must have dimensions c(lat, lon).") + } + } + + #---------------------- + # Identify the most likely map + #---------------------- + brks_norm <- seq(0, 1, length.out = length(brks)) + if (is.function(map_select_fun)) { + range_width <- display_range[2] - display_range[1] + ml_map <- apply(maps, c(lat_dim, lon_dim), function(x) { + if (any(is.na(x))) { + res <- NA + } else { + res <- which(x == map_select_fun(x)) + if (length(res) > 0) { + res <- res[1] + if (map_select_fun(x) < display_range[1] || + map_select_fun(x) > display_range[2]) { + res <- -0.5 + } else { + res <- res + (map_select_fun(x) - display_range[1]) / range_width + if (map_select_fun(x) == display_range[1]) { + res <- res + brks_norm[2] / (num_brks * 2) + } + } + } else { + res <- -0.5 + } + } + res + }) + } else { + stop("Providing 'map_select_fun' as array not implemented yet.") + ml_map <- map_select_fun + } + nmap <- dim(maps)[map_dim] + nlat <- length(lat) + nlon <- length(lon) + + #---------------------- + # Set latitudes from minimum to maximum + #---------------------- + if (lat[1] > lat[nlat]){ + lat <- lat[nlat:1] + indices <- list(nlat:1, TRUE) + ml_map <- do.call("[", c(list(x = ml_map), indices)) + if (!is.null(mask)){ + mask <- mask[nlat:1, ] + } + if (!is.null(dots)){ + dots <- dots[nlat:1,] + } + } + + #---------------------- + # Set layout and parameters + #---------------------- + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + #NOTE: I think plot.new() is not necessary in any case. +# plot.new() + par(font.main = 1) + # If colorbars need to be plotted, re-define layout. + if (drawleg) { + layout(matrix(c(rep(1, nmap),2:(nmap + 1)), 2, nmap, byrow = TRUE), heights = c(6, 1.5)) + } + + #---------------------- + # Set colors and breaks and then PlotEquiMap + #---------------------- + tcols <- c(col_unknown_map, cols[[1]]) + for (k in 2:nmap) { + tcols <- append(tcols, c(col_unknown_map, cols[[k]])) + } + + tbrks <- c(-1, brks_norm + rep(1:nmap, each = length(brks))) + + if (is.null(plot_margin)) { + plot_margin <- c(5, 4, 4, 2) + 0.1 # default of par()$mar + } + + PlotEquiMap(var = ml_map, lon = lon, lat = lat, + brks = tbrks, cols = tcols, drawleg = FALSE, + filled.continents = FALSE, dots = dots, mar = plot_margin, ...) 
+ + #---------------------- + # Add overplot on top + #---------------------- + if (!is.null(mask)) { + dims_mask <- dim(mask) + latb <- sort(lat, index.return = TRUE) + dlon <- lon[2:dims_mask[2]] - lon[1:(dims_mask[2] - 1)] + wher <- which(dlon > (mean(dlon) + 1)) + if (length(wher) > 0) { + lon[(wher + 1):dims_mask[2]] <- lon[(wher + 1):dims_mask[2]] - 360 + } + lonb <- sort(lon, index.return = TRUE) + + cols_mask <- sapply(seq(from = 0, to = 1, length.out = 10), + function(x) adjustcolor(col_mask, alpha.f = x)) + image(lonb$x, latb$x, t(mask)[lonb$ix, latb$ix], + axes = FALSE, col = cols_mask, + breaks = seq(from = 0, to = 1, by = 0.1), + xlab='', ylab='', add = TRUE, xpd = TRUE) + if (!exists('coast_color')) { + coast_color <- 'black' + } + if (min(lon) < 0) { + map('world', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon -180 to 180). + } else { + map('world2', interior = FALSE, add = TRUE, lwd = 1, col = coast_color) # Low resolution world map (lon 0 to 360). + } + box() + } + + #---------------------- + # Add colorbars + #---------------------- + if ('toptitle' %in% names(args)) { + size_title <- 1 + if ('title_scale' %in% names(args)) { + size_title <- args[['title_scale']] + } + old_mar <- par('mar') + old_mar[3] <- old_mar[3] - (2 * size_title + 1) + par(mar = old_mar) + } + + if (drawleg) { + for (k in 1:nmap) { + ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, + draw_separators = TRUE, extra_margin = c(2, 0, 2, 0), + label_scale = legend_scale * 1.5) + if (!is.null(bar_titles)) { + mtext(bar_titles[[k]], 3, line = -3, cex = cex_bar_titles) + } + #TODO: Change to below code. Plot title together. extra_margin needs to be adjusted. +# ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, +# draw_separators = TRUE, extra_margin = c(1, 0, 1, 0), +# label_scale = legend_scale * 1.5, title = bar_titles[[k]], title_scale = cex_bar_titles) + } + } + + # If the graphic was saved to file, close the connection with the device + if (!is.null(fileout)) dev.off() +} + +# Color bar for PlotMostLikelyQuantileMap +multi_ColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) 
{ + + minimum_value <- ceiling(1 / nmap * 10 * 1.1) * 10 + display_range = c(minimum_value, 100) + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = display_range[1], to = display_range[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != dim(maps)[map_dim]) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, + bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + +#TODO: use s2dv:::.SelectDevice and remove this function here? +.SelectDevice <- function(fileout, width, height, units, res) { + # This function is used in the plot functions to check the extension of the + # files where the graphics will be stored and select the right R device to + # save them. + # If the vector of filenames ('fileout') has files with different + # extensions, then it will only accept the first one, changing all the rest + # of the filenames to use that extension. + + # We extract the extension of the filenames: '.png', '.pdf', ... 
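  # For example, regmatches("rpss.png", regexpr("\\.[a-zA-Z0-9]*$", "rpss.png"))
  # returns ".png", while a filename without an extension returns character(0)
  # and is handled by the default case further below.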
+ ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout)) + + if (length(ext) != 0) { + # If there is an extension specified, select the correct device + ## units of width and height set to accept inches + if (ext[1] == ".png") { + saveToFile <- function(fileout) { + png(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".jpeg") { + saveToFile <- function(fileout) { + jpeg(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] %in% c(".eps", ".ps")) { + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".pdf") { + saveToFile <- function(fileout) { + pdf(file = fileout, width = width, height = height) + } + } else if (ext[1] == ".svg") { + saveToFile <- function(fileout) { + svg(filename = fileout, width = width, height = height) + } + } else if (ext[1] == ".bmp") { + saveToFile <- function(fileout) { + bmp(filename = fileout, width = width, height = height, res = res, units = units) + } + } else if (ext[1] == ".tiff") { + saveToFile <- function(fileout) { + tiff(filename = fileout, width = width, height = height, res = res, units = units) + } + } else { + warning("file extension not supported, it will be used '.eps' by default.") + ## In case there is only one filename + fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1]) + ext[1] <- ".eps" + saveToFile <- function(fileout) { + postscript(file = fileout, width = width, height = height) + } + } + # Change filenames when necessary + if (any(ext != ext[1])) { + warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". The extensions are being converted to ", ext[1], ".")) + fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) + } + } else { + # Default filenames when there is no specification + warning("there are no extensions specified in the filenames, default to '.eps'") + fileout <- paste0(fileout, ".eps") + saveToFile <- postscript + } + + # return the correct function with the graphical device, and the correct + # filenames + list(fun = saveToFile, files = fileout) +} + diff --git a/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R b/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R new file mode 100644 index 00000000..9f9f1914 --- /dev/null +++ b/modules/Visualization/tmp/PlotMostLikelyQuantileMap.R @@ -0,0 +1,196 @@ +#'Plot Maps of Most Likely Quantiles +#' +#'@author Veronica Torralba, \email{veronica.torralba@bsc.es}, Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#'@description This function receives as main input (via the parameter \code{probs}) a collection of longitude-latitude maps, each containing the probabilities (from 0 to 1) of the different grid cells of belonging to a category. As many categories as maps provided as inputs are understood to exist. The maps of probabilities must be provided on a common rectangular regular grid, and a vector with the longitudes and a vector with the latitudes of the grid must be provided. The input maps can be provided in two forms, either as a list of multiple two-dimensional arrays (one for each category) or as a three-dimensional array, where one of the dimensions corresponds to the different categories. +#' +#'@param probs a list of bi-dimensional arrays with the named dimensions 'latitude' (or 'lat') and 'longitude' (or 'lon'), with equal size and in the same order, or a single tri-dimensional array with an additional dimension (e.g. 
'bin') for the different categories. The arrays must contain probability values between 0 and 1, and the probabilities for all categories of a grid cell should not exceed 1 when added. +#'@param lon a numeric vector with the longitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param lat a numeric vector with the latitudes of the map grid, in the same order as the values along the corresponding dimension in \code{probs}. +#'@param cat_dim the name of the dimension along which the different categories are stored in \code{probs}. This only applies if \code{probs} is provided in the form of 3-dimensional array. The default expected name is 'bin'. +#'@param bar_titles vector of character strings with the names to be drawn on top of the color bar for each of the categories. As many titles as categories provided in \code{probs} must be provided. +#'@param col_unknown_cat character string with a colour representation of the colour to be used to paint the cells for which no category can be clearly assigned. Takes the value 'white' by default. +#'@param drawleg Where to draw the common colour bar. Can take values TRUE, +#' FALSE or:\cr +#' 'up', 'u', 'U', 'top', 't', 'T', 'north', 'n', 'N'\cr +#' 'down', 'd', 'D', 'bottom', 'b', 'B', 'south', 's', 'S' (default)\cr +#' 'right', 'r', 'R', 'east', 'e', 'E'\cr +#' 'left', 'l', 'L', 'west', 'w', 'W' +#'@param ... additional parameters to be sent to \code{PlotCombinedMap} and \code{PlotEquiMap}. +#'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} +#' +#'@importFrom maps map +#'@importFrom graphics box image layout mtext par plot.new +#'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff +#'@examples +#'# Simple example +#'x <- array(1:(20 * 10), dim = c(lat = 10, lon = 20)) / 200 +#'a <- x * 0.6 +#'b <- (1 - x) * 0.6 +#'c <- 1 - (a + b) +#'lons <- seq(0, 359.5, length = 20) +#'lats <- seq(-89.5, 89.5, length = 10) +#'PlotMostLikelyQuantileMap(list(a, b, c), lons, lats, +#' toptitle = 'Most likely tercile map', +#' bar_titles = paste('% of belonging to', c('a', 'b', 'c')), +#' brks = 20, width = 10, height = 8) +#' +#'# More complex example +#'n_lons <- 40 +#'n_lats <- 20 +#'n_timesteps <- 100 +#'n_bins <- 4 +#' +#'# 1. Generation of sample data +#'lons <- seq(0, 359.5, length = n_lons) +#'lats <- seq(-89.5, 89.5, length = n_lats) +#' +#'# This function builds a 3-D gaussian at a specified point in the map. +#'make_gaussian <- function(lon, sd_lon, lat, sd_lat) { +#' w <- outer(lons, lats, function(x, y) dnorm(x, lon, sd_lon) * dnorm(y, lat, sd_lat)) +#' min_w <- min(w) +#' w <- w - min_w +#' w <- w / max(w) +#' w <- t(w) +#' names(dim(w)) <- c('lat', 'lon') +#' w +#'} +#' +#'# This function generates random time series (with values ranging 1 to 5) +#'# according to 2 input weights. +#'gen_data <- function(w1, w2, n) { +#' r <- sample(1:5, n, +#' prob = c(.05, .9 * w1, .05, .05, .9 * w2), +#' replace = TRUE) +#' r <- r + runif(n, -0.5, 0.5) +#' dim(r) <- c(time = n) +#' r +#'} +#' +#'# We build two 3-D gaussians. +#'w1 <- make_gaussian(120, 80, 20, 30) +#'w2 <- make_gaussian(260, 60, -10, 40) +#' +#'# We generate sample data (with dimensions time, lat, lon) according +#'# to the generated gaussians +#'sample_data <- multiApply::Apply(list(w1, w2), NULL, +#' gen_data, n = n_timesteps)$output1 +#' +#'# 2. 
Binning sample data +#'prob_thresholds <- 1:n_bins / n_bins +#'prob_thresholds <- prob_thresholds[1:(n_bins - 1)] +#'thresholds <- quantile(sample_data, prob_thresholds) +#' +#'binning <- function(x, thresholds) { +#' n_samples <- length(x) +#' n_bins <- length(thresholds) + 1 +#' +#' thresholds <- c(thresholds, max(x)) +#' result <- 1:n_bins +#' lower_threshold <- min(x) - 1 +#' for (i in 1:n_bins) { +#' result[i] <- sum(x > lower_threshold & x <= thresholds[i]) / n_samples +#' lower_threshold <- thresholds[i] +#' } +#' +#' dim(result) <- c(bin = n_bins) +#' result +#'} +#' +#'bins <- multiApply::Apply(sample_data, 'time', binning, thresholds)$output1 +#' +#'# 3. Plotting most likely quantile/bin +#'PlotMostLikelyQuantileMap(bins, lons, lats, +#' toptitle = 'Most likely quantile map', +#' bar_titles = paste('% of belonging to', letters[1:n_bins]), +#' mask = 1 - (w1 + w2 / max(c(w1, w2))), +#' brks = 20, width = 10, height = 8) +#' +#'@export +PlotMostLikelyQuantileMap <- function(probs, lon, lat, cat_dim = 'bin', + bar_titles = NULL, + col_unknown_cat = 'white', drawleg = T, + ...) { + # Check probs + error <- FALSE + if (is.list(probs)) { + if (length(probs) < 1) { + stop("Parameter 'probs' must be of length >= 1 if provided as a list.") + } + check_fun <- function(x) { + is.numeric(x) && (length(dim(x)) == 2) + } + if (!all(sapply(probs, check_fun))) { + error <- TRUE + } + ref_dims <- dim(probs[[1]]) + equal_dims <- all(sapply(probs, function(x) identical(dim(x), ref_dims))) + if (!equal_dims) { + stop("All arrays in parameter 'probs' must have the same dimension ", + "sizes and names when 'probs' is provided as a list of arrays.") + } + num_probs <- length(probs) + probs <- unlist(probs) + dim(probs) <- c(ref_dims, map = num_probs) + cat_dim <- 'map' + } + if (!is.numeric(probs)) { + error <- TRUE + } + if (is.null(dim(probs))) { + error <- TRUE + } + if (length(dim(probs)) != 3) { + error <- TRUE + } + if (error) { + stop("Parameter 'probs' must be either a numeric array with 3 dimensions ", + " or a list of numeric arrays of the same size with the 'lon' and ", + "'lat' dimensions.") + } + dimnames <- names(dim(probs)) + + # Check cat_dim + if (is.character(cat_dim)) { + if (is.null(dimnames)) { + stop("Specified a dimension name in 'cat_dim' but no dimension names provided ", + "in 'probs'.") + } + cat_dim <- which(dimnames == cat_dim) + if (length(cat_dim) < 1) { + stop("Dimension 'cat_dim' not found in 'probs'.") + } + cat_dim <- cat_dim[1] + } else if (!is.numeric(cat_dim)) { + stop("Parameter 'cat_dim' must be either a numeric value or a ", + "dimension name.") + } + if (length(cat_dim) != 1) { + stop("Parameter 'cat_dim' must be of length 1.") + } + cat_dim <- round(cat_dim) + nprobs <- dim(probs)[cat_dim] + + # Check bar_titles + if (is.null(bar_titles)) { + if (nprobs == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nprobs == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- paste0("Cat. ", 1:nprobs, " (%)") + } + } + + minimum_value <- ceiling(1 / nprobs * 10 * 1.1) * 10 + + # By now, the PlotCombinedMap function is included below in this file. + # In the future, PlotCombinedMap will be part of s2dverification and will + # be properly imported. 
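  # 'minimum_value' (computed above) places the lower end of the displayed
  # probability range just above the climatological probability: with 3
  # categories, ceiling(1 / 3 * 10 * 1.1) * 10 = 40, so only probabilities
  # of 40% or higher are shaded and lower values take 'col_unknown_cat'.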
+ PlotCombinedMap(probs * 100, lon, lat, map_select_fun = max, + display_range = c(minimum_value, 100), + map_dim = cat_dim, + bar_titles = bar_titles, + col_unknown_map = col_unknown_cat, + drawleg = drawleg, ...) +} diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R new file mode 100644 index 00000000..b8541488 --- /dev/null +++ b/modules/test_seasonal.R @@ -0,0 +1,25 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe) +# Calibrate datasets +calibrated_data <- calibrate_datasets(recipe, data) +# Compute anomalies +calibrated_data <- compute_anomalies(recipe, calibrated_data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, calibrated_data) +# Export all data to netCDF +save_data(recipe, calibrated_data, skill_metrics, probabilities) +# Plot data +plot_data(recipe, calibrated_data, skill_metrics, probabilities, + significance = T) diff --git a/modules/verifications.R b/modules/verifications.R new file mode 100644 index 00000000..df1026f4 --- /dev/null +++ b/modules/verifications.R @@ -0,0 +1,100 @@ +for (indep in verifications$independent) { + + ## TODO: re-write this condition + if (length(indep) == 1) { + info(logger, + paste(" #*****************************************#", + " # Starting Independent verification of Indicator ", + indep, + sep = "\n")) + ind_table <- read_yaml(paste0(recipe$Run$code_dir, + "conf/indicators_table.yml")) + variable <- ind_table[indep[[1]]][[1]]$ECVs + store.freq <- ind_table[indep[[1]]][[1]]$freq + ind.fun <- ind_table[indep[[1]]][[1]]$fun + } else { + info(logger, + paste(" #*****************************************#", + " # Starting Independent verification of ECV ", + paste(indep, collapse = ' '), + sep = "\n")) + variable <- indep$name + store.freq <- indep$freq + ind.fun <- NULL + } + + + # ========================================================= + # DATA LOADING -------------------------------------------- + # ========================================================= + + source(recipe$Analysis$Data_load$module) + + # ========================================================= + # WORKFLOW MODULES RUN ------------------------------------ + # ========================================================= + + # Translate $Workflow to call modules: + ## 1) Clean step of the workflow set as FALSE or NULL or None: + modules <- names(recipe$Analysis$Workflow) + for (mod in modules) { + if ((is.logical(recipe$Analysis$Workflow[[mod]][[1]]) && + recipe$Analysis$Workflow[[mod]][[1]] == FALSE) || + recipe$Analysis$Workflow[[mod]][[1]] == 'None' || + is.null(recipe$Analysis$Workflow[[mod]][[1]])) { + info(logger, + paste("The module", mod, "won't be executed.")) + recipe$Analysis$Workflow <- recipe$Analysis$Workflow[ + -which(names(recipe$Analysis$Workflow) == mod)] + } + } + + modules <- names(recipe$Analysis$Workflow) + ## 2) Create a common format for all modules + tmp_modules <- list() + for (mod in modules) { + if (length(recipe$Analysis$Workflow[[mod]]) > 1) { + names(recipe$Analysis$Workflow[[mod]]) <- + rep(mod, length(recipe$Analysis$Workflow[[mod]])) + tmp_modules 
<- append(tmp_modules, + recipe$Analysis$Workflow[[mod]]) + } else { + tmp_modules <- append(tmp_modules, + recipe$Analysis$Workflow[mod]) + } + + modules <- tmp_modules + ## 3) Call each module and pass arguments: + for (mod in 1:length(modules)) { + # In case multiple calls to a module e.g.: Skill_1 --> Skill + if (any(strsplit(names(modules)[mod], "")[[1]] == "_")) { + module_name <- substr(names(modules)[mod], start = 1, + stop = which(strsplit(names(modules)[mod], "")[[1]] == "_") - 1) + } else { + module_name <- names(modules)[mod] + } + info(logger, paste("Start running module", module_name)) + module_code <- file.path(conf$code_dir, "modules", + module_name, + paste0(module_name, ".R")) + # Define variables setup in the recipe + for (param in names(modules[[mod]])) { + if (length(modules[[mod]][[param]])) { + tmp <- paste(modules[[mod]][[param]], + collapse = ",") + } else { + tmp <- modules[[mod]][[param]] + } + info(logger, paste("Variable *", param, "* set as", tmp)) + assign(as.character(param), + modules[[mod]][[param]]) + } + source(module_code) + # TO DO: + # Check the arguments of each module can be an option here: + #... + info(logger, paste(module_name, "module run finished.")) + } + } +} + diff --git a/recipes/seasonal_complex.yml-OUTDATED b/recipes/seasonal_complex.yml-OUTDATED new file mode 100644 index 00000000..2c27e0b7 --- /dev/null +++ b/recipes/seasonal_complex.yml-OUTDATED @@ -0,0 +1,46 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + - {name: tas, freq: daily_mean} + Indicators: + - {name: gdd} + Datasets: + System: + - name: system5c3s + - name: glosea5 + Multimodel: False + Reference: + - name: ERA5 + - name: ERAInterim + Time: + sdate: + fcst_year: 2021 + fcst_month: [07, 08] + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + Regrid: + method: bicubic + type: system + Workflow: + Calibration: + method: SBC + Skill: + metric: RPSS + Indicators: + index: False + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: True diff --git a/recipes/seasonal_oper.yml b/recipes/seasonal_oper.yml new file mode 100644 index 00000000..5e5f61fc --- /dev/null +++ b/recipes/seasonal_oper.yml @@ -0,0 +1,68 @@ +# +# ___ ___ _ _ _ +# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | +# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | +# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| +# |_| + +################################################################################# +# RECIPE DESCRIPTION +################################################################################# + +Description: + Author: N.Pérez-Zanón # [Optional?/str] +Info: This is a test to transform s2s4e data-analysis for SEAS5 # [Optional?/str] + +################################################################################# +# ANALYSIS CONF +################################################################################# + +Analysis: + Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] + Variables: + ECVs: # [Mandatory/list of dicts {name: , freq: } or None] + - {name: tas, freq: monthly_mean} + Indicators: # list of strs? 
+ - None + Datasets: + System: + - name: system5c3s # list of strs + Multimodel: False # boolean, if true system above are aggregated into single multi-model + Reference: # single dict? in the future multiple ref can be an asset + - {name: era5} # str + Time: + sdate: + fcst_syear: ["2017"] # list of ints or None (case where only hcst is verfied) + fcst_sday: ["0701", "0601"] # int or list of ints with MMDD format + hcst_start: "1993" # int mandatory + hcst_end: "2016" # int mandatory + leadtimemin: 2 # int mandatory + leadtimemax: 4 # int mandatory [units????] + Region: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear # str mandatory + type: to_system # str either to_system, to_reference or CDO-compatible grid mandatory + Data_load: + module: "modules/data_load/seas5.load.R" + Workflow: + Calibration: + method: SBC # str + Skill: + metric: RPSS # str + Indicators: + index: FALSE # bool + Output_format: S2S4E # str + +################################################################################# +# Run CONF +################################################################################# + +Run: + Loglevel: INFO # str + Terminal: TRUE # bool + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/seasonal_oper_atomic.yml-OUTDATED b/recipes/seasonal_oper_atomic.yml-OUTDATED new file mode 100644 index 00000000..31b2f5d7 --- /dev/null +++ b/recipes/seasonal_oper_atomic.yml-OUTDATED @@ -0,0 +1,73 @@ +# +# ___ ___ _ _ _ +# / __| / __| ___ _ __ ___ _ _ __ _ | |_ (_) ___ _ _ __ _ | | +# | (__ \__ \ / _ \ | '_ \ / -_) | '_| / _` | | _| | | / _ \ | ' \ / _` | | | +# \___| |___/ \___/ | .__/ \___| |_| \__,_| \__| |_| \___/ |_||_| \__,_| |_| +# |_| + +################################################################################# +# RECIPE DESCRIPTION +################################################################################# + +Description: + Author: Ll. Palma # [Optional?/str] + Info: This is a test of an atomic recipe to calibrate and verify SEAS5 # [Optional?/str] + +################################################################################# +# ANALYSIS CONF +################################################################################# + +Analysis: + Horizon: Seasonal # [Mandatory/str (either seasonal, subseasonal, decadal)] + Variables: + ECVs: + name: tas + freq: monthly_mean + Indicators: + no + Datasets: + System: + name: system5c3s # list of strs + Multimodel: False # boolean, if true system above are aggregated into single multi-model + Reference: # single dict? in the future multiple ref can be an asset + name: era5 # str + Time: + sdate: + fcst_year: no # list of ints or no (case where only hcst is verfied) + fcst_month: ["07"] # int or list of ints with MMDD format + fcst_day: ["01"] # int or list of ints with MMDD format + hcst_start: "1993" # int mandatory + hcst_end: "2016" # int mandatory + leadtimemin: 1 # int mandatory + leadtimemax: 6 # int mandatory [units????] 
+ Region: + # FOCUS SADC + latmin: -40 + latmax: 10 + lonmin: 0 + lonmax: 60 + Regrid: + method: conservative # str mandatory + type: to_system # str either system or reference mandatory + Data_load: + module: "modules/data_load/seas5.load.R" + Workflow: + Calibration: + method: SBC # str + Skill: + metric: RPSS # str + Indicators: + index: FALSE # bool + Output_format: S2S4E # str + +################################################################################# +# Run CONF +################################################################################# + +Run: + Loglevel: INFO # str + Terminal: TRUE # bool + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/tests/execute_tests.R b/recipes/tests/execute_tests.R new file mode 100644 index 00000000..2fa6a137 --- /dev/null +++ b/recipes/tests/execute_tests.R @@ -0,0 +1,44 @@ +library(yaml) + +args <- NULL; + +# Function to run tests: +# source_lines("/esarchive/scratch/nperez/git/startR/inst/doc/usecase/ex2_1_timedim.R", +# start = 4, end = 14) +source_lines <- function(file, start, end, ...) { + file.lines <- scan(file, what = character(), skip = start - 1, + nlines = end - start + 1, sep = '\n') + file.lines.collapsed <- paste(file.lines, collapse = '\n') + source(textConnection(file.lines.collapsed), ...) +} + +# ------------------------------------------ +# Section to check recipes that should work: +args[1] <- "recipes/tests/seasonal_testWorkflow1.yml" +source_lines("OperationalCS.R", start = 14, end = 50) +# Calibration method None --> raw data verification +args[1] <- "recipes/tests/seasonal_testWorkflow4.yml" +source_lines("OperationalCS.R", start = 14, end = 50) +# Calibration: None --> raw data verification +args[1] <- "recipes/tests/seasonal_testWorkflow5.yml" +source_lines("OperationalCS.R", start = 14, end = 50) +# Case Skill_1 and Skill_2 when multiple times needed +args[1] <- "recipes/tests/seasonal_testWorkflow7.yml" +source_lines("OperationalCS.R", start = 14, end = 50) +# Indicator +args[1] <- "recipes/tests/seasonal_testWorkflow8.yml" +source_lines("OperationalCS.R", start = 14, end = 50) + +# ------------------------------------------ +# Section to check recipes that should fail: +## This should fail because there is no Horizon: +args[1] <- "recipes/tests/seasonal_testWorkflow2.yml" +source_lines("OperationalCS.R", start = 14, end = 50) + +## This should fail because there are 2 Calibration options: +args[1] <- "recipes/tests/seasonal_testWorkflow3.yml" +source_lines("OperationalCS.R", start = 14, end = 50) + +## This fails because it is not allow repeating the name Skill +args[1] <- "recipes/tests/seasonal_testWorkflow6.yml" +source_lines("OperationalCS.R", start = 14, end = 50) diff --git a/recipes/tests/seasonal_testWorkflow1.yml b/recipes/tests/seasonal_testWorkflow1.yml new file mode 100644 index 00000000..3c9e55f6 --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow1.yml @@ -0,0 +1,53 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, 
lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: + method: SBC + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow2.yml b/recipes/tests/seasonal_testWorkflow2.yml new file mode 100644 index 00000000..4b05eb8c --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow2.yml @@ -0,0 +1,54 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: + method: SBC + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ + + diff --git a/recipes/tests/seasonal_testWorkflow3.yml b/recipes/tests/seasonal_testWorkflow3.yml new file mode 100644 index 00000000..2b544fa6 --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow3.yml @@ -0,0 +1,52 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: + - {method: SBC} + - {method: VarianceInflation} + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow4.yml b/recipes/tests/seasonal_testWorkflow4.yml new file mode 100644 index 00000000..e3f9499c --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow4.yml @@ -0,0 +1,53 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 
2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: + method: None + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow5.yml b/recipes/tests/seasonal_testWorkflow5.yml new file mode 100644 index 00000000..7029db3c --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow5.yml @@ -0,0 +1,51 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: FALSE + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Indicators: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow6.yml b/recipes/tests/seasonal_testWorkflow6.yml new file mode 100644 index 00000000..94417582 --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow6.yml @@ -0,0 +1,53 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Skill: + - {metric: EnsCorr} + - {metric: Bias} + Calibration: + method: SBC + Skill: + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow7.yml b/recipes/tests/seasonal_testWorkflow7.yml new file mode 100644 index 00000000..595b677b --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow7.yml @@ -0,0 +1,53 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + ECVs: + - {name: 
tas, freq: monthly_mean} + Indicators: + - None + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Skill_1: + - {metric: EnsCorr} + - {metric: Bias} + Calibration: + method: SBC + Skill_2: + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: FALSE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ diff --git a/recipes/tests/seasonal_testWorkflow8.yml b/recipes/tests/seasonal_testWorkflow8.yml new file mode 100644 index 00000000..b6d0c662 --- /dev/null +++ b/recipes/tests/seasonal_testWorkflow8.yml @@ -0,0 +1,51 @@ +Description: + Author: N.Pérez-Zanón + Info: This is a test to transform s2s4e data-analysis for SEAS5 + +Analysis: + Horizon: Seasonal + Variables: + Indicators: + - {name: gdd} + Datasets: + System: + - name: system5c3s + Multimodel: False + Reference: + - {name: era5} + Time: + sdate: + fcst_year: 2021 + fcst_month: 07 + fcst_day: 01 + hcst_start: 2000 + hcst_end: 2002 + leadtimemin: 2 + leadtimemax: 4 + Region: + Global: TRUE + Aggregation: False + Regional: + - {latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + - {latmin: -10, latmax: 10, lonmin: 0, lonmax: 20} + Regrid: + method: bilinear + type: system + Workflow: + Calibration: + method: SBC + Skill: + - {metric: fRPSS, probs: [1/3, 2/3]} + - {metric: BSS10} + - {metric: BSS90} + - {metric: EnsCorr} + - {metric: Bias} + Indicators: + index: TRUE + Output_format: S2S4E + +Run: + Loglevel: INFO + Terminal: TRUE + output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/lpalma/git/auto-s2s/ \ No newline at end of file diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml new file mode 100644 index 00000000..7a2a575b --- /dev/null +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: MIROC6 #EC-Earth3-i4 #BCC-CSM2-MR #CanESM5 + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 + Time: + fcst_year: [2017,2018] + hcst_start: 1990 + hcst_end: 1992 + season: 'Annual' + ftime_min: 3 + ftime_max: 5 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: qmap + Skill: + metric: RPSS + Probabilities: + percentiles: [[1/10, 9/10]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml new file mode 100644 index 00000000..35b55b1a --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -0,0 +1,51 @@ 
+Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: 2021 + hcst_start: 1991 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross-validation: + Calibration: + method: bias + Skill: + metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_1b.yml b/tests/recipes/recipe-decadal_monthly_1b.yml new file mode 100644 index 00000000..5551d9c7 --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_1b.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1 + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1991 + hcst_end: 1994 +# season: 'Annual' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: bias + Skill: + metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml new file mode 100644 index 00000000..45eb01dd --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 #CanESM5 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1992 +# season: 'Annual' + ftime_min: 1 + ftime_max: 14 + Region: + latmin: -60 #-90 + latmax: -55 #90 + lonmin: -2 + lonmax: 2 #359.9 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: raw + Skill: + metric: RPSS_specs EnsCorr_specs FRPS_specs FRPSS_specs BSS10_specs FRPS + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml new file 
mode 100644 index 00000000..94bdfebc --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -0,0 +1,51 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: FGOALS-f3-L + member: 'all' # 3 in total + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB + hcst_end: 2018 +# season: 'Annual' + ftime_min: 6 # Apr + ftime_max: 8 + Region: + latmin: 40 + latmax: 65 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: 'evmos' + Skill: + metric: BSS10 Corr + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/recipes/recipe-seasonal_daily_1.yml b/tests/recipes/recipe-seasonal_daily_1.yml new file mode 100644 index 00000000..52c7c0b8 --- /dev/null +++ b/tests/recipes/recipe-seasonal_daily_1.yml @@ -0,0 +1,45 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: system5c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1201' + fcst_year: + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: conservative + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: qmap + Skill: + metric: EnsCorr_specs + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/recipes/recipe-seasonal_monthly_1.yml b/tests/recipes/recipe-seasonal_monthly_1.yml new file mode 100644 index 00000000..00331332 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_1.yml @@ -0,0 +1,47 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '1996' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + Calibration: + method: mse_min + Skill: + metric: RPSS CRPSS EnsCorr Corr Enscorr_specs + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/test_decadal.R b/tests/test_decadal.R new file mode 100644 index 00000000..75b7fca7 --- /dev/null +++ b/tests/test_decadal.R @@ -0,0 +1,16 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'decadal') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} + +#================ +#--- recipe-decadal_monthly_1.yml --- +# +#--- recipe-decadal_monthly_2.yml --- +# +#--- recipe-decadal_monthly_3.yml --- +# FGOALS-f3-L, hcst contains dcppA and dcppB diff --git a/tests/test_seasonal.R b/tests/test_seasonal.R new file mode 100644 index 00000000..4718e3d4 --- /dev/null +++ b/tests/test_seasonal.R @@ -0,0 +1,9 @@ +library(testthat) + +path_testthat <- file.path('./tests/testthat/') +files_testthat <- list.files('./tests/testthat/', pattern = 'seasonal') + +for (i_file in 1:length(files_testthat)) { + source(paste0('./tests/testthat/', files_testthat[i_file])) +} + diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R new file mode 100644 index 00000000..400b864d --- /dev/null +++ b/tests/testthat/test-decadal_daily_1.R @@ -0,0 +1,222 @@ +context("Decadal daily data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +## Calibrate datasets +#suppressWarnings({invisible(capture.output( +# calibrated_data <- calibrate_datasets(data, recipe) +#))}) +# +## Compute skill metrics +#suppressWarnings({invisible(capture.output( +#skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, +# recipe, na.rm = T, ncores = 4) +#))}) + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 90, latitude = 7, longitude = 11, ensemble = 3) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 90, latitude = 7, longitude = 11, ensemble = 3) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 3, time = 90) +) +# hcst data +expect_equal( +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1, 2:3, 1:3, 1, 1]), +c(298.5787, 293.6479, 298.5042, 293.7802, 297.8072, 293.0764), +tolerance = 0.0001 +) +expect_equal( +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), +c(301.6978, 308.9792, 308.4501, 302.1620, 307.6034, 307.6388), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +301.2666, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(285.9326, 314.9579), +tolerance = 0.0001 +) + +# fcst data +expect_equal( +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:3, 1, 1]), +c(295.0745, 291.1006, 296.2279, 291.6309, 295.3123, 290.8995), +tolerance = 0.0001 +) +expect_equal( +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[2, , 89:90, 1, 1]), +c(305.3428, 305.0657, 305.5445, 305.5681), +tolerance = 0.0001 +) + +# time value +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1991-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1992-01-01 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1992-01-02 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[1, 1, 3, 90], +as.POSIXct("1993-03-31 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[1, 1, 2, 90], +as.POSIXct("1992-03-30 12:00:00", tz = 'UTC') +) + +}) + +##====================================== +#test_that("2. Calibration", { +# +#expect_equal( +#is.list(calibrated_data), +#TRUE +#) +#expect_equal( +#names(calibrated_data), +#c("hcst", "fcst") +#) +#expect_equal( +#class(calibrated_data$hcst), +#"s2dv_cube" +#) +#expect_equal( +#class(calibrated_data$fcst), +#"s2dv_cube" +#) +#expect_equal( +#dim(calibrated_data$hcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(calibrated_data$fcst$data), +#c(dat = 1, var = 1, ensemble = 2, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#mean(calibrated_data$fcst$data), +#291.8375, +#tolerance = 0.0001 +#) +#expect_equal( +#mean(calibrated_data$hcst$data), +#289.6679, +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +#c(286.3895, 286.6408, 290.6652, 288.3759), +#tolerance = 0.0001 +#) +#expect_equal( +#range(calibrated_data$fcst$data), +#c(287.2173, 297.4578), +#tolerance = 0.0001 +#) +# +#}) +# +# +##====================================== +#test_that("3. 
Metrics", { +# +#expect_equal( +#is.list(skill_metrics), +#TRUE +#) +#expect_equal( +#names(skill_metrics), +#c("rpss", "rpss_significance") +#) +#expect_equal( +#class(skill_metrics$rpss[[1]]), +#"array" +#) +#expect_equal( +#dim(skill_metrics$rpss[[1]]), +#c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 5, longitude = 4) +#) +#expect_equal( +#dim(skill_metrics$rpss_significance[[1]]), +#dim(skill_metrics$rpss[[1]]) +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss[[1]])[, 2, 3]), +#c(-0.2857143, -1.2500000, -1.8928571), +#tolerance = 0.0001 +#) +#expect_equal( +#as.vector(drop(skill_metrics$rpss_significance[[1]])[, 2, 3]), +#rep(FALSE, 3) +#) +# +#}) + + diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R new file mode 100644 index 00000000..e32f7177 --- /dev/null +++ b/tests/testthat/test-decadal_monthly_1.R @@ -0,0 +1,338 @@ +context("Decadal monthly data - 1") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, archive = archive) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, archive = archive, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, significance = T) +))}) + + +outdir <- get_dir(recipe) + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) +) +expect_equal( +dim(data$hcst$attr$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[, 1:2,1,2,3]), +c(291.3831, 291.6227, 292.3012, 290.9779), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +287.3804, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(281.7395, 294.2467), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1992-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[10], +as.POSIXct("1993-01-16 12:00:00", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 5, longitude = 4, ensemble = 2) +) +expect_equal( +dim(calibrated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 5, longitude = 4, ensemble = 2) +) +expect_equal( +mean(calibrated_data$fcst$data), +291.8375, +tolerance = 0.0001 +) +expect_equal( +mean(calibrated_data$hcst$data), +289.6679, +tolerance = 0.0001 +) +expect_equal( +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[1, , 2, 3, 4]), +c(286.3895, 286.6408, 290.6652, 288.3759), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$fcst$data), +c(287.2173, 297.4578), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(drop(skill_metrics$rpss)[, 2, 3]), +c(-0.2857143, -1.2500000, -1.8928571), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(skill_metrics$rpss_significance)[, 2, 3]), +rep(FALSE, 3) +) + +# Probs +expect_equal( +names(probs), +c('probs', 'probs_fcst', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66', 'prob_b10', 'prob_10_to_90', 'prob_a90') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 4, time = 3, latitude = 5, longitude = 4) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 3, latitude = 5, longitude = 4) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.5, 0.0, 1.0) +) +expect_equal( +as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]), +c(1.0, 1.0, 0.5, 0.5) +) +expect_equal( +as.vector(probs$percentiles$percentile_33[, 1, 2]), +c(293.7496, 287.4263, 285.8295), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_10[, 1, 2]), +c(293.1772, 286.9533, 284.7887), +tolerance = 0.0001 +) + +}) + +#====================================== + +test_that("4. Saving", { + +expect_equal( +list.files(outdir), +c("plots", "tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", + "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", + "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", + "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") +) +# open the files and check values/attributes? +#expect_equal( +#) + + +}) + + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("forecast_ensemble_mean.png", "forecast_most_likely_tercile.png", + "rpss.png") +) + +}) + +# Delete files +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) + + +#============================================================== + +# Compare with 2 forecast + +recipe_file <- "tests/recipes/recipe-decadal_monthly_1b.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data_b <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data_b <- calibrate_datasets(recipe, data_b) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics_b <- compute_skill_metrics(recipe, calibrated_data_b) +))}) +suppressWarnings({invisible(capture.output( +probs_b <- compute_probabilities(recipe, calibrated_data_b) +))}) + + +test_that("6. 
Compare with two sdates in forecast", { + + +expect_equal( +c(ClimProjDiags::Subset(data_b$fcst$data, 'syear', 2, drop = F)), +c(data$fcst$data) +) + +expect_equal( +c(ClimProjDiags::Subset(calibrated_data_b$fcst$data, 'syear', 2, drop = F)), +c(calibrated_data$fcst$data) +) + +expect_equal( +skill_metrics_b, +skill_metrics +) + +expect_equal( +lapply(probs_b$probs_fcst, ClimProjDiags::Subset, 'syear', 2), +probs$probs_fcst +) + +}) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R new file mode 100644 index 00000000..da67c48b --- /dev/null +++ b/tests/testthat/test-decadal_monthly_2.R @@ -0,0 +1,275 @@ +context("Decadal monthly data - 2") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(recipe, data) +))}) + +# Compute skill metrics +suppressMessages({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe, calibrated_data, skill_metrics, probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, + probabilities = probs, significance = T) +))}) + + +outdir <- get_dir(recipe) + + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 3, time = 14, latitude = 8, longitude = 5, ensemble = 3) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 2, time = 14, latitude = 8, longitude = 5, ensemble = 3) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 3, time = 14) +) +#expect_equal( +#dim(data$fcst$Dates$start), +#c(time = 14) +#) +# hcst data +expect_equal( +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[1,, 1:2, 2, 2]), +c(272.8613, 271.0689, 270.8007, 273.5594, 272.1561, 272.8729), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +269.8822, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(253.8541, 276.6805), +tolerance = 0.0001 +) +# fcst data +expect_equal( +as.vector(aperm(drop(data$fcst$data), c(5, 1:4))[1, , 1:2, 2, 2]), +c(271.7708, 271.8424, 272.4980, 273.5842), +tolerance = 0.0001 +) +expect_equal( +mean(data$fcst$data), +271.2158, +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1990-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1991-11-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1991-12-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[10], +as.POSIXct("1991-02-15", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Calibration", { + +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +## TODO: Ask An-Chi about this test +# expect_equal( +# calibrated_data, +# data[1:2] +# ) + +}) + + +##====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps") +) +expect_equal( +class(skill_metrics$rpss_specs), +"array" +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))), +TRUE +) +expect_equal( +as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), +c(-0.3333333, 0.1666667, -0.3333333), +tolerance = 0.0001 +) +#expect_equal( +#all(is.na(skill_metrics$bss90_specs)), +#TRUE +#) +expect_equal( +as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), +c(0.4474382, 0.1026333, 0.4042823), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frps_specs[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$frpss_specs[4:7, 1, 5]), +c( 1.0, -0.5, -0.5, 0.5), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$bss10_specs[6:8, 1, 2]), +c(0.5, -0.5, -0.5), +) +expect_equal( +as.vector(skill_metrics$frps[6:8, 1, 2]), +c(0.4444444, 0.2222222, 0.4444444), +tolerance = 0.0001 +) + +# Probs +expect_equal( +names(probs), +c('probs', 'probs_fcst', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 3, time = 14, latitude = 8, longitude = 5) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 14, latitude = 8, longitude = 5) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +c(271.7508, 273.1682, 274.1937), +tolerance = 0.0001 +) + +}) + +#====================================== + +test_that("4. Saving", { + +expect_equal( +list.files(outdir), +c("plots", "tas_19901101.nc", "tas_19911101.nc", "tas_19921101.nc", "tas_20201101.nc", "tas_20211101.nc", + "tas-obs_19901101.nc", "tas-obs_19911101.nc", "tas-obs_19921101.nc", + "tas-percentiles_month11.nc", "tas-probs_19901101.nc", "tas-probs_19911101.nc", + "tas-probs_19921101.nc", "tas-probs_20201101.nc", "tas-probs_20211101.nc", "tas-skill_month11.nc") +) +}) + +#====================================== + +test_that("5. 
Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("bss10_specs.png", "enscorr_specs.png", "forecast_ensemble_mean_2020.png", "forecast_ensemble_mean_2021.png", "forecast_most_likely_tercile_2020.png", "forecast_most_likely_tercile_2021.png", "frps_specs.png", "frps.png", "rpss_specs.png") +) + +}) + +# Delete files +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) + diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R new file mode 100644 index 00000000..85c15c88 --- /dev/null +++ b/tests/testthat/test-decadal_monthly_3.R @@ -0,0 +1,199 @@ +context("Decadal monthly data - 3") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_3.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(recipe, calibrated_data) +))}) + +#====================================== + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16, ensemble = 3) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +# hcst data +expect_equal( +as.vector(aperm(drop(data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), +c(278.4305, 279.5065, 280.4110, 278.7608), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +284.3765, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(263.3929, 300.4329), +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("2016-04-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("2017-04-16", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("2016-05-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[12], +as.POSIXct("2019-06-16", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Calibration", { + +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +expect_equal( +as.vector(aperm(drop(calibrated_data$hcst$data), c(5, 1:4))[3, , 2, 2, 2]), +c(279.0648, 281.0578, 282.6535, 280.3137), +tolerance = 0.0001 +) +expect_equal( +calibrated_data$fcst, +NULL +) +}) + + +##====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("bss10", "bss10_significance", "corr", "corr_significance") +) +expect_equal( +class(skill_metrics[[1]]), +"array" +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitude = 25, longitude = 16)))), +TRUE +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +TRUE +) +expect_equal( +as.vector(skill_metrics$bss10[, 1, 2]), +c(-0.1904762, -0.1904762, -0.1904762), +tolerance = 0.0001 +) +expect_equal( +any(as.vector(skill_metrics$bss10_significance)), +FALSE +) +expect_equal( +as.vector(skill_metrics$corr[2, , 1, 2]), +c(-0.2015265, 0.4635463, -0.1019575), +tolerance = 0.0001 +) + +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 4, time = 3, latitude = 25, longitude = 16) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 3, latitude = 25, longitude = 16) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.3333333, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +c(278.1501, 279.5226, 282.0237), +tolerance = 0.0001 +) + +}) + + diff --git a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R new file mode 100644 index 00000000..ae80d522 --- /dev/null +++ b/tests/testthat/test-seasonal_daily.R @@ -0,0 +1,167 @@ +context("Seasonal daily data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-seasonal_daily_1.yml" +recipe <- prepare_outputs(recipe_file) +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +dim(data$obs$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 1) +) +expect_equal( +dim(data$obs$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 31) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(295.5691, 291.7752, 294.0874, 290.1173), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +288.3723, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(280.1490, 298.2324), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-12-01 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-02 18:00:00 UTC", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1994-12-03 11:30:00 UTC", tz = 'UTC') +) + +}) + +test_that("2. Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +calibrated_data$fcst, +NULL +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 31, latitude = 4, longitude = 4, ensemble = 25) +) +expect_equal( +mean(calibrated_data$hcst$data), +289.5354, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, 1:4, 2, 3, 4]), +c(291.5555, 291.9029, 293.2685, 290.7782), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$hcst$data), +c(284.2823, 296.7545), +tolerance = 0.0001 +) +}) + + +#====================================== +test_that("3. 
Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("enscorr_specs") +) +expect_equal( +class(skill_metrics$enscorr_specs), +"array" +) +expect_equal( +dim(skill_metrics$enscorr_specs), +c(time = 31, latitude = 4, longitude = 4) +) +expect_equal( +skill_metrics$enscorr_specs[1:3, 1, 1], +c(0.7509920, 0.6514916, 0.5118371), +tolerance=0.0001 +) +}) + +unlink(recipe$Run$output_dir) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R new file mode 100644 index 00000000..476ce06f --- /dev/null +++ b/tests/testthat/test-seasonal_monthly.R @@ -0,0 +1,238 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1.yml" +recipe <- prepare_outputs(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe) +))}) + +# Calibrate data +suppressWarnings({invisible(capture.output( +calibrated_data <- calibrate_datasets(recipe, data) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(recipe, calibrated_data) +))}) + +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(recipe, calibrated_data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +plot_data(recipe = recipe, data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, + significance = T) +))}) +outdir <- get_dir(recipe) + +# ------- TESTS -------- + +test_that("1. Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(293.9651, 295.9690, 290.6771, 290.7957), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.8758, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(284.7413, 299.6219), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') +) + +}) + +test_that("2. 
Calibration", { + +expect_equal( +is.list(calibrated_data), +TRUE +) +expect_equal( +names(calibrated_data), +c("hcst", "obs", "fcst") +) +expect_equal( +class(calibrated_data$hcst), +"s2dv_cube" +) +expect_equal( +class(calibrated_data$fcst), +"s2dv_cube" +) +expect_equal( +dim(calibrated_data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(calibrated_data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +mean(calibrated_data$fcst$data), +291.6433, +tolerance = 0.0001 +) +expect_equal( +mean(calibrated_data$hcst$data), +290.9006, +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[1, , 2, 3, 4]), +c(291.8887, 287.0233, 289.8808), +tolerance = 0.0001 +) +expect_equal( +range(calibrated_data$fcst$data), +c(283.8926, 299.0644), +tolerance = 0.0001 +) + +}) + + +#====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", + "enscorr_significance", "corr", "corr_significance", "enscorr_specs") +) +expect_equal( +class(skill_metrics$rpss), +"array" +) +expect_equal( +dim(skill_metrics$rpss), +c(time = 3, latitude = 3, longitude = 3) +) +expect_equal( +dim(skill_metrics$rpss_significance), +dim(skill_metrics$rpss) +) +expect_equal( +as.vector(skill_metrics$rpss[, 2, 3]), +c(-0.2918857, -1.4809143, -1.3842286), +tolerance = 0.0001 +) +expect_equal( +as.vector(skill_metrics$rpss_significance[, 2, 3]), +rep(FALSE, 3) +) + +}) + +test_that("4. Saving", { + +expect_equal( +list.files(outdir), +c("plots", "tas_19931101.nc", "tas_19941101.nc", "tas_19951101.nc", + "tas_19961101.nc", "tas_20201101.nc", "tas-corr_month11.nc", + "tas-obs_19931101.nc", "tas-obs_19941101.nc", "tas-obs_19951101.nc", + "tas-obs_19961101.nc", "tas-percentiles_month11.nc", "tas-probs_19931101.nc", + "tas-probs_19941101.nc", "tas-probs_19951101.nc", "tas-probs_19961101.nc", + "tas-probs_20201101.nc", "tas-skill_month11.nc") +) + +}) + +test_that("5. Visualization", { +expect_equal( +list.files(paste0(outdir, "/plots/")), +c("crpss.png", "enscorr_specs.png", "enscorr.png", "forecast_ensemble_mean.png", + "forecast_most_likely_tercile.png", "rpss.png") +) + +}) + +# Delete files +unlink(paste0(outdir, list.files(outdir, recursive = TRUE))) diff --git a/tools/add_dims.R b/tools/add_dims.R new file mode 100644 index 00000000..ce9445c8 --- /dev/null +++ b/tools/add_dims.R @@ -0,0 +1,9 @@ +# Function created in S2S$E-backend. 
Copied from:
+# https://earth.bsc.es/gitlab/es/S2S4E-backend-BSC/-/blob/master/data-analysis/SEAS5/seas5.load.R
+add_dims <- function(data){
+  default_dims <- c(sdate_hcst_1 = 1, sdate_hcst_2 = 1, sdate_hcst_year = length(data))
+  default_dims[names(dim(data))] <- dim(data)
+  dim(data) <- default_dims
+  return(data)
+}
+
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
new file mode 100644
index 00000000..25536335
--- /dev/null
+++ b/tools/check_recipe.R
@@ -0,0 +1,324 @@
+check_recipe <- function(recipe, logger) {
+
+  # recipe: the yaml recipe, already read in
+  # output: errors, or the total number of workflows (vars x regions) to compute
+
+  info(logger, paste("Checking recipe", recipe$filename))
+
+  # ---------------------------------------------------------------------
+  # ANALYSIS CHECKS
+  # ---------------------------------------------------------------------
+
+  TIME_SETTINGS = c('sdate','leadtimemin','leadtimemax','hcst_start','hcst_end')
+  PARAMS = c('Horizon','Time','Variables','Region','Regrid','Workflow','Datasets')
+  HORIZONS <- c('Subseasonal','Seasonal','Decadal')
+
+  # create output dirs:
+  if (!any(names(recipe) %in% "Analysis")) {
+    error(logger, "The recipe should contain an element called 'Analysis'.")
+  }
+
+  if (!all(PARAMS %in% names(recipe$Analysis))) {
+    error(logger,
+          paste("The element 'Analysis' in the recipe should contain these",
+                "elements:", paste(PARAMS, collapse = " ")))
+  }
+
+  if (!any(HORIZONS %in% recipe$Analysis$Horizon)) {
+    error(logger,
+          "The element 'Horizon' in the recipe should be one of the following:",
+          paste(HORIZONS, collapse = " "))
+  }
+  # Check temporal settings and
+  # count the number of verifications
+  if (!all(TIME_SETTINGS %in% names(recipe$Analysis$Time))) {
+    error(logger,
+          paste("The element 'Time' in the recipe should contain these elements:",
+                paste(TIME_SETTINGS, collapse = " ")))
+  }
+  if (is.null(recipe$Analysis$Time$sdate$fcst_year) ||
+      recipe$Analysis$Time$sdate$fcst_year == 'None') {
+    stream <- "hindcast"
+    recipe$Analysis$Time$sdate$fcst_year <- 'YYYY'
+  } else {
+    stream <- "fcst"
+  }
+  if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 &&
+      tolower(recipe$Analysis$Horizon) != "subseasonal") {
+    warn(logger,
+         paste("Only subseasonal verification allows multiple forecast days.",
+               "Element fcst_day in recipe set to 1."))
+    recipe$Analysis$Time$sdate$fcst_day <- '01'
+  }
+  if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) {
+    error(logger,
+          paste("The element 'fcst_sday' in the recipe should be defined."))
+  }
+  if (is.null(recipe$Analysis$Time$sdate$fcst_syear)) {
+    error(logger,
+          paste("The element 'fcst_syear' in the recipe should be defined."))
+  }
+
+
+  fcst.sdate <- NULL
+  for (syear in recipe$Analysis$Time$sdate$fcst_syear) {
+    for (sday in recipe$Analysis$Time$sdate$fcst_sday) {
+      fcst.sdate <- c(fcst.sdate,
+                      paste0(syear,
+                             sprintf("%04d", as.numeric(sday))))
+    }
+  }
+  fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate)
+  # Regrid checks:
+  if (length(recipe$Analysis$Regrid) != 2) {
+    error(logger,
+          "The 'Regrid' element should specify the 'method' and 'type'.")
+    stop("EXECUTION FAILED")
+  }
+# more checks
+  # ...
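+  # Illustrative example (assumed values, not taken from any recipe in this
+  # patch): with fcst_syear = 2021, fcst_sday = '0701' and a non-empty
+  # fcst_year, the start-date loop above yields
+  # fcst.sdate = list(stream = "fcst", fcst.sdate = "20210701").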
+  # calculate the number of workflows to create for each variable and region
+  if (length(recipe$Analysis$Horizon) > 1) {
+    error(logger, "Only 1 Horizon can be specified in the recipe")
+    stop("EXECUTION FAILED")
+  }
+  nvar <- length(recipe$Analysis$Variables)
+  if (nvar > 2) {
+    error(logger,
+          "Only two types of Variables can be listed: ECVs and Indicators.")
+    stop("EXECUTION FAILED")
+  }
+  # remove NULL or None Indicators or ECVs from the recipe:
+  if (!is.null(recipe$Analysis$Variables$Indicators) &&
+      !is.list(recipe$Analysis$Variables$Indicators)) {
+    recipe$Analysis$Variables <- recipe$Analysis$Variables[
+                      -which(names(recipe$Analysis$Variables) == 'Indicators')]
+  }
+  if (!is.null(recipe$Analysis$Variables$ECVs) &&
+      !is.list(recipe$Analysis$Variables$ECVs)) {
+    recipe$Analysis$Variables <- recipe$Analysis$Variables[
+                      -which(names(recipe$Analysis$Variables) == 'ECVs')]
+  }
+  # Only one Calibration method allowed:
+  if ((is.logical(recipe$Analysis$Workflow$Calibration[[1]]) &&
+       recipe$Analysis$Workflow$Calibration[[1]] == FALSE) ||
+       recipe$Analysis$Workflow$Calibration[[1]] == 'None' ||
+       is.null(recipe$Analysis$Workflow$Calibration[[1]])) {
+    warn(logger,
+         "No Calibration method selected; the raw data will be verified.")
+    recipe$Analysis$Workflow$Calibration[[1]] <- FALSE
+  } else {
+    # remove multiple calibration methods
+    if (is.null(names(recipe$Analysis$Workflow$Calibration))) {
+      error(logger,
+            "The 'Calibration' element should specify at least the 'method'.")
+      stop("EXECUTION FAILED")
+    }
+  }
+
+  if ("Region" %in% names(recipe$Analysis)) {
+    nregions <- length(recipe$Analysis$Region$Regional)
+    limits <- c('latmin', 'latmax', 'lonmin', 'lonmax')
+    for (i in 1:length(recipe$Analysis$Region)) {
+      if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) {
+        limits <- paste(limits, collapse = " ")
+        error(logger,
+              paste("Each region defined in element 'Regional'",
+                    "should have 4 elements:",
+                    limits))
+        stop("EXECUTION FAILED")
+      }
+      # TODO: check that the limits are numeric (class and mode)
+    }
+  } else {
+    error(logger,
+          paste("The element 'Region'",
+                "should be defined, including the limits:",
+                "latmin, latmax, lonmin and lonmax."))
+    stop("EXECUTION FAILED")
+  }
+
+  # ---------------------------------------------------------------------
+  # RUN CHECKS
+  # ---------------------------------------------------------------------
+
+  RUN_FIELDS = c("Loglevel","Terminal","output_dir","code_dir")
+  LOG_LEVELS = c("INFO","DEBUG","WARNING","ERROR")
+
+  if (!any(names(recipe) %in% "Run")) {
+    error(logger, "The recipe should contain an element called 'Run'.")
+  }
+  if (!all(RUN_FIELDS %in% names(recipe$Run))) {
+    error(logger, paste0("Run should contain the fields: ",
+                         paste(RUN_FIELDS, collapse = ", "), "."))
+  }
+  if (!is.character(recipe$Run$output_dir)) {
+    error(logger,
+          paste("The Run element 'output_dir' in", recipe$filename, "file ",
+                "should be a character string indicating the path ",
+                "where to save the outputs."))
+  }
+  if (!is.character(recipe$Run$code_dir)) {
+    error(logger,
+          paste("The Run element 'code_dir' in", recipe$filename, "file ",
+                "should be a character string indicating the path ",
+                "where the code is."))
+  }
+  if (!is.logical(recipe$Run$Terminal)) {
+    error(logger,
+          paste("The Run element 'Terminal' in", recipe$filename, "file ",
+                "should be a boolean value indicating whether or not to print the log",
+                "in the terminal."))
+  }
+  if (!is.character(recipe$Run$Loglevel) || !any(recipe$Run$Loglevel %in% LOG_LEVELS)) {
+    error(logger,
+          paste("The Run element 'Loglevel' in", recipe$filename, "file ",
+                "should be a character string indicating one of the levels available: ",
+                paste0(LOG_LEVELS, collapse = '/')))
+  }
+
+  # ---------------------------------------------------------------------
+  # WORKFLOW CHECKS
+  # ---------------------------------------------------------------------
+  # Check workflow: need to define restrictions?
+  # e.g. only one calibration method
+  nverifications <- check_number_of_dependent_verifications(recipe)
+  info(logger, paste("Start Dates", paste(fcst.sdate, collapse = " ")))
+  info(logger, "Recipe checked successfully.")
+  return(append(nverifications, fcst.sdate))
+}
+
+check_number_of_dependent_verifications <- function(recipe) {
+  # The number of verifications depends on the variables and indicators
+  # requested and on the order of the workflow:
+  # workflow: correction + indicator --> only 1 variable is calibrated
+  # workflow: indicator + correction --> the indicator and the ECV are calibrated
+  independent_verifications <- NULL
+  dependent_verifications <- NULL
+  dep <- 1
+  # check workflow order:
+  if (all(c('Calibration', 'Indicators') %in% names(recipe$Analysis$Workflow))) {
+    cal_pos <- which(names(recipe$Analysis$Workflow) == 'Calibration')
+    ind_pos <- which(names(recipe$Analysis$Workflow) == 'Indicators')
+    if (cal_pos < ind_pos) {
+      workflow_independent <- FALSE
+    } else {
+      workflow_independent <- TRUE
+    }
+  }
+  if (workflow_independent) {
+    independent_verifications <- append(recipe$Analysis$Variables$ECVs,
+                                        recipe$Analysis$Variables$Indicators)
+  } else {
+    if (is.null(recipe$Analysis$Variables$Indicators) ||
+        (length(recipe$Analysis$Variables$Indicators) == 1 &&
+         is.null(recipe$Analysis$Variables$ECVs))) {
+      independent_verifications <- append(recipe$Analysis$Variables$ECVs,
+                                          recipe$Analysis$Variables$Indicators)
+    } else {
+      ecvs <- recipe$Analysis$Variables$ECVs
+      inds <- recipe$Analysis$Variables$Indicators
+      ind_table <- read_yaml(paste0(recipe$Run$code_dir,
+                                    "conf/indicators_table.yml"))
+      # First, loop over the ECVs (if any) and compare them to the indicators
+      done <- NULL # to gather the indicators already reviewed
+      if (!is.null(ecvs)) {
+        for (i in 1:length(ecvs)) {
+          dependent <- list(ecvs[[i]])
+          for (j in 1:length(inds)) {
+            if (ind_table[inds[[j]]$name][[1]]$ECVs == ecvs[[i]]$name) {
+              if (ind_table[inds[[j]]$name][[1]]$freq == ecvs[[i]]$freq) {
+                # they are dependent
+                dependent <- append(dependent, inds[[j]])
+                done <- append(done, inds[[j]])
+              }
+            }
+          }
+          if (length(dependent) == 1) {
+            dependent <- NULL
+            independent_verifications <- append(independent_verifications,
+                                                list(ecvs[[i]]))
+          } else {
+            dependent_verifications <- append(dependent_verifications,
+                                              list(dependent))
+          }
+        }
+        # Are there any indicators that have not been reviewed yet?
+ if (length(done) < length(inds)) { + if (length(inds) == 1) { + independent_verifications <- append(independent_verifications, + inds) + } else { + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } else { # there are only Indicators: + done <- NULL + for (i in 1:(length(inds) - 1)) { + dependent <- list(inds[[i]]$name) + if (is.na(match(unlist(dependent), unlist(done)))) { + for (j in (i+1):length(inds)) { + if (ind_table[inds[[i]]$name][[1]]$ECVs == + ind_table[inds[[j]]$name][[1]]$ECVs) { + if (ind_table[inds[[i]]$name][[1]]$freq == + ind_table[inds[[j]]$name][[1]]$freq) { + dependent <- append(dependent, inds[[j]]$name) + done <- dependent + } + } + } + } + if (length(dependent) == 1) { + independent_verifications <- dependent + dependent <- NULL + } else { + dependent_verifications <- dependent + } + } + } + } + } + if (!is.null(independent_verifications)) { + info(logger, paste("The variables for independent verification are ", + paste(independent_verifications, collapse = " "))) + } + if (!is.null(dependent_verifications)) { + info(logger, paste("The variables for dependent verification are: ", + paste(dependent_verifications, collapse = " "))) + } + # remove unnecessary names in objects to be removed + return(list(independent = independent_verifications, + dependent = dependent_verifications)) +} +#workflow <- list(Calibration = list(method = 'SBC'), +# Skill = list(metric = 'RPSS')) +#ApplyWorkflow <- function(workflow) { + +#res <- do.call('CST_BiasCorrection', +# args = list(exp = lonlat_data$exp, +# obs = lonlat_data$obs)) + + + + diff --git a/tools/data_summary.R b/tools/data_summary.R new file mode 100644 index 00000000..597f42cb --- /dev/null +++ b/tools/data_summary.R @@ -0,0 +1,38 @@ +# Print a summary of the loaded data for the user, for each object. +# object: hindcast, forecast or reference data in s2dv_cube format. 
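+# recipe: the loaded recipe (list); used here for the variable frequency
+#   (recipe$Analysis$Variables$freq) and the logger stored in recipe$Run.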
+
+## TODO: Adapt to daily/subseasonal cases
+## TODO: Add check for missing files/NAs by dimension
+
+data_summary <- function(data_cube, recipe) {
+  # Get name, leadtime months and date range
+  object_name <- deparse(substitute(data_cube))
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    date_format <- '%b %Y'
+  } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+    date_format <- '%b %d %Y'
+  }
+  months <- unique(format(as.Date(data_cube$attrs$Dates), format = '%B'))
+  months <- paste(as.character(months), collapse = ", ")
+  sdate_min <- format(min(as.Date(data_cube$attrs$Dates)), format = date_format)
+  sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format)
+
+  # Log the summary to the logfile and the terminal
+  info(recipe$Run$logger, "DATA SUMMARY:")
+  info(recipe$Run$logger, paste(object_name, "months:", months))
+  info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to",
+                                sdate_max))
+  info(recipe$Run$logger, paste(object_name, "dimensions:"))
+  # Use capture.output() and a for loop to display the results neatly
+  output_string <- capture.output(dim(data_cube$data))
+  for (i in output_string) {
+    info(recipe$Run$logger, i)
+  }
+  info(recipe$Run$logger, paste0("Statistical summary of the data in ",
+                                 object_name, ":"))
+  output_string <- capture.output(summary(data_cube$data))
+  for (i in output_string) {
+    info(recipe$Run$logger, i)
+  }
+  info(recipe$Run$logger, "---------------------------------------------")
+}
+
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
new file mode 100644
index 00000000..dafc8704
--- /dev/null
+++ b/tools/divide_recipe.R
@@ -0,0 +1,113 @@
+# recipe: the content of the recipe
+# verifications: the output from check_recipe
+# folder: the name of the output folder for this run
+# logger: the logger object obtained from prepare_outputs
+divide_recipe <- function(recipe, verifications, folder, logger) {
+  info(logger, "Splitting recipe into single verifications.")
+  beta_recipe <- list(Description = append(recipe$Description,
+                                           "split version"),
+                      Analysis = list(Horizon = recipe$Analysis$Horizon,
+                                      Variables = NULL,
+                                      Datasets = NULL,
+                                      Time = NULL,
+                                      Region = NULL,
+                                      Regrid = recipe$Analysis$Regrid,
+                                      Workflow = recipe$Analysis$Workflow,
+                                      Output_format =
+                                        recipe$Analysis$Output_format),
+                      Run = recipe$Run)
+  # duplicate recipe by Variables considering dep and indep:
+  all_recipes <- list(beta_recipe)
+  i <- 1 # to keep track of the recipe number
+  for (indep in verifications$independent) {
+    all_recipes[[i]]$Analysis$Variables <- indep
+    i = i + 1
+    all_recipes <- append(all_recipes, list(beta_recipe))
+  }
+  for (dep in verifications$dependent) {
+    all_recipes[[i]]$Analysis$Variables <- dep
+    i = i + 1
+    all_recipes <- append(all_recipes, list(beta_recipe))
+  }
+  all_recipes <- all_recipes[-length(all_recipes)]
+  # duplicate recipe by Datasets:
+  # check Systems
+  if (recipe$Analysis$Datasets$Multimodel) {
+    for (reci in 1:length(all_recipes)) {
+      all_recipes[[reci]]$Analysis$Datasets <- list(
+        System = recipe$Analysis$Datasets$System,
+        Multimodel = recipe$Analysis$Datasets$Multimodel,
+        Reference = NULL)
+    }
+  } else {
+    for (sys in 1:length(recipe$Analysis$Datasets$System)) {
+      for (reci in 1:length(all_recipes)) {
+        all_recipes[[reci]]$Analysis$Datasets <- list(
+          System = recipe$Analysis$Datasets$System[[sys]],
+          Multimodel = recipe$Analysis$Datasets$Multimodel,
+          Reference = NULL)
+      }
+      if (sys == 1) {
+        recipes <- all_recipes
+      } else {
+        recipes <- append(recipes, all_recipes)
+      }
+    }
+    all_recipes <- recipes
+    rm(list = 'recipes')
+  }
+  # check References
+  for (ref in 1:length(recipe$Analysis$Datasets$Reference)) {
+    for (reci in 1:length(all_recipes)) {
+      all_recipes[[reci]]$Analysis$Datasets$Reference <-
+        recipe$Analysis$Datasets$Reference[[ref]]
+    }
+    if (ref == 1) {
+      recipes <- all_recipes
+    } else {
+      recipes <- append(recipes, all_recipes)
+    }
+  }
+  all_recipes <- recipes
+  # Duplicate recipe by Region
+  recipes <- list()
+  for (reg in 1:length(recipe$Analysis$Region)) {
+    if (length(recipe$Analysis$Region[[reg]]) == 4) { ## TODO: SHOULD THIS ONLY BE CHECKED IN THE RECIPE CHECKER?
+      for (reci in 1:length(all_recipes)) {
+        all_recipes[[reci]]$Analysis$Region <-
+          recipe$Analysis$Region[[reg]]
+      }
+      recipes <- append(recipes, all_recipes)
+    }
+  }
+  all_recipes <- recipes
+  rm(list = 'recipes')
+  if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
+    for (sday in 1:length(recipe$Analysis$Time$sdate$fcst_sday)) {
+      for (reci in 1:length(all_recipes)) {
+        all_recipes[[reci]]$Analysis$Time <- list(sdate = list(
+          fcst_syear = recipe$Analysis$Time$sdate$fcst_syear,
+          fcst_sday = recipe$Analysis$Time$sdate$fcst_sday[[sday]]),
+          hcst_start = recipe$Analysis$Time$hcst_start,
+          hcst_end = recipe$Analysis$Time$hcst_end,
+          leadtimemin = recipe$Analysis$Time$leadtimemin,
+          leadtimemax = recipe$Analysis$Time$leadtimemax)
+      }
+      if (sday == 1) {
+        recipes <- all_recipes
+      } else {
+        recipes <- append(recipes, all_recipes)
+      }
+    }
+    all_recipes <- recipes
+    rm(list = 'recipes')
+  } # Rest of horizons
+  # Finally, save all recipes in separate yaml files
+  for (reci in 1:length(all_recipes)) {
+    write_yaml(all_recipes[[reci]],
+               paste0(folder, "/logs/recipes/recipe_", reci, ".yml"))
+  }
+  text <- paste0("See the individual recipes in the folder ", folder, "/logs/recipes/.")
+  info(logger, text)
+  return(all_recipes)
+}
diff --git a/tools/libs.R b/tools/libs.R
new file mode 100644
index 00000000..a0767f76
--- /dev/null
+++ b/tools/libs.R
@@ -0,0 +1,35 @@
+library(log4r)
+library(startR)
+library(ClimProjDiags)
+library(multiApply)
+library(yaml)
+library(s2dv)
+library(abind)
+# library(s2dverification)
+# library(ncdf4)
+# library(easyVerification)
+library(easyNCDF)
+library(CSTools)
+library(lubridate)
+library(PCICt)
+library(RColorBrewer)
+library(grDevices)
+#
+# library(parallel)
+# library(pryr) # To check mem usage.
+#setwd("/esarchive/scratch/nperez/git/S2S4E-backend-BSC/data-analysis/")
+# source('export_2_nc.R')
+# source('S2S/s2s.filefmt.R')
+# source('s2s.calib.R')
+#
+# source("s2s_tools.R")
+# source("Calibration_fcst4.R")
+# source("R_Reorder.R")
+# source("R_CST_MergeDims.R")
+#setwd(recipe$Run$code_dir)
+# # To be removed when new package is done by library(CSOperational)
+source("tools/check_recipe.R")
+source("tools/prepare_outputs.R")
+source("tools/divide_recipe.R")
+source("tools/data_summary.R")
+# source("tools/add_dims.R") # Not sure if necessary yet
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
new file mode 100644
index 00000000..a89e5e7b
--- /dev/null
+++ b/tools/prepare_outputs.R
@@ -0,0 +1,81 @@
+#'Read recipe YAML file and create and store logfile info
+#'
+#'The purpose of this function is to read the recipe configuration for Auto-S2S
+#'workflows and create logfiles stored in the output directory specified in
+#'the recipe. It returns an object of class logger that stores information on
+#'the recipe configuration and errors.
+#'
+#'@param recipe_file path to a YAML file with Auto-S2S configuration recipe
+#'
+#'@return list containing the recipe with the logger, the log file name and the log dir name
+#'
+#'@import log4r
+#'@import yaml
+#'
+#'@examples
+#'setwd("/esarchive/scratch/vagudets/repos/auto-s2s/")
+#'library(yaml)
+#'recipe <- prepare_outputs("modules/data_load/recipe_1.yml")
+#'info(recipe$Run$logger, "This is an info message")
+#'
+#'@export
+
+prepare_outputs <- function(recipe_file) {
+
+# recipe: the content of the recipe file, once read
+# file: the recipe file name
+
+  recipe <- read_yaml(recipe_file)
+  recipe$recipe_path <- recipe_file
+  recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
+
+  output_dir = recipe$Run$output_dir
+  # Create output folders:
+  folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)), "_",
+                        gsub(" ", "", gsub(":", "", gsub("-", "", Sys.time()))))
+
+  print("Saving all outputs to:")
+  print(output_dir)
+  print(folder_name)
+
+  dir.create(file.path(output_dir, folder_name, 'outputs'), recursive = TRUE)
+  dir.create(file.path(output_dir, folder_name, 'logs'))
+  dir.create(file.path(output_dir, folder_name, 'logs', 'recipes'))
+
+  file.copy(recipe$recipe_path, file.path(output_dir, folder_name, 'logs',
+                                          'recipes'))
+
+  logfile <- file.path(output_dir, folder_name, 'logs', 'log.txt')
+  file.create(logfile)
+
+  # Set default behaviour of log output file:
+  if (is.null(recipe$Run)) {
+    recipe$Run <- list(Loglevel = 'INFO', Terminal = TRUE)
+  }
+  if (is.null(recipe$Run$Loglevel)) {
+    recipe$Run$Loglevel <- 'INFO'
+  }
+
+  if (!is.logical(recipe$Run$Terminal)) {
+    recipe$Run$Terminal <- TRUE
+  }
+  if (recipe$Run$Terminal) {
+    logger <- log4r::logger(threshold = recipe$Run$Loglevel,
+                            appenders = list(console_appender(layout = default_log_layout()),
+                                             file_appender(logfile, append = TRUE,
+                                                           layout = default_log_layout())))
+  } else {
+    logger <- log4r::logger(threshold = recipe$Run$Loglevel,
+                            appenders = list(file_appender(logfile, append = TRUE,
+                                                           layout = default_log_layout())))
+  }
+
+  recipe$Run$output_dir <- file.path(output_dir, folder_name)
+  recipe$Run$logger <- logger
+  recipe$Run$logfile <- logfile
+
+  info(recipe$Run$logger,
+       "##### LOGGER SET UP AND OUTPUT DIRECTORY PREPARED #####")
+
+  return(recipe)
+}
diff --git a/tools/test_check_number_of_independent_verifications.R b/tools/test_check_number_of_independent_verifications.R
new file mode 100644
index 00000000..846dc5be
--- /dev/null
+++ b/tools/test_check_number_of_independent_verifications.R
@@ -0,0 +1,161 @@
+library(testthat)
+test_that("A few combinations", {
+  source("/esarchive/scratch/nperez/git/auto-s2s/tools/check_recipe.R")
+  recipe <- list(Analysis =
+                   list(Variables = list(ECVs = list(
+                          list(name = 'tas', freq = 'daily_mean')),
+                        Indicators = list(list(name = 'gdd'))),
+                        Workflow = list(Calibration = 'SBC', Indicators = TRUE)))
+  conf <- list(code_dir = "/esarchive/scratch/nperez/git/auto-s2s/")
+  expect_equal(check_number_of_dependent_verifications(recipe, conf),
+               list(independent = NULL,
+                    dependent = list(list(list(name = 'tas', freq = 'daily_mean'),
+                                          name = 'gdd'))))
+  recipe <- list(Analysis =
+                   list(Variables = list(ECVs = list(
+                          list(name = 'tas', freq = 'daily_mean')),
+                        Indicators = list(name = 'gdd')),
+                        Workflow = list(Indicators = TRUE, Calibration = 'SBC')))
+  expect_equal(check_number_of_dependent_verifications(recipe, conf),
+               list(independent = list(list(name = 'tas', freq = 'daily_mean'),
+                                       name = 'gdd'), dependent = NULL))
+
+  recipe <- list(Analysis =
+                   list(Variables =
list(ECVs = list( + list(name = 'tas', freq = 'daily_mean'))), + Workflow = list(Indicators = FALSE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tas', freq = 'daily_mean')), + dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tas', freq = 'daily_mean'), + list(name = 'tasmax', freq = 'daily_mean'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tas', freq = 'daily_mean'), + list(name = 'tasmax', freq = 'daily_mean')), + dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(Indicators = list(list(name = 'gdd'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'gdd')), dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tas', freq = 'daily_mean'), + list(name = 'gdd'), list(name = 'gst')), dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Calibration = 'SBC', Indicators = TRUE))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = NULL, + dependent = list(list(list(name = 'tas', freq = 'daily_mean'), + name = 'gdd', name = 'gst')))) +# Dependent workflow: cal & ind + recipe <- list(Analysis = + list(Variables = list( + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Calibration = 'SBC', Indicators = TRUE))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = NULL, dependent = list('gdd', 'gst'))) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tasmax', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Calibration = 'SBC', Indicators = TRUE))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tasmax', freq = 'daily_mean')), + dependent = list('gdd', 'gst'))) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Calibration = 'SBC', Indicators = TRUE))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tasmax', freq = 'daily_mean')), + dependent = list(list(list(name = 'tas', freq = 'daily_mean'), + name = 'gdd', name = 'gst')))) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'), + list(name = 'spr32'))), + Workflow = list(Calibration = 'SBC', Indicators = TRUE))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = NULL, + dependent = 
list(list(list(name = 'tasmax', freq = 'daily_mean'), + name = 'spr32'), + list(list(name = 'tas', freq = 'daily_mean'), + name = 'gdd', name ='gst')))) +# Independent workflow: ind & cal + recipe <- list(Analysis = + list(Variables = list( + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'gdd'), list(name = 'gst')), + dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name ='tasmax', freq = 'daily_freq')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tasmax', freq = 'daily_freq'), + list(name = 'gdd'), list(name = 'gst')), + dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean'), + list(name = 'gdd'), + list(name = 'gst')), dependent = NULL)) + + recipe <- list(Analysis = + list(Variables = list(ECVs = list( + list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean')), + Indicators = list(list(name = 'gdd'), + list(name = 'gst'), + list(name = 'spr32'))), + Workflow = list(Indicators = TRUE, Calibration = 'SBC'))) + expect_equal(check_number_of_dependent_verifications(recipe, conf), + list(independent = list(list(name = 'tasmax', freq = 'daily_mean'), + list(name = 'tas', freq = 'daily_mean'), + list(name = 'gdd'), list(name = 'gst'), + list(name = 'spr32')), + dependent = NULL)) +}) + diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R new file mode 100644 index 00000000..019f69a2 --- /dev/null +++ b/tools/tmp/as.s2dv_cube.R @@ -0,0 +1,184 @@ +#'Conversion of 'startR_array' or 'list' objects to 's2dv_cube' +#' +#'This function converts data loaded using startR package or s2dverification Load function into a 's2dv_cube' object. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#' +#'@param object an object of class 'startR_array' generated from function \code{Start} from startR package (version 0.1.3 from earth.bsc.es/gitlab/es/startR) or a list output from function \code{Load} from s2dverification package. +#' +#'@return The function returns a 's2dv_cube' object to be easily used with functions \code{CST} from CSTools package. 
+#' +#'@seealso \code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} +#'@examples +#'\dontrun{ +#'library(startR) +#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +#'data <- Start(dat = repos, +#' var = 'tas', +#' sdate = c('20170101', '20180101'), +#' ensemble = indices(1:20), +#' time = 'all', +#' latitude = 'all', +#' longitude = indices(1:40), +#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), +#' retrieve = TRUE) +#'data <- as.s2dv_cube(data) +#'class(data) +#'startDates <- c('20001101', '20011101', '20021101', +#' '20031101', '20041101', '20051101') +#'data <- Load(var = 'tas', exp = 'system5c3s', +#' nmember = 15, sdates = startDates, +#' leadtimemax = 3, latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40, output = 'lonlat') +#'data <- as.s2dv_cube(data) +#'class(data) +#'} +#'@export +as.s2dv_cube <- function(object) { + if (is.list(object)) { + if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { + stop("The s2dverification::Load call did not return any data.") + } + obs <- object + obs$mod <- NULL + object$obs <- NULL + names(object)[[1]] <- 'data' + names(obs)[[1]] <- 'data' + remove_matches <- function(v, patterns) { + if (length(v) > 0) { + matches <- c() + for (pattern in patterns) { + matches <- c(matches, which(grepl(pattern, v))) + } + if (length(matches) > 0) { + v <- v[-matches] + } + } + v + } + + harmonize_patterns <- function(v) { + matches <- grepl('.*\\.nc$', v) + if (sum(!matches) > 0) { + match_indices <- which(!matches) + v[match_indices] <- sapply(v[match_indices], function(x) paste0(x, '*')) + } + v <- glob2rx(v) + v <- gsub('\\$.*\\$', '*', v) + v + } + + if (!is.null(obs$data)) { + obs$Datasets$exp <- NULL + obs$Datasets <- obs$Datasets$obs + obs_path_patterns <- sapply(obs$Datasets, function(x) attr(x, 'source')) + obs_path_patterns <- harmonize_patterns(obs_path_patterns) + } + + if (!is.null(object$data)) { + object$Datasets$obs <- NULL + object$Datasets <- object$Datasets$exp + exp_path_patterns <- sapply(object$Datasets, function(x) attr(x, 'source')) + exp_path_patterns <- harmonize_patterns(exp_path_patterns) + } + + if (!is.null(obs$data) && !is.null(object$data)) { + obs$source_files <- remove_matches(obs$source_files, + exp_path_patterns) + obs$not_found_files <- remove_matches(obs$not_found_files, + exp_path_patterns) + + object$source_files <- remove_matches(object$source_files, + obs_path_patterns) + object$not_found_files <- remove_matches(object$not_found_files, + obs_path_patterns) + } + + result <- list() + if (!is.null(object$data)) { + class(object) <- 's2dv_cube' + result$exp <- object + } + if (!is.null(obs$data)) { + class(obs) <- 's2dv_cube' + result$obs <- obs + } + if (is.list(result)) { + if (is.null(result$exp)) { + result <- result$obs + } else if (is.null(result$obs)) { + result <- result$exp + } else { + warning("The output is a list of two 's2dv_cube' objects", + " corresponding to 'exp' and 'obs'.") + } + } + + } else if (class(object) == 'startR_array') { + result <- list() + result$data <- as.vector(object) + dim(result$data) <- dim(object) + + dat_attr_names <- names(attributes(object)$Variables$dat1) + common_attr_names <- names(attributes(object)$Variables$common) + # $lon + known_lon_names <- s2dv:::.KnownLonNames() + if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lon_names)]) & + !identical(dat_attr_names[which(dat_attr_names %in% known_lon_names)], 
character(0))) { + result$lon <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lon_names)]]] + } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lon_names)]) & + !identical(common_attr_names[which(common_attr_names %in% known_lon_names)], character(0))) { + result$lon <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lon_names)]]] + } else { + warning("'lon' is not found in this object.") + result$lon <- NULL + } + # $lat + known_lat_names <- s2dv:::.KnownLatNames() + if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lat_names)]) & + !identical(dat_attr_names[which(dat_attr_names %in% known_lat_names)], character(0))) { + result$lat <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lat_names)]]] + } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lat_names)]) & + !identical(common_attr_names[which(common_attr_names %in% known_lat_names)], character(0))) { + result$lat <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lat_names)]]] + } else { + warning("'lat' is not found in this object.") + result$lat <- NULL + } + + vars <- which(!common_attr_names %in% c("time", known_lon_names, known_lat_names)) + + if (length(vars) > 1) { + warning("More than one variable has been provided and ", + "only the first one '", common_attr_names[vars[1]],"' will be used.") + vars <- vars[1] + } + + Variable <- list() + Variable$varName <- names(attributes(object)$Variables$common)[vars] + attr(Variable, 'variable') <- attributes(object)$Variables$common[[vars]] + result$Variable <- Variable + dims <- dim(object) + if (any(c('sdate', 'sdates') %in% names(dims))) { + n_sdates <- dims[which(names(dims) == 'sdate' | names(dims) == 'sdates')] + sdates <- attributes(object)$Variables$common$time[1 : n_sdates] + } else { + sdates <- attributes(object)$Variables$common$time[1] + } + Dataset <- list(list(InitializationDates = list(Member_1 = sdates))) + names(Dataset) <- list(deparse(substitute(object))) + result$Datasets <- Dataset + result$Dates$start <- attributes(object)$Variables$common$time + result$when <- Sys.time() + result$source_files <- as.vector(attributes(object)$Files) + result$load_parameters <- attributes(object)$FileSelectors + class(result) <- 's2dv_cube' + } else { + stop("The class of parameter 'object' is not implemented", + " to be converted into 's2dv_cube' class yet.") + } + result + +} + -- GitLab From f97bf93def72d614c91abd1450a54e9ffa274722 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 8 Feb 2023 14:46:49 +0100 Subject: [PATCH 28/47] Set up gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d17d7634..f7a39eb8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ out-logs/ *.swp *.swo -/modules/Calibration/test_victoria.R +modules/Calibration +conf/*decadal* +modules/Loading/*decadal* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml -- GitLab From d45bf46571d4a5cca65b1571d1e69219e30d37e0 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Feb 2023 14:26:26 +0100 Subject: [PATCH 29/47] Exclude sticky::append from namespace --- tools/libs.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/libs.R b/tools/libs.R index 19b26e87..1f86edcd 
100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -1,5 +1,5 @@ library(log4r) -library(sticky) +library(sticky, exclude = "append") library(startR) library(ClimProjDiags) library(multiApply) @@ -15,7 +15,6 @@ library(lubridate) library(PCICt) library(RColorBrewer) library(grDevices) -append <- base::append # # library(parallel) # library(pryr) # To check mem usage. -- GitLab From a8acc69f067ee666f3ba6c039edc1bee1258aba7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 1 Mar 2023 11:56:47 +0100 Subject: [PATCH 30/47] Remove sticky, add TODO --- modules/test_seasonal.R | 5 +++-- tools/libs.R | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index b8541488..72d7ef54 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -20,6 +20,7 @@ skill_metrics <- compute_skill_metrics(recipe, calibrated_data) probabilities <- compute_probabilities(recipe, calibrated_data) # Export all data to netCDF save_data(recipe, calibrated_data, skill_metrics, probabilities) +## TODO: Fix plotting # Plot data -plot_data(recipe, calibrated_data, skill_metrics, probabilities, - significance = T) +# plot_data(recipe, calibrated_data, skill_metrics, probabilities, +# significance = T) diff --git a/tools/libs.R b/tools/libs.R index 2a4a1445..9acf0756 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -1,5 +1,4 @@ library(log4r) -library(sticky, exclude = "append") library(startR) library(ClimProjDiags) library(multiApply) -- GitLab From 3f5fc801544fedf8d82772895a5dc5781b288961 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Tue, 2 May 2023 18:07:48 +0200 Subject: [PATCH 31/47] loading tas-tos --- conf/archive.yml | 13 +- conf/variable-dictionary.yml | 8 + modules/Loading/Dev_Loading.R | 480 ++++++++++++++++++ modules/Loading/Loading.R | 250 ++++----- modules/Loading/R/mask_tas_tos.R | 78 +++ .../recipe_test_multivar_nadia.yml | 49 ++ recipes/recipe_scorecards_s2s-suite.yml | 50 ++ tas-tos_scorecards_data_loading.R | 78 +++ 8 files changed, 879 insertions(+), 127 deletions(-) create mode 100644 modules/Loading/Dev_Loading.R create mode 100644 modules/Loading/R/mask_tas_tos.R create mode 100644 recipes/atomic_recipes/recipe_test_multivar_nadia.yml create mode 100644 recipes/recipe_scorecards_s2s-suite.yml create mode 100644 tas-tos_scorecards_data_loading.R diff --git a/conf/archive.yml b/conf/archive.yml index a982b84f..b6f44a07 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,7 +16,7 @@ esarchive: "tasmin":"_f24h/", "tasmax":"_f24h/", "ta300":"_f12h/", "ta500":"_f12h/", "ta850":"_f12h/", "g300":"_f12h/", "g500":"_f12h/", "g850":"_f12h/", - "tdps":"_f6h/"} + "tdps":"_f6h/", "tos":"_f6h/"} nmember: fcst: 51 hcst: 25 @@ -156,7 +156,9 @@ esarchive: "tasmin":"_f1h-r1440x721cds/", "ta300":"_f1h-r1440x721cds/", "ta500":"_f1h-r1440x721cds/", - "ta850":"_f1h-r1440x721cds/"} + "ta850":"_f1h-r1440x721cds/", + "tos":"_f1h-r1440x721cds" + } calendar: "standard" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" ERA5-Land: @@ -197,6 +199,13 @@ esarchive: monthly_mean: {"prlr":"_f6h-r2631x1113/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/cerraland/monthly_mean/prlr_f6h-r2631x1113/prlr_200412.nc" + HadCRUT5: + name: "HadCRUT5" + institution: "Met Office" + src: "obs/ukmo/hadcrut_v5.0_analysis/" + monthly_mean: {"tasanomaly":"/"} + calendar: "proleptic_gregorian" + reference_grid: 
"/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml index 917abc64..0bfbffe0 100644 --- a/conf/variable-dictionary.yml +++ b/conf/variable-dictionary.yml @@ -204,6 +204,14 @@ vars: long_name: "Surface Upward Sensible Heat Flux" standard_name: "surface_upward_sensible_heat_flux" accum: no +## Adding new variable + tasanomaly: + units: "K" + long_name: "Near-Surface Air Temperature Anomaly" + standard_name: "air_temperature_anom" + accum: no + + # Coordinates coords: diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R new file mode 100644 index 00000000..5d16e54b --- /dev/null +++ b/modules/Loading/Dev_Loading.R @@ -0,0 +1,480 @@ +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +## TODO: Move to prepare_outputs.R +source("tools/libs.R") +## TODO: remove these two lines when new as.s2dv_cube() is in CSTools +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') +source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') + +## TODO: Source new s2dv_cube version +## TODO: Eliminate dim_var dimension (merge_across_dims?) + +load_datasets <- function(recipe) { + + # ------------------------------------------- + # Set params ----------------------------------------- + + hcst.inityear <- recipe$Analysis$Time$hcst_start + hcst.endyear <- recipe$Analysis$Time$hcst_end + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- recipe$Analysis$Datasets$System$name + + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1] + vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + store.freq <- recipe$Analysis$Variables$freq + + # get sdates array + ## LOGGER: Change dates2load to extract logger from recipe? + sdates <- dates2load(recipe, recipe$Run$logger) + + idxs <- NULL + idxs$hcst <- get_timeidx(sdates$hcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + + if (!(is.null(sdates$fcst))) { + idxs$fcst <- get_timeidx(sdates$fcst, + recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, + time_freq=store.freq) + } + + ## TODO: Examine this verifications part, verify if it's necessary + # stream <- verifications$stream + # sdates <- verifications$fcst.sdate + + ## TODO: define fcst.name + ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name + + # get esarchive datasets dict: + ## TODO: Adapt to 'filesystem' option in recipe + archive <- read_yaml("conf/archive.yml")$esarchive + exp_descrip <- archive$System[[exp.name]] + + freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + reference_descrip <- archive$Reference[[ref.name]] + freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + obs.dir <- reference_descrip$src + fcst.dir <- exp_descrip$src + hcst.dir <- exp_descrip$src + fcst.nmember <- exp_descrip$nmember$fcst + hcst.nmember <- exp_descrip$nmember$hcst + + ## TODO: it is necessary? 
+ ##if ("accum" %in% names(reference_descrip)) { + ## accum <- unlist(reference_descrip$accum[store.freq][[1]]) + ##} else { + ## accum <- FALSE + ##} + + var_dir_obs <- reference_descrip[[store.freq]][vars] + var_dir_exp <- exp_descrip[[store.freq]][vars] + + # ----------- + obs.path <- paste0(archive$src, + obs.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") + + hcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", "$var_dir$", + "$var$_$file_date$.nc") + + fcst.path <- paste0(archive$src, + hcst.dir, store.freq, "/$var$", "$var_dir$", + "/$var$_$file_date$.nc") + + # Define regrid parameters: + #------------------------------------------------------------------- + regrid_params <- get_regrid_params(recipe, archive) + + # Longitude circular sort and latitude check + #------------------------------------------------------------------- + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean"){ + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- Start(dat = hcst.path, + var = vars, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = idxs$hcst, + var_dir_depends = 'var', + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:hcst.nmember), + metadata_dims = 'var', # change to just 'var'? 
+ return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(hcst))) { + hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + + # Convert hcst to s2dv_cube object + ## TODO: Give correct dimensions to $Dates + ## (sday, sweek, syear instead of file_date) + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + ## tas tos loading + if(recipe$Analysis$Variables$name == 'tas tos'){ + if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5'){ + source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') + hcst <- mask_tas_tos(input_data = hcst, region = c(0.1, 359.95, -90, 90), + grid = 'r360x181', + lon = attributes(hcst$coords$longitude)$variables$longitude$dim[[1]]$vals, + lat = attributes(hcst$coords$latitude)$variables$latitude$dim[[1]]$vals, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + hcst$dims[['var']] <- dim(hcst$data)[['var']] + } + } + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + # the call uses file_date instead of fcst_syear so that it can work + # with the daily case and the current version of startR not allowing + # multiple dims split + + fcst <- Start(dat = fcst.path, + var = vars, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, + time = idxs$fcst, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + if ("var_dir" %in% names(dim(fcst))) { + fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") + } + + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + 
names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + + # Convert fcst to s2dv_cube + fcst <- as.s2dv_cube(fcst) + # Adjust dates for models where the time stamp goes into the next month + if (recipe$Analysis$Variables$freq == "monthly_mean") { + fcst$attrs$Dates[] <- + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + } + + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. + dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Separate Start() call for monthly vs daily data + if (store.freq == "monthly_mean") { + + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") + dim(dates_file) <- dim(dates) + + ## tas tos mask + if (recipe$Analysis$Variables$name == 'tas tos'){ + if (recipe$Analysis$Datasets$Reference$name == 'HadCRUT5'){ + vars <- 'tasanomaly' + var_dir_obs <- reference_descrip[[store.freq]][vars] + } + } + + obs <- Start(dat = obs.path, + var = vars, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq == "daily_mean") { + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = vars, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = 
list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } + + # Remove var_dir dimension + if ("var_dir" %in% names(dim(obs))) { + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!(recipe$Analysis$Regrid$type == 'none')) { + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + + } + } + + # Remove negative values in accumulative variables + dictionary <- read_yaml("conf/variable-dictionary.yml") + for (var_idx in 1:length(vars)) { + var_name <- vars[var_idx] + if (dictionary$vars[[var_name]]$accum) { + info(recipe$Run$logger, + paste0("Accumulated variable ", var_name, + ": setting negative values to zero.")) + # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, + # along = "var", + # indices = var_idx, F), 0) + obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 + hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + } + } + + # Convert prlr from m/s to mm/day + ## TODO: Make a unit conversion function + if (vars[[var_idx]] == "prlr") { + # Verify that the units are m/s and the same in obs and hcst + if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { + info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") + obs$data[, var_idx, , , , , , , ] <- + obs$data[, var_idx, , , , , , , ]*86400*1000 + obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + hcst$data[, var_idx, , , , , , , ] <- + hcst$data[, var_idx, , , , , , , ]*86400*1000 + hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + if (!is.null(fcst)) { + fcst$data[, var_idx, , , , , , , ] <- + fcst$data[, var_idx, , 
, , , , , ]*86400*1000 + fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + } + } + } + } + # Compute anomalies if requested + # Print a summary of the loaded data for the user, for each object + if (recipe$Run$logger$threshold <= 2) { + data_summary(hcst, recipe) + data_summary(obs, recipe) + if (!is.null(fcst)) { + data_summary(fcst, recipe) + } + } + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + + ############################################################################ + # + # CHECKS ON MISSING FILES + # + ############################################################################ + + #obs.NA_dates.ind <- Apply(obs, + # fun=(function(x){ all(is.na(x))}), + # target_dims=c('time', 'latitude', 'longitude'))[[1]] + #obs.NA_dates <- dates_file[obs.NA_dates.ind] + #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)] + #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_", + # freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc") + # + #if (any(is.na(hcst))){ + # fatal(recipe$Run$logger, + # paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "hcst files:", + # hcst.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status = 1) + #} + # + #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){ + # fatal(recipe$logger, + # paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ", + # " ################################################# ", + # " ###### MISSING FILES #### ", + # " ################################################# ", + # "obs files:", + # obs.NA_files, + # " ################################################# ", + # " ################################################# ", + # sep="\n")) + # quit(status=1) + #} + # + #info(recipe$logger, + # "######### DATA LOADING COMPLETED SUCCESFULLY ##############") + + ############################################################################ + ############################################################################ + + return(list(hcst = hcst, fcst = fcst, obs = obs)) + +} diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index aadc5fb3..4fc212fc 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -14,10 +14,10 @@ source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube ## TODO: Eliminate dim_var dimension (merge_across_dims?) load_datasets <- function(recipe) { - + # ------------------------------------------- # Set params ----------------------------------------- - + hcst.inityear <- recipe$Analysis$Time$hcst_start hcst.endyear <- recipe$Analysis$Time$hcst_end lats.min <- recipe$Analysis$Region$latmin @@ -30,11 +30,11 @@ load_datasets <- function(recipe) { variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1] vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq - + # get sdates array ## LOGGER: Change dates2load to extract logger from recipe? 
sdates <- dates2load(recipe, recipe$Run$logger) - + idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, recipe$Analysis$Time$ftime_min, @@ -47,7 +47,7 @@ load_datasets <- function(recipe) { recipe$Analysis$Time$ftime_max, time_freq=store.freq) } - + ## TODO: Examine this verifications part, verify if it's necessary # stream <- verifications$stream # sdates <- verifications$fcst.sdate @@ -75,22 +75,22 @@ load_datasets <- function(recipe) { ##} else { ## accum <- FALSE ##} - + var_dir_obs <- reference_descrip[[store.freq]][vars] var_dir_exp <- exp_descrip[[store.freq]][vars] # ----------- obs.path <- paste0(archive$src, obs.dir, store.freq, "/$var$", "$var_dir$", - "/$var$_$file_date$.nc") + "/$var$_$file_date$.nc") hcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", "$var_dir$", - "$var$_$file_date$.nc") + "$var$_$file_date$.nc") fcst.path <- paste0(archive$src, hcst.dir, store.freq, "/$var$", "$var_dir$", - "/$var$_$file_date$.nc") + "/$var$_$file_date$.nc") # Define regrid parameters: #------------------------------------------------------------------- @@ -99,7 +99,7 @@ load_datasets <- function(recipe) { # Longitude circular sort and latitude check #------------------------------------------------------------------- circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) - + if (recipe$Analysis$Variables$freq == "monthly_mean"){ split_multiselected_dims = TRUE } else { @@ -110,10 +110,10 @@ load_datasets <- function(recipe) { #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, var = vars, - var_dir = var_dir_exp, + var_dir = var_dir_exp, file_date = sdates$hcst, time = idxs$hcst, - var_dir_depends = 'var', + var_dir_depends = 'var', latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), @@ -124,20 +124,20 @@ load_datasets <- function(recipe) { transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + ensemble = c('member', 'ensemble')), ensemble = indices(1:hcst.nmember), - metadata_dims = 'var', # change to just 'var'? + metadata_dims = 'var', # change to just 'var'? 
return_vars = list(latitude = 'dat', longitude = 'dat', time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - + # Remove var_dir dimension if ("var_dir" %in% names(dim(hcst))) { hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") } - + if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows # multidim split @@ -150,12 +150,12 @@ load_datasets <- function(recipe) { # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( - dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- dim(attr(hcst, "Variables")$common$time) dim(attr(hcst, "Variables")$common$time) <- default_time_dims } - + # Convert hcst to s2dv_cube object ## TODO: Give correct dimensions to $Dates ## (sday, sweek, syear instead of file_date) @@ -164,43 +164,43 @@ load_datasets <- function(recipe) { if (recipe$Analysis$Variables$freq == "monthly_mean") { hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) } - + # Load forecast #------------------------------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst_year)) { # the call uses file_date instead of fcst_syear so that it can work # with the daily case and the current version of startR not allowing # multiple dims split - + fcst <- Start(dat = fcst.path, - var = vars, - var_dir = var_dir_exp, - var_dir_depends = 'var', - file_date = sdates$fcst, + var = vars, + var_dir = var_dir_exp, + var_dir_depends = 'var', + file_date = sdates$fcst, time = idxs$fcst, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$fcst.transform, - transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), - ensemble = indices(1:fcst.nmember), + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = indices(1:fcst.nmember), metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), split_multiselected_dims = split_multiselected_dims, - retrieve = TRUE) - + retrieve = TRUE) + if ("var_dir" %in% names(dim(fcst))) { fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected") } - + if (recipe$Analysis$Variables$freq == "daily_mean") { # Adjusts dims for daily case, could be removed if startR allows # multidim split @@ -213,62 +213,62 @@ load_datasets <- function(recipe) { # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, 
time = 1) names(dim(attr(fcst, "Variables")$common$time))[which(names( - dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- dim(attr(fcst, "Variables")$common$time) dim(attr(fcst, "Variables")$common$time) <- default_time_dims } - + # Convert fcst to s2dv_cube fcst <- as.s2dv_cube(fcst) # Adjust dates for models where the time stamp goes into the next month if (recipe$Analysis$Variables$freq == "monthly_mean") { fcst$attrs$Dates[] <- - fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) } - + } else { fcst <- NULL } # Load reference #------------------------------------------------------------------- - + # Obtain dates and date dimensions from the loaded hcst data to make sure # the corresponding observations are loaded correctly. dates <- hcst$attrs$Dates dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] - + # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { - + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(dates) - + obs <- Start(dat = obs.path, var = vars, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) - + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + } else if (store.freq == "daily_mean") { - + # Get year and month for file_date dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -277,36 +277,36 @@ load_datasets <- function(recipe) { lubridate::minute(dates) <- 00 # Restore correct dimensions dim(dates) <- dim(dates_file) - + obs <- Start(dat = obs.path, - var = vars, - var_dir = var_dir_obs, - var_dir_depends = 'var', - file_date = sort(unique(dates_file)), - time = dates, - time_var = 'time', - time_across = 'file_date', - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 
'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - metadata_dims = 'var', - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = vars, + var_dir = var_dir_obs, + var_dir_depends = 'var', + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } - - + + # Remove var_dir dimension if ("var_dir" %in% names(dim(obs))) { obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") @@ -317,7 +317,7 @@ load_datasets <- function(recipe) { latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims - + # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) @@ -325,33 +325,33 @@ load_datasets <- function(recipe) { if (!(recipe$Analysis$Regrid$type == 'none')) { if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lat_error_msg) hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) info(recipe$Run$logger, hcst_lat_msg) obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) + "; Last obs lat: ", obs$lat[length(obs$lat)]) info(recipe$Run$logger, obs_lat_msg) stop("hcst and obs don't share the same latitudes.") } if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") + "Please check the original grids and the", + "regrid parameters in your recipe.") error(recipe$Run$logger, lon_error_msg) hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) info(recipe$Run$logger, hcst_lon_msg) obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) + "; Last obs lon: ", obs$lon[length(obs$lon)]) info(recipe$Run$logger, obs_lon_msg) stop("hcst and obs don't share the same longitudes.") - + } } - + # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") for (var_idx in 1:length(vars)) { @@ -359,36 +359,36 @@ load_datasets <- function(recipe) { if (dictionary$vars[[var_name]]$accum) { info(recipe$Run$logger, paste0("Accumulated variable ", var_name, - ": setting negative values to zero.")) + ": setting negative values to zero.")) # obs$data[, var_idx, , , , 
, , , ] <- pmax(Subset(obs$data, # along = "var", # indices = var_idx, F), 0) obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0 hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0 if (!is.null(fcst)) { - fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 + fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0 } } - + # Convert prlr from m/s to mm/day ## TODO: Make a unit conversion function if (vars[[var_idx]] == "prlr") { # Verify that the units are m/s and the same in obs and hcst if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || - (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && - ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || - (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { + (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && + ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") || + (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) { info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.") obs$data[, var_idx, , , , , , , ] <- - obs$data[, var_idx, , , , , , , ]*86400*1000 - obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + obs$data[, var_idx, , , , , , , ]*86400*1000 + obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day" hcst$data[, var_idx, , , , , , , ] <- - hcst$data[, var_idx, , , , , , , ]*86400*1000 - hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + hcst$data[, var_idx, , , , , , , ]*86400*1000 + hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" if (!is.null(fcst)) { fcst$data[, var_idx, , , , , , , ] <- - fcst$data[, var_idx, , , , , , , ]*86400*1000 - fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" + fcst$data[, var_idx, , , , , , , ]*86400*1000 + fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day" } } } @@ -402,7 +402,7 @@ load_datasets <- function(recipe) { data_summary(fcst, recipe) } } - + info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") @@ -453,7 +453,7 @@ load_datasets <- function(recipe) { ############################################################################ ############################################################################ - + return(list(hcst = hcst, fcst = fcst, obs = obs)) - + } diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R new file mode 100644 index 00000000..eb2fca6d --- /dev/null +++ b/modules/Loading/R/mask_tas_tos.R @@ -0,0 +1,78 @@ +library(multiApply) +library(startR) +library(s2dv) + +mask_tas_tos <- function(input_data, grid, lon, lat, region, + lon_dim = 'lon', lat_dim = 'lat', ncores = NULL){ + + + mask <- .load_mask(grid = grid, lon_dim = lon_dim, lat_dim = lat_dim, + sea_value = 1, land_value = 0, region = region) + + + ## TO DO: improve the check and correct lats + stopifnot(all(lon == mask$lon)) + stopifnot(max(abs(as.numeric(round(lat,2) - round(mask$lat,2)))) < 0.1) # stopifnot(all(lat == mask$lat)) + + tas <- Subset(input_data$data, along = 'var', indices = 1) + tos <- Subset(input_data$data, along = 'var', indices = 2) + + tas_tos <- multiApply::Apply(data = list(tas, tos), + target_dims = c(lon_dim, lat_dim), + fun = .mask_tas_tos, + mask = mask$mask, + sea_value = 1, + ncores = ncores)$output1 + input_data$data <- tas_tos + + return(input_data) +} + +.mask_tas_tos <- function(data_tas, data_tos, mask, sea_value){ + data_tas[mask == sea_value] <- data_tos[mask == sea_value] + 
return(data_tas) +} + +.load_mask <- function(grid, mask_path = NULL, land_value = 0, sea_value = 1, + lon_dim = 'lon', lat_dim = 'lat', region){ + + if (is.null(mask_path)){ + mask_sea_land_path <- '/esarchive/recon/ecmwf/era5land/constant/lsm-r3600x1801cds/lsm.nc' ##'/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc' + } else if (is.character(mask_path)){ + mask_sea_land_path <- mask_path + } else { + stop("mask_path must be NULL (to use the default mask and interpolate it to + the specified grid) or a string with the mask's path you want to load") + } + + lons.min <- region[1] + lons.max <- region[2] + lats.min <- region[3] + lats.max <- region[4] + + data <- startR::Start(dat = mask_sea_land_path, + var = 'lsm', + lon = 'all', + lat = 'all', + # lon = values(list(lons.min, lons.max)), + # lat = values(list(lats.min, lats.max)), + transform = CDORemapper, transform_extra_cells = 2, + transform_params = list(grid = grid, method = 'con', crop = region), + transform_vars = c('lat','lon'), + return_vars = list(lat = NULL, lon = NULL), + synonims = list(lon = c('lon','longitude'), + lat = c('lat','latitude')), + lat_reorder = Sort(decreasing = TRUE), + lon_reorder = CircularSort(0,359.9), + num_procs = 1, retrieve = TRUE) + + mask <- list(mask = drop(data), + lon = as.numeric(attr(data,'Variables')$common$lon), + lat = as.numeric(attr(data,'Variables')$common$lat)) + + mask$mask[data <= 0.5] <- sea_value + mask$mask[data > 0.5] <- land_value + names(dim(mask$mask)) <- c(lon_dim, lat_dim) + + return(mask) +} diff --git a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml new file mode 100644 index 00000000..b6e6d100 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml @@ -0,0 +1,49 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas tos + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5 + Multimodel: False + Reference: + name: HadCRUT5 + Time: + sdate: '0101' + fcst_year: + hcst_start: '2012' + hcst_end: '2015' + ftime_min: 1 + ftime_max: 1 + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: raw + Anomalies: + compute: yes + cross_validation: yes + Skill: + metric: mean_bias EnsCorr RPS RPSS CRPS CRPSS enssprerr + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: no + ncores: 7 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/recipes/recipe_scorecards_s2s-suite.yml b/recipes/recipe_scorecards_s2s-suite.yml new file mode 100644 index 00000000..ae17f9cd --- /dev/null +++ b/recipes/recipe_scorecards_s2s-suite.yml @@ -0,0 +1,50 @@ +Description: + Author: nmilders + Info: scorecards data + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: prlr # Mandatory, str: tas prlr psl sfcWind + freq: monthly_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory, str: system5c3s system21_m1 system35c3s system3_m1-c3s system2_m1 system7c3s + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: '0101' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 # Mandatory, int: minimum latitude + latmax: 90 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 359.9 # Mandatory, int: maximum longitude + Regrid: + method: conservative # conservative for prlr, bilinear for tas, psl, sfcWind + type: to_system + Workflow: + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + Anomalies: + compute: yes + cross_validation: no + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu. + Probabilities: + percentiles: [[1/3, 2/3], [1/10], [9/10]] # frac: Quantile thresholds. + Indicators: + index: no + ncores: 15 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: Scorecards #S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/cross_validation/tercile_cross_val/ECMWF-SEAS5/prlr/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/tas-tos_scorecards_data_loading.R b/tas-tos_scorecards_data_loading.R new file mode 100644 index 00000000..4f8bd1e3 --- /dev/null +++ b/tas-tos_scorecards_data_loading.R @@ -0,0 +1,78 @@ + +rm(list = ls()); gc() + +#args <- commandArgs(trailingOnly = TRUE) + + +setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/") + +#source("modules/Loading/Loading.R") +source("modules/Loading/Dev_Loading.R") +# source("modules/Anomalies/Anomalies.R") +# #source("modules/Calibration/Calibration.R") +# source("modules/Skill/Skill.R") +# source("modules/Saving/Saving.R") +#source("modules/Visualization/Visualization.R") +source("tools/prepare_outputs.R") + +recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml" +recipe <- prepare_outputs(recipe_file) + +## Run job for each start month +#recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01') + +## Load datasets +source("modules/Loading/Dev_Loading.R") +data <- load_datasets(recipe) + + + +################################################################################ + +## data checks +#dim(data$hcst$data) + +lon <- attributes(data$hcst$coords$longitude)$variables$longitude$dim[[1]]$vals +lat <- attributes(data$hcst$coords$latitude)$variables$latitude$dim[[1]]$vals + + +source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(0.1, 359.95, -90, 90), + grid = 'r360x181', lon = lon, lat = lat, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + +tas_tos_obs <- mask_tas_tos(input_data = data$obs, region = c(0.1, 359.95, -90, 90), + grid = 'r360x181', lon = lon, lat = lat, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + +source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +mask <- .load_mask(grid = 'r360x181', mask_path = NULL, + land_value = 0, sea_value = 1, + lon_dim = 'lon', lat_dim = 'lat', region = NULL) + +################################################################################ + +# plot_mask 
<- PlotEquiMap(var = mask$mask, lon = mask$lon, lat = mask$lat, +# #brks = c(-0.5,0, 0.5, 1), +# fileout = '/esarchive/scratch/nmilders/test_tastos/tastos_mask2.png') + + +## compute anomalies +anomalies <- compute_anomalies(recipe, data) + + +## calibrate - temp for decathlon +##calibrated_data <- calibrate_datasets(recipe, data) + + +## Compute skill metrics of data +skill_metrics <- compute_skill_metrics(recipe, anomalies) + + +## save data +save_data(recipe, data, skill_metrics = skill_metrics) +gc() + +## plot metrics maps +## plot_data(recipe, anomalies, skill_metrics, significance = T) -- GitLab From 62d1cb107449ef668d8e71e6d6c4f9db55627f10 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Thu, 4 May 2023 11:32:58 +0200 Subject: [PATCH 32/47] working version tas-tos --- modules/Loading/Dev_Loading.R | 6 ++--- modules/Loading/R/mask_tas_tos.R | 2 +- .../recipe_test_multivar_nadia.yml | 2 +- tas-tos_scorecards_data_loading.R | 25 ++++++------------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R index 5d16e54b..ee941a32 100644 --- a/modules/Loading/Dev_Loading.R +++ b/modules/Loading/Dev_Loading.R @@ -165,14 +165,14 @@ load_datasets <- function(recipe) { hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) } - ## tas tos loading + ## Combine tas and tos data into one variable: tas-tos if(recipe$Analysis$Variables$name == 'tas tos'){ if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5'){ source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') hcst <- mask_tas_tos(input_data = hcst, region = c(0.1, 359.95, -90, 90), grid = 'r360x181', - lon = attributes(hcst$coords$longitude)$variables$longitude$dim[[1]]$vals, - lat = attributes(hcst$coords$latitude)$variables$latitude$dim[[1]]$vals, + lon = hcst$coords$longitude, + lat = hcst$coords$latitude, lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) hcst$dims[['var']] <- dim(hcst$data)[['var']] diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R index eb2fca6d..c3c0ba8b 100644 --- a/modules/Loading/R/mask_tas_tos.R +++ b/modules/Loading/R/mask_tas_tos.R @@ -62,7 +62,7 @@ mask_tas_tos <- function(input_data, grid, lon, lat, region, return_vars = list(lat = NULL, lon = NULL), synonims = list(lon = c('lon','longitude'), lat = c('lat','latitude')), - lat_reorder = Sort(decreasing = TRUE), + lat_reorder = Sort(decreasing = FALSE), lon_reorder = CircularSort(0,359.9), num_procs = 1, retrieve = TRUE) diff --git a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml index b6e6d100..022211bf 100644 --- a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml @@ -41,7 +41,7 @@ Analysis: index: no ncores: 7 remove_NAs: yes - Output_format: S2S4E + Output_format: scorecards Run: Loglevel: INFO Terminal: yes diff --git a/tas-tos_scorecards_data_loading.R b/tas-tos_scorecards_data_loading.R index 4f8bd1e3..f7bf36e7 100644 --- a/tas-tos_scorecards_data_loading.R +++ b/tas-tos_scorecards_data_loading.R @@ -8,13 +8,14 @@ setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/") #source("modules/Loading/Loading.R") source("modules/Loading/Dev_Loading.R") -# source("modules/Anomalies/Anomalies.R") +source("modules/Anomalies/Anomalies.R") # #source("modules/Calibration/Calibration.R") -# source("modules/Skill/Skill.R") -# source("modules/Saving/Saving.R") 
+source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") #source("modules/Visualization/Visualization.R") -source("tools/prepare_outputs.R") + +source("tools/prepare_outputs.R") recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml" recipe <- prepare_outputs(recipe_file) @@ -32,8 +33,9 @@ data <- load_datasets(recipe) ## data checks #dim(data$hcst$data) -lon <- attributes(data$hcst$coords$longitude)$variables$longitude$dim[[1]]$vals -lat <- attributes(data$hcst$coords$latitude)$variables$latitude$dim[[1]]$vals + +lon <- attributes(data$hcst$coords$longitude) +lat <- attributes(data$hcst$coords$latitude) source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') @@ -53,23 +55,12 @@ mask <- .load_mask(grid = 'r360x181', mask_path = NULL, ################################################################################ -# plot_mask <- PlotEquiMap(var = mask$mask, lon = mask$lon, lat = mask$lat, -# #brks = c(-0.5,0, 0.5, 1), -# fileout = '/esarchive/scratch/nmilders/test_tastos/tastos_mask2.png') - - ## compute anomalies anomalies <- compute_anomalies(recipe, data) - -## calibrate - temp for decathlon -##calibrated_data <- calibrate_datasets(recipe, data) - - ## Compute skill metrics of data skill_metrics <- compute_skill_metrics(recipe, anomalies) - ## save data save_data(recipe, data, skill_metrics = skill_metrics) gc() -- GitLab From 408107f54552389dc50f0ec896fd3e1e39e30e62 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 4 May 2023 14:49:34 +0200 Subject: [PATCH 33/47] Separate functions in different files --- modules/Saving/R/Utils.R | 69 ++ modules/Saving/R/get_dir.R | 55 ++ modules/Saving/R/get_filename.R | 54 ++ modules/Saving/R/save_corr.R | 117 +++ modules/Saving/R/save_forecast.R | 137 +++ modules/Saving/R/save_metrics.R | 120 +++ modules/Saving/R/save_observations.R | 137 +++ modules/Saving/R/save_percentiles.R | 107 +++ modules/Saving/R/save_probabilities.R | 124 +++ modules/Saving/Saving.R | 837 +----------------- modules/Saving/paths2save.R | 111 --- modules/Visualization/R/plot_ensemble_mean.R | 88 ++ .../R/plot_most_likely_terciles_map.R | 88 ++ modules/Visualization/R/plot_skill_metrics.R | 161 ++++ modules/Visualization/Visualization.R | 343 +------ 15 files changed, 1270 insertions(+), 1278 deletions(-) create mode 100644 modules/Saving/R/Utils.R create mode 100644 modules/Saving/R/get_dir.R create mode 100644 modules/Saving/R/get_filename.R create mode 100644 modules/Saving/R/save_corr.R create mode 100644 modules/Saving/R/save_forecast.R create mode 100644 modules/Saving/R/save_metrics.R create mode 100644 modules/Saving/R/save_observations.R create mode 100644 modules/Saving/R/save_percentiles.R create mode 100644 modules/Saving/R/save_probabilities.R delete mode 100644 modules/Saving/paths2save.R create mode 100644 modules/Visualization/R/plot_ensemble_mean.R create mode 100644 modules/Visualization/R/plot_most_likely_terciles_map.R create mode 100644 modules/Visualization/R/plot_skill_metrics.R diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R new file mode 100644 index 00000000..a5bd5d0c --- /dev/null +++ b/modules/Saving/R/Utils.R @@ -0,0 +1,69 @@ +.get_global_attributes <- function(recipe, archive) { + # Generates metadata of interest to add to the global attributes of the + # netCDF files. 
+ parameters <- recipe$Analysis + hcst_period <- paste0(parameters$Time$hcst_start, " to ", + parameters$Time$hcst_end) + current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) + system_name <- parameters$Datasets$System$name + reference_name <- parameters$Datasets$Reference$name + + attrs <- list(reference_period = hcst_period, + institution_system = archive$System[[system_name]]$institution, + institution_reference = archive$Reference[[reference_name]]$institution, + system = system_name, + reference = reference_name, + calibration_method = parameters$Workflow$Calibration$method, + computed_on = current_time) + + return(attrs) +} + +.get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { + # Generates time dimensions and the corresponding metadata. + ## TODO: Subseasonal + + switch(fcst.horizon, + "seasonal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}, + "subseasonal" = {len <- 4; ref <- 'hours since '; + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) + + dim(time) <- length(time) + sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting + metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), + calendar = calendar)) + attr(time, 'variables') <- metadata + names(dim(time)) <- 'time' + + sdate <- 1:length(sdate) + dim(sdate) <- length(sdate) + metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), + collapse=", "), + units = paste0('Init date'))) + attr(sdate, 'variables') <- metadata + names(dim(sdate)) <- 'sdate' + + return(list(time=time)) +} + +.get_latlon <- function(latitude, longitude) { + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' + + return(list(lat=latitude, lon=longitude)) + +} diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R new file mode 100644 index 00000000..511dd3c0 --- /dev/null +++ b/modules/Saving/R/get_dir.R @@ -0,0 +1,55 @@ +## TODO: Separate by time aggregation +get_dir <- function(recipe, agg = "global") { + # This function builds the path for the output directory. The output + # directories will be subdirectories within outdir, organized by variable, + # startdate, and aggregation. 
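+ # For the default format with a global aggregation this yields, roughly,
+ # <output_dir>/outputs/<system>/<calibration>-<frequency>/<variable>/<startdate>/
+ # (with 'hcst-' prepended to the start date when no forecast year is requested),
+ # while the Scorecards format uses <output_dir>/outputs/<system>/<variable>/.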
+ + ## TODO: Get aggregation from recipe + outdir <- paste0(recipe$Run$output_dir, "/outputs/") + ## TODO: multivar case + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name + dir <- paste0(outdir, "/", system, "/", variable, "/") + } else { + # Default generic output format based on FOCUS + # Get startdate or hindcast period + if (!is.null(recipe$Analysis$Time$fcst_year)) { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + ## PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } + } else { + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + ## PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$hcst_end, + sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } + } + + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) + store.freq <- recipe$Analysis$Variables$freq + ## TODO: Change "_country" + if (!is.null(recipe$Analysis$Region$name)) { + outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) + } + switch(tolower(agg), + "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, + "_country/", fcst.sdate, "/")}, + "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, + "-", store.freq, "/", variable, "/", + fcst.sdate, "/")}) + } + return(dir) +} diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R new file mode 100644 index 00000000..b991fd61 --- /dev/null +++ b/modules/Saving/R/get_filename.R @@ -0,0 +1,54 @@ +## TODO: Separate by time aggregation + +get_filename <- function(dir, recipe, var, date, agg, file.type) { + # This function builds the path of the output file based on directory, + # variable, forecast date, startdate, aggregation, forecast horizon and + # type of metric/forecast/probability. 
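+ # For instance, a monthly skill file in the default format comes out roughly as
+ # <var>-skill_month<MM>.nc, and in the Scorecards format as
+ # scorecards_<system>_<reference>_<var>-skill_<hcst_start>-<hcst_end>_s<MM>.nc.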
+ + if (recipe$Analysis$Horizon == "subseasonal") { + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") + dd <- "week" + } else { + shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") + dd <- "month" + } + + switch(tolower(agg), + "country" = {gg <- "-country"}, + "global" = {gg <- ""}) + + system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) + + if (tolower(recipe$Analysis$Output_format) == 'scorecards') { + # Define output dir name accordint to Scorecards format + dict <- read_yaml("conf/output_dictionaries/scorecards.yml") + # Get necessary names + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + + switch(file.type, + "skill" = {type_info <- "-skill_"}, + "corr" = {type_info <- "-corr_"}, + "exp" = {type_info <- paste0("_", date, "_")}, + "obs" = {type_info <- paste0("-obs_", date, "_")}, + "percentiles" = {type_info <- "-percentiles_"}, + "probs" = {type_info <- paste0("-probs_", date, "_")}, + "bias" = {type_info <- paste0("-bias_", date, "_")}) + + # Build file name + file <- paste0("scorecards_", system, "_", reference, "_", + var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) + } else { + switch(file.type, + "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, + "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, + "exp" = {file <- paste0(var, gg, "_", date)}, + "obs" = {file <- paste0(var, gg, "-obs_", date)}, + "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, + shortdate)}, + "probs" = {file <- paste0(var, gg, "-probs_", date)}, + "bias" = {file <- paste0(var, gg, "-bias_", date)}) + } + return(paste0(dir, file, ".nc")) +} diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R new file mode 100644 index 00000000..e884d973 --- /dev/null +++ b/modules/Saving/R/save_corr.R @@ -0,0 +1,117 @@ +save_corr <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the ensemble correlation in 'skill' + # and exports it to a netCDF file inside 'outdir'. 
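+ # 'skill' is expected to be a named list of correlation arrays (one element
+ # per metric), each keeping an 'ensemble' dimension alongside longitude,
+ # latitude and time.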
+ + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'ensemble', 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(global_attributes, + list(from_anomalies = "Yes")) + } else { + global_attributes <- c(global_attributes, + list(from_anomalies = "No")) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "corr") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R new file mode 100644 index 00000000..823b132a --- /dev/null +++ b/modules/Saving/R/save_forecast.R @@ -0,0 +1,137 @@ +save_forecast <- function(recipe, + data_cube, + type = "hcst", + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } + } else { + if (type == 'hcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } else if (type == 'fcst') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + # expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export 
to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R new file mode 100644 index 00000000..0dd60c81 --- /dev/null +++ b/modules/Saving/R/save_metrics.R @@ -0,0 +1,120 @@ +save_metrics <- function(recipe, + skill, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the skill metrics in 'skill' + # and exports them to a netCDF file inside 'outdir'. + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + + + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + skill <- lapply(skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(skill)) { + metric <- names(skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
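+ # (e.g. with no forecast year and sdate '1101', fcst.sdate becomes '19701101')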
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "skill") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") +} + diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R new file mode 100644 index 00000000..3e8acdd8 --- /dev/null +++ b/modules/Saving/R/save_observations.R @@ -0,0 +1,137 @@ +save_observations <- function(recipe, + data_cube, + agg = "global", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing the observations and + # exports each year to a netCDF file. + # data_cube: s2dv_cube containing the data and metadata + # recipe: the auto-s2s recipe + # outdir: directory where the files should be saved + # agg: aggregation, "global" or "country" + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + dictionary <- read_yaml("conf/variable-dictionary.yml") + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$Reference[[global_attributes$reference]]$calendar + + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + ## Method 1: Use the first date as init_date. 
But it may be better to use + ## the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + ## Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } + + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } + } + + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R new file mode 100644 index 00000000..0163644b --- /dev/null +++ b/modules/Saving/R/save_percentiles.R @@ -0,0 +1,107 @@ +save_percentiles <- function(recipe, + percentiles, + data_cube, + agg = "global", + outdir = NULL) { + # This function adds metadata to the percentiles + # and exports them to a netCDF file inside 'outdir'. 
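+ # 'percentiles' is expected to be a named list of arrays (with names like
+ # 'percentile_33'), each with longitude, latitude and time dimensions.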
+ archive <- get_archive(recipe) + + # Define grid dimensions and names + lalo <- c('longitude', 'latitude') + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + percentiles <- lapply(percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + # Add global and variable attributes + global_attributes <- .get_global_attributes(recipe, archive) + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + attr(percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(percentiles[[i]], 'variables') <- metadata + names(dim(percentiles[[i]])) <- dims + } + + # Time indices and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + # Generate vector containing leadtimes + dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + # Select start date + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
+ if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } + } else { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } + } + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "percentiles") + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, percentiles), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") +} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R new file mode 100644 index 00000000..4efced11 --- /dev/null +++ b/modules/Saving/R/save_probabilities.R @@ -0,0 +1,124 @@ +save_probabilities <- function(recipe, + probs, + data_cube, + agg = "global", + type = "hcst", + outdir = NULL) { + # Loops over the years in the s2dv_cube containing a hindcast or forecast + # and exports the corresponding category probabilities to a netCDF file. + # probs: array containing the probability data + # recipe: the auto-s2s recipe + # data_cube: s2dv_cube containing the data and metadata + # outdir: directory where the files should be saved + # type: 'exp' (hcst and fcst) or 'obs' + # agg: aggregation, "global" or "country" + # type: 'hcst' or 'fcst' + + lalo <- c('longitude', 'latitude') + archive <- get_archive(recipe) + variable <- data_cube$attrs$Variable$varName + var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name + global_attributes <- .get_global_attributes(recipe, archive) + if (is.null(outdir)) { + outdir <- get_dir(recipe) + } + # Add anomaly computation to global attributes + ## TODO: Sort out the logic once default behavior is decided + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + global_attributes <- c(list(from_anomalies = "Yes"), + global_attributes) + } else { + global_attributes <- c(list(from_anomalies = "No"), + global_attributes) + } + fcst.horizon <- tolower(recipe$Analysis$Horizon) + store.freq <- recipe$Analysis$Variables$freq + calendar <- archive$System[[global_attributes$system]]$calendar + + # Generate vector containing leadtimes + ## TODO: Move to a separate function? 
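+ # (leadtimes are expressed as hours elapsed since the initialization date)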
+ dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + cal = calendar) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01'), + cal = calendar) + } else { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d', cal = calendar) + } + + # Get time difference in hours + leadtimes <- as.numeric(dates - init_date)/3600 + + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + ## expect dim = [sday = 1, sweek = 1, syear, time] + syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) + } else { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) + } + + ## TODO: Replace for loop with something more efficient? + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } + } + + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) +} diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 61ef2938..5f7cbc67 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -1,7 +1,15 @@ ## TODO: Save obs percentiles ## TODO: Insert vardim to simplify the code? 
-source("modules/Saving/paths2save.R") +source("modules/Saving/R/get_dir.R") +source("modules/Saving/R/get_filename.R") +source("modules/Saving/R/Utils.R") +source("modules/Saving/R/save_forecast.R") +source("modules/Saving/R/save_observations.R") +source("modules/Saving/R/save_metrics.R") +source("modules/Saving/R/save_corr.R") +source("modules/Saving/R/save_probabilities.R") +source("modules/Saving/R/save_percentiles.R") save_data <- function(recipe, data, skill_metrics = NULL, @@ -159,830 +167,3 @@ save_data <- function(recipe, data, } } } - -get_global_attributes <- function(recipe, archive) { - # Generates metadata of interest to add to the global attributes of the - # netCDF files. - parameters <- recipe$Analysis - hcst_period <- paste0(parameters$Time$hcst_start, " to ", - parameters$Time$hcst_end) - current_time <- paste0(as.character(Sys.time()), " ", Sys.timezone()) - system_name <- parameters$Datasets$System$name - reference_name <- parameters$Datasets$Reference$name - - attrs <- list(reference_period = hcst_period, - institution_system = archive$System[[system_name]]$institution, - institution_reference = archive$Reference[[reference_name]]$institution, - system = system_name, - reference = reference_name, - calibration_method = parameters$Workflow$Calibration$method, - computed_on = current_time) - - return(attrs) -} - -get_times <- function(store.freq, fcst.horizon, leadtimes, sdate, calendar) { - # Generates time dimensions and the corresponding metadata. - ## TODO: Subseasonal - - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- 4; ref <- 'hours since '; - stdname <- ''}, - "decadal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}) - - dim(time) <- length(time) - sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting - metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - - sdate <- 1:length(sdate) - dim(sdate) <- length(sdate) - metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), - collapse=", "), - units = paste0('Init date'))) - attr(sdate, 'variables') <- metadata - names(dim(sdate)) <- 'sdate' - - return(list(time=time)) -} - -get_latlon <- function(latitude, longitude) { - # Adds dimensions and metadata to lat and lon - # latitude: array containing the latitude values - # longitude: array containing the longitude values - - dim(longitude) <- length(longitude) - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 'longitude' - - dim(latitude) <- length(latitude) - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' - - return(list(lat=latitude, lon=longitude)) - -} - -save_forecast <- function(recipe, - data_cube, - type = "hcst", - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports each year to a netCDF file. 
- # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. But it may be better to use - ## the real initialized date (ask users) - # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year[1], '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } - } else { - if (type == 'hcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } else if (type == 'fcst') { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - # expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date 
is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, variable, fcst.sdate, - agg, "exp") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) -} - - -save_observations <- function(recipe, - data_cube, - agg = "global", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing the observations and - # exports each year to a netCDF file. - # data_cube: s2dv_cube containing the data and metadata - # recipe: the auto-s2s recipe - # outdir: directory where the files should be saved - # agg: aggregation, "global" or "country" - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$Reference[[global_attributes$reference]]$calendar - - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - ## Method 1: Use the first date as init_date. 
But it may be better to use - ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') - ## Method 2: use initial month - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } - - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. 
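In the decadal branch that follows, the start date of each observation file is rebuilt from the initialization date plus a whole-year offset. A toy illustration with made-up dates, assuming lubridate is attached:

library(lubridate)

init_date <- as.POSIXct("1990-11-01", tz = "UTC")  # decadal initialization date
syear_val <- 1993                                  # year of the current hindcast member

fcst_sdate <- init_date + years(syear_val - year(init_date))
format(fcst_sdate, "%Y%m%d")  # "19931101"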
- if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { - - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') - } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) - } - } - - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) - } - } - - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") -} - -## TODO: Place inside a function somewhere -# if (tolower(agg) == "country") { -# load(mask.path) -# grid <- europe.countries.iso -# } else { -# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, -# lat=attr(var.obs, 'Variables')$dat1$latitude) -# } - -save_metrics <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the skill metrics in 'skill' - # and exports them to a netCDF file inside 'outdir'. 
- - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - - - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
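The reference date selected below is only used to label the output files. A tiny illustration of the fallback with made-up recipe values: when no forecast year is present, the placeholder year 1970 keeps the date string valid.

fcst_year <- NULL     # e.g. recipe$Analysis$Time$fcst_year
sdate     <- "1101"   # seasonal start date (MMDD)

fcst_sdate <- if (!is.null(fcst_year)) {
  paste0(fcst_year, sdate)
} else {
  paste0("1970", sdate)
}
fcst_sdate  # "19701101"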
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") -} - -save_corr <- function(recipe, - skill, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the ensemble correlation in 'skill' - # and exports it to a netCDF file inside 'outdir'. - - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'ensemble', 'time'))}) - } - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(global_attributes, - list(from_anomalies = "Yes")) - } else { - global_attributes <- c(global_attributes, - list(from_anomalies = "No")) - } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'ensemble', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'ensemble', 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- 
archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, - "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") -} - -save_percentiles <- function(recipe, - percentiles, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the percentiles - # and exports them to a netCDF file inside 'outdir'. 
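Further down, the variable names inside 'percentiles' are turned into readable long_name attributes by stripping everything before the last underscore. A toy example of that transformation, with assumed input names of the form "percentile_NN" (note the suffix is always "th", which the TODO about proper standard names presumably covers):

percentile_names <- c("percentile_10", "percentile_33",
                      "percentile_66", "percentile_90")
long_names <- paste0(gsub("^.*_", "", percentile_names), "th percentile")
long_names
# "10th percentile" "33th percentile" "66th percentile" "90th percentile"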
- archive <- get_archive(recipe) - - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - # Add global and variable attributes - global_attributes <- get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - sprintf('%02d', init_month), '01') - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) - } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") -} - -save_probabilities <- function(recipe, - probs, - data_cube, - agg = "global", - type = "hcst", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports the corresponding category probabilities to a netCDF file. - # probs: array containing the probability data - # recipe: the auto-s2s recipe - # data_cube: s2dv_cube containing the data and metadata - # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' - # agg: aggregation, "global" or "country" - # type: 'hcst' or 'fcst' - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name - global_attributes <- get_global_attributes(recipe, archive) - if (is.null(outdir)) { - outdir <- get_dir(recipe) - } - # Add anomaly computation to global attributes - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - calendar <- archive$System[[global_attributes$system]]$calendar - - # Generate vector containing leadtimes - ## TODO: Move to a separate function? 
- dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - if (fcst.horizon == 'decadal') { - init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - sprintf('%02d', init_month), '-01'), - cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Get time difference in hours - leadtimes <- as.numeric(dates - init_date)/3600 - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) - } - - ## TODO: Replace for loop with something more efficient? - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? - } - - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) - ArrayToNc(vars, outfile) - } - } - - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) -} diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R deleted file mode 100644 index 2a136e6f..00000000 --- a/modules/Saving/paths2save.R +++ /dev/null @@ -1,111 +0,0 @@ -## TODO: Separate by time aggregation -## TODO: Build a default path that accounts for: -## variable, system, reference, start date and region name - -get_filename <- function(dir, recipe, var, date, agg, file.type) { - # This function builds the path of the output file based on directory, - # variable, forecast date, startdate, aggregation, forecast horizon and - # type 
of metric/forecast/probability. - - if (recipe$Analysis$Horizon == "subseasonal") { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") - dd <- "week" - } else { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") - dd <- "month" - } - - switch(tolower(agg), - "country" = {gg <- "-country"}, - "global" = {gg <- ""}) - - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # Get necessary names - hcst_start <- recipe$Analysis$Time$hcst_start - hcst_end <- recipe$Analysis$Time$hcst_end - - switch(file.type, - "skill" = {type_info <- "-skill_"}, - "corr" = {type_info <- "-corr_"}, - "exp" = {type_info <- paste0("_", date, "_")}, - "obs" = {type_info <- paste0("-obs_", date, "_")}, - "percentiles" = {type_info <- "-percentiles_"}, - "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}) - - # Build file name - file <- paste0("scorecards_", system, "_", reference, "_", - var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) - } else { - switch(file.type, - "skill" = {file <- paste0(var, gg, "-skill_", dd, shortdate)}, - "corr" = {file <- paste0(var, gg, "-corr_", dd, shortdate)}, - "exp" = {file <- paste0(var, gg, "_", date)}, - "obs" = {file <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {file <- paste0(var, gg, "-percentiles_", dd, - shortdate)}, - "probs" = {file <- paste0(var, gg, "-probs_", date)}, - "bias" = {file <- paste0(var, gg, "-bias_", date)}) - } - return(paste0(dir, file, ".nc")) -} - -get_dir <- function(recipe, agg = "global") { - # This function builds the path for the output directory. The output - # directories will be subdirectories within outdir, organized by variable, - # startdate, and aggregation. 
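As an illustration of the default (non-Scorecards) layout built below, here is a sketch with made-up recipe values; the exact path also depends on the region settings, which are omitted here.

output_dir <- "/path/to/output"   # recipe$Run$output_dir (hypothetical)
system     <- "ECMWF-SEAS5"       # dots already removed from the system name
calib      <- "mse_min"
freq       <- "monthly_mean"
variable   <- "tas"
fcst_sdate <- "hcst-1101"         # hindcast mode: "hcst-" plus the start date

dir <- paste0(output_dir, "/outputs/", "/", system, "/", calib, "-", freq,
              "/", variable, "/", fcst_sdate, "/")
dir
# "/path/to/output/outputs//ECMWF-SEAS5/mse_min-monthly_mean/tas/hcst-1101/"
# (the doubled slash comes from the two paste0() calls and is harmless on POSIX paths)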
- - ## TODO: Get aggregation from recipe - outdir <- paste0(recipe$Run$output_dir, "/outputs/") - ## TODO: multivar case - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") - } else { - # Default generic output format based on FOCUS - # Get startdate or hindcast period - if (!is.null(recipe$Analysis$Time$fcst_year)) { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - ## PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') - } else { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } - } else { - if (tolower(recipe$Analysis$Horizon) == 'decadal') { - ## PROBLEM: decadal doesn't have sdate - fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$hcst_end, - sep = '_')) - } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) - } - } - - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - store.freq <- recipe$Analysis$Variables$freq - ## TODO: Change "_country" - if (!is.null(recipe$Analysis$Region$name)) { - outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) - } - switch(tolower(agg), - "country" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, - "_country/", fcst.sdate, "/")}, - "global" = {dir <- paste0(outdir, "/", system, "/", calib.method, - "-", store.freq, "/", variable, "/", - fcst.sdate, "/")}) - } - return(dir) -} diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R new file mode 100644 index 00000000..c104c892 --- /dev/null +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -0,0 +1,88 @@ +plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + units <- attr(fcst$Variable, "variable")$units + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + # Compute ensemble mean + ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') + # Drop extra dims, add time dim if missing: + ensemble_mean <- drop(ensemble_mean) + + if (!("time" %in% names(dim(ensemble_mean)))) { + dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) + } + if (!'syear' %in% names(dim(ensemble_mean))) { + ensemble_mean <- Reorder(ensemble_mean, c("time", + "longitude", + "latitude")) + } else { + ensemble_mean <- Reorder(ensemble_mean, c("syear", + "time", + "longitude", + "latitude")) + } + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F + } else { + palette = "RdBu" + rev = T + } + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long_name)) { + variable <- paste(variable, 
"anomaly") + max_value <- max(abs(ensemble_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_ensemble_mean <- ensemble_mean + outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") + } else { + i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + } + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_ensemble_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + } + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R new file mode 100644 index 00000000..f9ff1c86 --- /dev/null +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -0,0 +1,88 @@ +## TODO: Change name +plot_most_likely_terciles <- function(recipe, archive, + fcst, + probabilities, + outdir) { + + ## TODO: Add 'anomaly' to plot title + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + stop("Visualization functions not yet implemented for daily data.") + } + + latitude <- fcst$coords$lat + longitude <- fcst$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + variable <- recipe$Analysis$Variables$name + start_date <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + + # Retrieve and rearrange probability bins for the forecast + if (is.null(probabilities$probs_fcst$prob_b33) || + is.null(probabilities$probs_fcst$prob_33_to_66) || + is.null(probabilities$probs_fcst$prob_a66)) { + stop("The forecast tercile probability bins are not present inside ", + "'probabilities', the most likely tercile map cannot be plotted.") + } + + probs_fcst <- abind(probabilities$probs_fcst$prob_b33, + probabilities$probs_fcst$prob_33_to_66, + probabilities$probs_fcst$prob_a66, + along = 0) + names(dim(probs_fcst)) <- c("bin", + names(dim(probabilities$probs_fcst$prob_b33))) + + ## TODO: Improve this section + # Drop extra dims, add time dim if missing: + probs_fcst <- drop(probs_fcst) + if (!("time" %in% names(dim(probs_fcst)))) { + dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) + } + if (!'syear' %in% names(dim(probs_fcst))) { + probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) + } else { + probs_fcst <- Reorder(probs_fcst, + c("syear", "time", "bin", "longitude", "latitude")) + } + + for (i_syear in start_date) { + # Define name of output file and titles + if (length(start_date) == 1) { + i_probs_fcst <- probs_fcst + outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, + 
".png") + } else { + i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] + outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + } + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. + suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_probs_fcst, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) + } + + info(recipe$Run$logger, + "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R new file mode 100644 index 00000000..8bc8ebc4 --- /dev/null +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -0,0 +1,161 @@ +plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, + outdir, significance = F) { + # recipe: Auto-S2S recipe + # archive: Auto-S2S archive + # data_cube: s2dv_cube object with the corresponding hindcast data + # skill_metrics: list of named skill metrics arrays + # outdir: output directory + # significance: T/F, whether to display the significance dots in the plots + + ## TODO: OPTION for CERISE: Using PuOr + # Abort if frequency is daily + if (recipe$Analysis$Variables$freq == "daily_mean") { + error(recipe$Run$logger, "Visualization functions not yet implemented + for daily data.") + stop() + } + # Abort if skill_metrics is not list + if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { + stop("The element 'skill_metrics' must be a list of named arrays.") + } + + latitude <- data_cube$coords$lat + longitude <- data_cube$coords$lon + system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name + hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, + start = 1, stop = 2)) + month_label <- tolower(month.name[init_month]) + month_abbreviation <- month.abb[init_month] + + # Define color palette and number of breaks according to output format + ## TODO: Make separate function + if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { + diverging_palette <- "purpleorange" + sequential_palette <- "Oranges" + } else { + diverging_palette <- "bluered" + sequential_palette <- "Reds" + } + + # Group different metrics by type + skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", + "enscorr", "rpss_specs", "bss90_specs", "bss10_specs", + "enscorr_specs", "rmsss") + scores <- c("rps", "frps", "crps", "frps_specs") + # Assign colorbar to each metric type + ## TODO: Triangle ends + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { 
+ display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- skill_metrics[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- skill_metrics[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- skill_metrics[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "mean_bias") { + skill <- skill_metrics[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + } + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- skill_metrics[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. 
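The ArrayToList() call below produces the per-panel argument lists that PlotLayout() expects in 'special_args'. A self-contained toy example, assuming ClimProjDiags is installed; the array contents are random and only illustrate the shapes involved:

library(ClimProjDiags)

signif <- array(runif(2 * 4 * 3) > 0.5,
                dim = c(time = 2, longitude = 4, latitude = 3))
special_args <- ArrayToList(signif, dim = "time",
                            level = "sublist", names = "dots")
length(special_args)          # one element per time step (one per plot panel)
names(special_args[[1]])      # "dots": passed on to PlotEquiMap() as the dot mask
dim(special_args[[1]]$dots)   # expected: longitude x latitude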
+ skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir, name, "-", month_label, ".png") + } else { + outfile <- paste0(outdir, name, ".png") + } + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, + "-", system_name, "-", month_abbreviation, + hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) + } + } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index cc0cd88e..86acbbad 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -4,6 +4,10 @@ ## TODO: Add param 'raw'? ## TODO: Decadal plot names +source("modules/Visualization/R/plot_skill_metrics.R") +source("modules/Visualization/R/plot_most_likely_terciles_map.R") +source("modules/Visualization/R/plot_ensemble_mean.R") + plot_data <- function(recipe, data, skill_metrics = NULL, @@ -58,342 +62,3 @@ plot_data <- function(recipe, probabilities, outdir) } } - -plot_skill_metrics <- function(recipe, archive, data_cube, skill_metrics, - outdir, significance = F) { - # recipe: Auto-S2S recipe - # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data - # skill_metrics: list of named skill metrics arrays - # outdir: output directory - # significance: T/F, whether to display the significance dots in the plots - - ## TODO: OPTION for CERISE: Using PuOr - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - error(recipe$Run$logger, "Visualization functions not yet implemented - for daily data.") - stop() - } - # Abort if skill_metrics is not list - if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { - stop("The element 'skill_metrics' must be a list of named arrays.") - } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", - recipe$Analysis$Time$hcst_end) - init_month <- as.numeric(substr(recipe$Analysis$Time$sdate, - start = 1, stop = 2)) - month_label <- tolower(month.name[init_month]) - month_abbreviation <- month.abb[init_month] - - # Define color palette and number of breaks according to output format - ## TODO: Make separate function - if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) { - diverging_palette <- "purpleorange" - sequential_palette <- "Oranges" - } else { - diverging_palette <- "bluered" - sequential_palette <- "Reds" - } - - # Group different metrics by type - skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss", - "enscorr", "rpss_specs", 
"bss90_specs", "bss10_specs", - "enscorr_specs", "rmsss") - scores <- c("rps", "frps", "crps", "frps_specs") - # Assign colorbar to each metric type - ## TODO: Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] - } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. 
- skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") - } else { - outfile <- paste0(outdir, name, ".png") - } - toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) - months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = cols, - col_inf = col_inf, - col_sup = col_sup, - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) - } - } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") -} - -plot_ensemble_mean <- function(recipe, archive, fcst, outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - units <- attr(fcst$Variable, "variable")$units - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - # Compute ensemble mean - ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') - # Drop extra dims, add time dim if missing: - ensemble_mean <- drop(ensemble_mean) - - if (!("time" %in% names(dim(ensemble_mean)))) { - dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) - } - if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) - } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) - } - ## TODO: Redefine column colors, possibly depending on variable - if (variable == 'prlr') { - palette = "BrBG" - rev = F - } else { - palette = "RdBu" - rev = T - } - # Define brks, centered on in the case of anomalies - ## - if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { - variable <- paste(variable, "anomaly") - max_value <- max(abs(ensemble_mean)) - ugly_intervals <- seq(-max_value, max_value, max_value/20) - brks <- pretty(ugly_intervals, n = 12, min.n = 8) - } else { - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - } - cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - options(bitmapType = "cairo") - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_ensemble_mean <- ensemble_mean - outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") - } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") - } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- 
lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") -} - -plot_most_likely_terciles <- function(recipe, archive, - fcst, - probabilities, - outdir) { - - ## TODO: Add 'anomaly' to plot title - # Abort if frequency is daily - if (recipe$Analysis$Variables$freq == "daily_mean") { - stop("Visualization functions not yet implemented for daily data.") - } - - latitude <- fcst$coords$lat - longitude <- fcst$coords$lon - system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - start_date <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - - # Retrieve and rearrange probability bins for the forecast - if (is.null(probabilities$probs_fcst$prob_b33) || - is.null(probabilities$probs_fcst$prob_33_to_66) || - is.null(probabilities$probs_fcst$prob_a66)) { - stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") - } - - probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) - names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) - - ## TODO: Improve this section - # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) - } - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") - } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") - } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. 
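Earlier in this function the three tercile probability fields are stacked along a new leading 'bin' dimension with abind() before being handed to PlotMostLikelyQuantileMap(). A toy illustration with made-up arrays, assuming the abind package is available:

library(abind)

dims <- c(syear = 1, time = 2, longitude = 4, latitude = 3)
prob_b33      <- array(0.2, dim = dims)   # below lower tercile
prob_33_to_66 <- array(0.3, dim = dims)   # middle tercile
prob_a66      <- array(0.5, dim = dims)   # above upper tercile

probs_fcst <- abind(prob_b33, prob_33_to_66, prob_a66, along = 0)
names(dim(probs_fcst)) <- c("bin", names(dims))
dim(probs_fcst)  # bin = 3, syear = 1, time = 2, longitude = 4, latitude = 3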
- suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) - } - - info(recipe$Run$logger, - "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") -} -- GitLab From 43e3d33df2f32914a60a373fb0a1ea19345fdfac Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 5 May 2023 16:58:56 +0200 Subject: [PATCH 34/47] Fix bug in call to get_dir() --- modules/Saving/Saving.R | 3 ++- modules/Visualization/Visualization.R | 4 +++- tests/testthat/test-seasonal_monthly.R | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 5f7cbc67..9d901a7c 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -36,7 +36,8 @@ save_data <- function(recipe, data, stop() } # Create output directory - outdir <- get_dir(recipe) + outdir <- get_dir(recipe = recipe, + variable = data$hcst$attrs$Variable$varName) for (directory in outdir) { dir.create(directory, showWarnings = FALSE, recursive = TRUE) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 52cb7127..44640055 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -23,7 +23,9 @@ plot_data <- function(recipe, plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") - outdir <- paste0(get_dir(recipe)) + ## TODO: Sort this out + outdir <- get_dir(recipe = recipe, + variable = data$hcst$attrs$Variable$varName) for (directory in outdir) { dir.create(directory, showWarnings = FALSE, recursive = TRUE) } diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 83b5ceab..cbee82cf 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -41,7 +41,7 @@ plot_data(recipe = recipe, data = calibrated_data, skill_metrics = skill_metrics, probabilities = probs, significance = T) ))}) -outdir <- get_dir(recipe) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) # ------- TESTS -------- -- GitLab From 1f7da2c2d8146a94e588df2f457e7a58f5c91d60 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 10:52:13 +0200 Subject: [PATCH 35/47] Improve saving function to handle var dimension --- modules/Saving/R/save_corr.R | 105 ++++++++------- modules/Saving/R/save_forecast.R | 161 ++++++++++++----------- modules/Saving/R/save_metrics.R | 102 ++++++++------- modules/Saving/R/save_observations.R | 176 +++++++++++++------------- modules/Saving/R/save_percentiles.R | 96 +++++++------- modules/Saving/R/save_probabilities.R | 132 ++++++++++--------- modules/Saving/Saving.R | 147 ++++++--------------- 7 files changed, 443 insertions(+), 476 deletions(-) diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 9533885b..146c5c99 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -10,11 +10,6 @@ save_corr <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") # Define grid dimensions and names lalo <- c('longitude', 
'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'ensemble', 'time'))}) - } # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -26,27 +21,6 @@ save_corr <- function(recipe, global_attributes <- c(global_attributes, list(from_anomalies = "No")) } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'ensemble', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) - dims <- c(lalo, 'ensemble', 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -92,29 +66,62 @@ save_corr <- function(recipe, times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, variable = data_cube$attrs$Variable$varName) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "corr") + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- .get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "corr") + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'ensemble', 'time') + } else { + sdname <- paste0(metric) #, " grid point metric") # formerly names(metric) + dims <- c(lalo, 'ensemble', 'time') + } + metadata <- list(metric = list(name = metric, 
+ standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- .get_countries(grid) + ArrayToNc(append(country, time, subset_skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, + "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, - "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 19335807..07fdddf7 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -13,20 +13,11 @@ save_forecast <- function(recipe, lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- .get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$System[[global_attributes$system]]$calendar - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, variable = variable) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - # Generate vector containing leadtimes dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) @@ -58,83 +49,99 @@ save_forecast <- function(recipe, } # Get time difference in hours leadtimes <- as.numeric(dates - init_date)/3600 - + syears <- seq(1:dim(data_cube$data)['syear'][[1]]) # expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) + + # Loop over variables + for (var in 1:data_cube$dims[['var']]) { + subset_cube <- CST_Subset(data_cube, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- subset_cube$attrs$Variable$varName + var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'ensemble', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'ensemble', 'time') - var.expname <- variable - var.sdname <- var.sdname - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } + # Loop over each year 
in the data and save independently + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'ensemble', 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'ensemble', 'time'))) + } + + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'ensemble', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } else { + dims <- c(lalo, 'ensemble', 'time') + var.expname <- variable + var.sdname <- var.sdname + var.units <- subset_cube$attrs$Variable$metadata[[variable]]$units + } - # Select start date - if (fcst.horizon == 'decadal') { - ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal - ## data has been reshaped - # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') - - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time - - # Generate name of output file - outfile <- get_filename(outdir, recipe, variable, fcst.sdate, - agg, "exp") + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + ## NOTE: Not good to use data_cube$load_parameters$dat1 because decadal + ## data has been reshaped + # fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time + + # Generate name of output file + outfile <- 
get_filename(outdir, recipe, variable, fcst.sdate, + agg, "exp") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } + info(recipe$Run$logger, paste("#####", toupper(type), + "SAVED TO NETCDF FILE #####")) } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index f6f26adb..609537c0 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -11,11 +11,6 @@ save_metrics <- function(recipe, archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - skill <- lapply(skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -27,27 +22,6 @@ save_metrics <- function(recipe, global_attributes <- c(list(from_anomalies = "No"), global_attributes) } - attr(skill[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(skill)) { - metric <- names(skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - skill[[i]][is.na(skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else { - sdname <- paste0(metric) #, " grid point metric") - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(skill[[i]], 'variables') <- metadata - names(dim(skill[[i]])) <- dims - } # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -94,28 +68,60 @@ save_metrics <- function(recipe, times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, variable = data_cube$attrs$Variable$varName) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "skill") + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + # Subset skill arrays + subset_skill <- lapply(skill, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "skill") + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_skill <- lapply(subset_skill, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_skill[[1]], 'global_attrs') <- global_attributes - # Get 
grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, skill), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill) - ArrayToNc(vars, outfile) + for (i in 1:length(subset_skill)) { + metric <- names(subset_skill[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else { + sdname <- paste0(metric) + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(subset_skill[[i]], 'variables') <- metadata + names(dim(subset_skill[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_skill), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_skill) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index ff617997..fcf91d2f 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -12,20 +12,11 @@ save_observations <- function(recipe, lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- .get_global_attributes(recipe, archive) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq calendar <- archive$Reference[[global_attributes$reference]]$calendar - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, variable = variable) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), @@ -33,7 +24,7 @@ save_observations <- function(recipe, if (fcst.horizon == 'decadal') { ## Method 1: Use the first date as init_date. 
But it may be better to use ## the real initialized date (ask users) -# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') ## Method 2: use initial month init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', @@ -51,89 +42,104 @@ save_observations <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) - if (!("time" %in% names(dim(fcst)))) { - dim(fcst) <- c("time" = 1, dim(fcst)) - } - if (tolower(agg) == "global") { - fcst <- list(Reorder(fcst, c(lalo, 'time'))) - } else { - fcst <- list(Reorder(fcst, c('country', 'time'))) - } - # Add metadata - var.sdname <- dictionary$vars[[variable]]$standard_name - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - var.expname <- paste0(variable, '_country') - var.longname <- paste0("Country-Aggregated ", var.longname) - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units - } else { - dims <- c(lalo, 'time') - var.expname <- variable - var.units <- data_cube$attrs$Variable$metadata[[variable]]$units + # Loop over variables + for (var in 1:data_cube$dims[['var']]) { + subset_cube <- CST_Subset(data_cube, along = 'var', indices = var, + drop = F, var_dim = 'var', dat_dim = 'dat') + variable <- subset_cube$attrs$Variable$varName + var.longname <- subset_cube$attrs$Variable$metadata[[variable]]$long_name + + # Create output directory + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - - metadata <- list(fcst = list(name = var.expname, - standard_name = var.sdname, - long_name = var.longname, - units = var.units)) - attr(fcst[[1]], 'variables') <- metadata - names(dim(fcst[[1]])) <- dims - # Add global attributes - attr(fcst[[1]], 'global_attrs') <- global_attributes - - # Select start date. The date is computed for each year, and adapted for - # consistency with the hcst/fcst dates, so that both sets of files have - # the same name pattern. - ## Because observations are loaded differently in the daily vs. monthly - ## cases, different approaches are necessary. 
- if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - } else { + + for (i in syears) { + # Select year from array and rearrange dimensions + fcst <- ClimProjDiags::Subset(subset_cube$data, 'syear', i, drop = T) + if (!("time" %in% names(dim(fcst)))) { + dim(fcst) <- c("time" = 1, dim(fcst)) + } + if (tolower(agg) == "global") { + fcst <- list(Reorder(fcst, c(lalo, 'time'))) + } else { + fcst <- list(Reorder(fcst, c('country', 'time'))) + } - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + # Add metadata + var.sdname <- dictionary$vars[[variable]]$standard_name + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + var.expname <- paste0(variable, '_country') + var.longname <- paste0("Country-Aggregated ", var.longname) + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } else { - fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + dims <- c(lalo, 'time') + var.expname <- variable + var.units <- data_cube$attrs$Variable$metadata[[variable]]$units } - } + + metadata <- list(fcst = list(name = var.expname, + standard_name = var.sdname, + long_name = var.longname, + units = var.units)) + attr(fcst[[1]], 'variables') <- metadata + names(dim(fcst[[1]])) <- dims + # Add global attributes + attr(fcst[[1]], 'global_attrs') <- global_attributes + + # Select start date. The date is computed for each year, and adapted for + # consistency with the hcst/fcst dates, so that both sets of files have + # the same name pattern. + ## Because observations are loaded differently in the daily vs. monthly + ## cases, different approaches are necessary. 
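+      ## Decadal case: the start date is rebuilt from init_date plus the offset
+      ## of the current hindcast year; otherwise it is taken from the loaded
+      ## file dates (monthly) or from the Dates attribute (daily).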
+ if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + } else { - # Ensure the year is correct if the first leadtime goes to the next year - init_date <- as.POSIXct(init_date) - if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { - lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 - } - # Ensure that the initialization month is consistent with the hindcast - lubridate::month(fcst.sdate) <- lubridate::month(init_date) - fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$attrs$Dates[i]) + } + } - # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Ensure the year is correct if the first leadtime goes to the next year + init_date <- as.POSIXct(init_date) + if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { + lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 + } + # Ensure that the initialization month is consistent with the hindcast + lubridate::month(fcst.sdate) <- lubridate::month(init_date) + fcst.sdate <- format(fcst.sdate, format = '%Y%m%d') + + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time - # Generate name of output file - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, "obs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, fcst), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, fcst) - ArrayToNc(vars, outfile) + # Generate name of output file + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "obs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, fcst), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, fcst) + ArrayToNc(vars, outfile) + } } + info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 835fce05..68095671 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -6,15 +6,8 @@ save_percentiles <- function(recipe, # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. 
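   # Each array in 'percentiles' is subset along its 'var' dimension below,
   # so that one file is written per variable.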
archive <- get_archive(recipe) - # Define grid dimensions and names lalo <- c('longitude', 'latitude') - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - percentiles <- lapply(percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -26,21 +19,6 @@ save_percentiles <- function(recipe, global_attributes <- c(list(from_anomalies = "No"), global_attributes) } - attr(percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(percentiles[[i]], 'variables') <- metadata - names(dim(percentiles[[i]])) <- dims - } # Time indices and metadata fcst.horizon <- tolower(recipe$Analysis$Horizon) @@ -83,29 +61,57 @@ save_percentiles <- function(recipe, } times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time + + for (var in 1:data_cube$dims[['var']]) { + # Subset arrays + subset_percentiles <- lapply(percentiles, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Generate name of output file + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) + } + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, "percentiles") - # Generate name of output file - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, - variable = data_cube$attrs$Variable$varName) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "percentiles") - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, percentiles) - ArrayToNc(vars, outfile) + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_percentiles <- lapply(subset_percentiles, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + + attr(subset_percentiles[[1]], 'global_attrs') <- global_attributes + + for (i in 1:length(subset_percentiles)) { + ## TODO: replace with proper standard names + percentile <- names(subset_percentiles[i]) + long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = percentile, long_name = long_name)) + attr(subset_percentiles[[i]], 'variables') <- metadata + names(dim(subset_percentiles[[i]])) <- dims + } + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == 
"country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_percentiles), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_percentiles) + ArrayToNc(vars, outfile) + } + info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index 3e725b55..974ef7dc 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -16,15 +16,7 @@ save_probabilities <- function(recipe, lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) - variable <- data_cube$attrs$Variable$varName - var.longname <- data_cube$attrs$Variable$metadata[[variable]]$long_name global_attributes <- .get_global_attributes(recipe, archive) - if (is.null(outdir)) { - outdir <- get_dir(recipe = recipe, variable = variable) - } - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } # Add anomaly computation to global attributes ## TODO: Sort out the logic once default behavior is decided if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && @@ -60,68 +52,82 @@ save_probabilities <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) + + # Loop over variable dimension + for (var in 1:data_cube$dims[['var']]) { + subset_probs <- lapply(probs, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + # Create output directory + variable <- data_cube$attrs$Variable$varName[[var]] + outdir <- get_dir(recipe = recipe, variable = variable) + if (!dir.exists(outdir)) { + dir.create(outdir, recursive = T) } - ## TODO: Replace for loop with something more efficient? - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') + # Loop over each year in the data and save independently + for (i in syears) { + # Select year from array and rearrange dimensions + probs_syear <- lapply(subset_probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') + if (tolower(agg) == "global") { + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c(lalo, 'time'))}) } else { - dims <- c(lalo, 'time') + probs_syear <- lapply(probs_syear, function(x) { + Reorder(x, c('country', 'time'))}) } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? 
- } - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } + for (bin in 1:length(probs_syear)) { + prob_bin <- names(probs_syear[bin]) + long_name <- paste0(prob_bin, " probability category") + if (tolower(agg) == "country") { + dims <- c('Country', 'time') + } else { + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = prob_bin, long_name = long_name)) + attr(probs_syear[[bin]], 'variables') <- metadata + names(dim(probs_syear[[bin]])) <- dims # is this necessary? + } + + # Add global attributes + attr(probs_syear[[1]], 'global_attrs') <- global_attributes + + # Select start date + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + init_date <- as.POSIXct(init_date) + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] + } - # Get time dimension values and metadata - times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) - time <- times$time + # Get time dimension values and metadata + times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) + time <- times$time - # Generate name of output file - outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, - fcst.sdate, agg, "probs") - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, probs_syear), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, probs_syear) - ArrayToNc(vars, outfile) + # Generate name of output file + outfile <- get_filename(outdir, recipe, data_cube$attrs$Variable$varName, + fcst.sdate, agg, "probs") + + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, probs_syear), outfile) + } else { + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, probs_syear) + ArrayToNc(vars, outfile) + } } + info(recipe$Run$logger, + paste("#####", toupper(type), + "PROBABILITIES SAVED TO NETCDF FILE #####")) } - - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 9d901a7c..73f982d0 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -35,12 +35,6 @@ save_data <- function(recipe, data, "of at least two s2dv_cubes containing the hcst and obs.")) stop() } - # Create output directory - outdir <- get_dir(recipe = recipe, - variable = data$hcst$attrs$Variable$varName) - 
for (directory in outdir) { - dir.create(directory, showWarnings = FALSE, recursive = TRUE) - } # Separate ensemble correlation from the rest of the metrics, as it has one # extra dimension "ensemble" and must be saved to a different file @@ -56,114 +50,49 @@ save_data <- function(recipe, data, } # Iterate over variables to subset s2dv_cubes and save outputs - for (var in 1:data$hcst$dims[['var']]) { - info(recipe$Run$logger, - paste("Saving outputs for", data$hcst$attrs$Variable$varName[var])) - # Export hindcast, forecast and observations - subset_hcst <- CST_Subset(data$hcst, along = 'var', indices = var, - drop = F, var_dim = 'var', dat_dim = 'dat') - - save_forecast(recipe = recipe, - data_cube = subset_hcst, + save_forecast(recipe = recipe, + data_cube = data$hcst, + outdir = outdir[var], + type = 'hcst') + if (!is.null(data$fcst)) { + save_forecast(recipe = recipe, + data_cube = data$fcst, outdir = outdir[var], - type = 'hcst') - if (!is.null(data$fcst)) { - subset_fcst <- CST_Subset(data$fcst, along = 'var', indices = var, - drop = F, var_dim = 'var', dat_dim = 'dat') - save_forecast(recipe = recipe, - data_cube = subset_fcst, - outdir = outdir[var], - type = 'fcst') - } - subset_obs <- CST_Subset(data$obs, along = 'var', indices = var, - drop = F, var_dim = 'var', dat_dim = 'dat') - save_observations(recipe = recipe, - data_cube = subset_obs, - outdir = outdir[var]) - - # Export skill metrics - if (!is.null(skill_metrics)) { - if (data$hcst$dims[['var']] == 1) { - save_metrics(recipe = recipe, - skill = skill_metrics, - data_cube = data$hcst, - outdir = outdir[var]) - if (!is.null(corr_metrics)) { - save_corr(recipe = recipe, - skill = corr_metrics, - data_cube = data$hcst, - outdir = outdir[var]) - } - } else { - subset_skill <- lapply(skill_metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_metrics(recipe = recipe, - skill = subset_skill, - data_cube = subset_hcst, - outdir = outdir[var]) - if (!is.null(corr_metrics)) { - subset_corr <- lapply(corr_metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_corr(recipe = recipe, - skill = subset_corr, - data_cube = subset_hcst, + type = 'fcst') + } + save_observations(recipe = recipe, + data_cube = data$obs, outdir = outdir[var]) - } - } - } - - # Export probabilities onto outfile - if (!is.null(probabilities)) { - if (data$hcst$dims[['var']] == 1) { - save_percentiles(recipe = recipe, - percentiles = probabilities$percentiles, + + # Export skill metrics + if (!is.null(skill_metrics)) { + save_metrics(recipe = recipe, + skill = skill_metrics, + data_cube = data$hcst, + outdir = outdir[var]) + } + if (!is.null(corr_metrics)) { + save_corr(recipe = recipe, + skill = corr_metrics, + data_cube = data$hcst, + outdir = outdir[var]) + # Export probabilities onto outfile + if (!is.null(probabilities)) { + save_percentiles(recipe = recipe, + percentiles = probabilities$percentiles, + data_cube = data$hcst, + outdir = outdir[var]) + save_probabilities(recipe = recipe, + probs = probabilities$probs, data_cube = data$hcst, - outdir = outdir[var]) + outdir = outdir[var], + type = "hcst") + if (!is.null(probabilities$probs_fcst)) { save_probabilities(recipe = recipe, - probs = probabilities$probs, - data_cube = data$hcst, + probs = probabilities$probs_fcst, + data_cube = subset_fcst, outdir = outdir[var], - type = "hcst") - if (!is.null(probabilities$probs_fcst)) { - save_probabilities(recipe = recipe, - probs = 
probabilities$probs_fcst, - data_cube = subset_fcst, - outdir = outdir[var], - type = "fcst") - } - } else { - subset_percentiles <- lapply(probabilities$percentiles, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_percentiles(recipe = recipe, - percentiles = subset_percentiles, - data_cube = subset_hcst, - outdir = outdir[var]) - subset_probs <- lapply(probabilities$probs, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_probabilities(recipe = recipe, - probs = subset_probs, - data_cube = subset_hcst, - outdir = outdir[var], - type = "hcst") - if (!is.null(probabilities$probs_fcst)) { - subset_probs_fcst <- lapply(probabilities$probs_fcst, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - save_probabilities(recipe = recipe, - probs = subset_probs_fcst, - data_cube = subset_fcst, - outdir = outdir[var], - type = "fcst") - } + type = "fcst") } } } -- GitLab From 83b784e5582770ca969df6dc5ed806011f42d6a2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 10:52:36 +0200 Subject: [PATCH 36/47] Improve .drop_dims() to drop only nonessential dimensions --- modules/Skill/Skill.R | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 7eb44321..599b1743 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -464,20 +464,17 @@ compute_probabilities <- function(recipe, data) { } } -## TODO: Replace with ClimProjDiags::Subset .drop_dims <- function(metric_array) { - # Drop all singleton dimensions - metric_array <- drop(metric_array) - # If time happened to be a singleton dimension, add it back in the array - if (!("time" %in% names(dim(metric_array)))) { - dim(metric_array) <- c("time" = 1, dim(metric_array)) - } - # If array has memb dim (Corr case), change name to 'ensemble' - if ("exp_memb" %in% names(dim(metric_array))) { - names(dim(metric_array))[which(names(dim(metric_array)) == - "exp_memb")] <- "ensemble" - # } else { - # dim(metric_array) <- c(dim(metric_array), "ensemble" = 1) - } + # Define dimensions that are not essential for saving + droppable_dims <- c("dat", "sday", "sweek", "syear", "ensemble", "nobs", + "nexp", "bin") + # Select non-essential dimensions of length 1 + dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), + droppable_dims) + drop_indices <- grep(paste(dims_to_drop, collapse = "|"), + names(dim(metric_array))) + # Drop selected dimensions + metric_array <- abind::adrop(metric_array, drop = drop_indices) + return(metric_array) } -- GitLab From d251d03a2c3568a7d52731df0ed211a795ea2cd4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 10:52:50 +0200 Subject: [PATCH 37/47] adapt unit tests --- tests/testthat/test-decadal_monthly_2.R | 2 +- tests/testthat/test-seasonal_daily.R | 4 ++-- tests/testthat/test-seasonal_monthly.R | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 01fe0440..04b9a419 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -37,7 +37,7 @@ plot_data(recipe = recipe, data = calibrated_data, ))}) -outdir <- get_dir(recipe) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) #====================================== diff --git 
a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index da0e789b..8a51cc86 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -155,10 +155,10 @@ class(skill_metrics$enscorr_specs), ) expect_equal( dim(skill_metrics$enscorr_specs), -c(time = 31, latitude = 4, longitude = 4) +c(var = 1, time = 31, latitude = 4, longitude = 4) ) expect_equal( -skill_metrics$enscorr_specs[1:3, 1, 1], +skill_metrics$enscorr_specs[, 1:3, 1, 1], c(0.7509920, 0.6514916, 0.5118371), tolerance=0.0001 ) diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index cbee82cf..0e311424 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -193,19 +193,19 @@ class(skill_metrics$rpss), ) expect_equal( dim(skill_metrics$rpss), -c(time = 3, latitude = 3, longitude = 3) +c(var = 1, time = 3, latitude = 3, longitude = 3) ) expect_equal( dim(skill_metrics$rpss_significance), dim(skill_metrics$rpss) ) expect_equal( -as.vector(skill_metrics$rpss[, 2, 3]), +as.vector(skill_metrics$rpss[, , 2, 3]), c(-0.2918857, -1.4809143, -1.3842286), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$rpss_significance[, 2, 3]), +as.vector(skill_metrics$rpss_significance[, , 2, 3]), rep(FALSE, 3) ) -- GitLab From 4214923456ca17dea4f0d5b0230b54a783d0946c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 10:52:58 +0200 Subject: [PATCH 38/47] update recipe --- recipes/atomic_recipes/recipe_test_multivar.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recipes/atomic_recipes/recipe_test_multivar.yml b/recipes/atomic_recipes/recipe_test_multivar.yml index 7a6426b4..94e41223 100644 --- a/recipes/atomic_recipes/recipe_test_multivar.yml +++ b/recipes/atomic_recipes/recipe_test_multivar.yml @@ -30,13 +30,17 @@ Analysis: Workflow: Calibration: method: raw + save: 'exp_only' Anomalies: compute: yes cross_validation: yes + save: 'none' Skill: metric: RPS RPSS CRPS CRPSS BSS10 BSS90 EnsCorr mean_bias mean_bias_SS + save: 'all' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' Indicators: index: no ncores: 7 @@ -45,5 +49,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 70fc8563c4739e14d1710404fe2b9f40cc78210e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 11 May 2023 13:11:02 +0200 Subject: [PATCH 39/47] Fix bugs in saving corr --- modules/Saving/R/save_corr.R | 2 +- modules/Skill/Skill.R | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 146c5c99..65349acd 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -85,7 +85,7 @@ save_corr <- function(recipe, # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { subset_skill <- lapply(subset_skill, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'ensemble', 'time'))}) } attr(subset_skill[[1]], 'global_attrs') <- global_attributes diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 599b1743..f167b2a7 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -467,7 +467,7 @@ compute_probabilities <- function(recipe, data) { .drop_dims <- function(metric_array) { # Define 
dimensions that are not essential for saving droppable_dims <- c("dat", "sday", "sweek", "syear", "ensemble", "nobs", - "nexp", "bin") + "nexp", "exp_memb", "obs_memb", "bin") # Select non-essential dimensions of length 1 dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), droppable_dims) @@ -475,6 +475,10 @@ compute_probabilities <- function(recipe, data) { names(dim(metric_array))) # Drop selected dimensions metric_array <- abind::adrop(metric_array, drop = drop_indices) - + # If array has memb dim (Corr case), change name to 'ensemble' + if ("exp_memb" %in% names(dim(metric_array))) { + names(dim(metric_array))[which(names(dim(metric_array)) == + "exp_memb")] <- "ensemble" + } return(metric_array) } -- GitLab From b1dfd52233756e56194a4b78885decf46fc15743 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 12 May 2023 16:14:28 +0200 Subject: [PATCH 40/47] included BEST obs reference --- conf/archive.yml | 9 +++++++ modules/Loading/Dev_Loading.R | 27 ++++++++++++++++--- modules/Loading/R/mask_tas_tos.R | 14 +++++++--- .../recipe_test_multivar_nadia.yml | 6 ++--- tas-tos_scorecards_data_loading.R | 14 +++------- 5 files changed, 50 insertions(+), 20 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index b6f44a07..88872cb8 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -206,6 +206,15 @@ esarchive: monthly_mean: {"tasanomaly":"/"} calendar: "proleptic_gregorian" reference_grid: "/esarchive/obs/ukmo/hadcrut_v5.0_analysis/monthly_mean/tasanomaly/tasanomaly_202001.nc" + BEST: + name: "BEST" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "obs/berkeleyearth/berkeleyearth/" + daily_mean: {"tas":"/"} + monthly_mean: {"tas":"/"} + calendar: "proleptic_gregorian" + reference_grid: "/esarchive/obs/berkeleyearth/berkeleyearth/monthly_mean/tas/tas_201805.nc" + diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R index ee941a32..c7a94687 100644 --- a/modules/Loading/Dev_Loading.R +++ b/modules/Loading/Dev_Loading.R @@ -167,16 +167,16 @@ load_datasets <- function(recipe) { ## Combine tas and tos data into one variable: tas-tos if(recipe$Analysis$Variables$name == 'tas tos'){ - if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5'){ + #if(recipe$Analysis$Datasets$Reference$name == 'HadCRUT5' || recipe$Analysis$Datasets$Reference$name == 'BEST') { source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') - hcst <- mask_tas_tos(input_data = hcst, region = c(0.1, 359.95, -90, 90), + hcst <- mask_tas_tos(input_data = hcst, region = c(lons.min, lons.max,lats.min, lats.max), grid = 'r360x181', lon = hcst$coords$longitude, lat = hcst$coords$latitude, lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) hcst$dims[['var']] <- dim(hcst$data)[['var']] - } + #} } # Load forecast @@ -267,6 +267,13 @@ load_datasets <- function(recipe) { } } + if (recipe$Analysis$Variables$name == 'tas tos'){ + if (recipe$Analysis$Datasets$Reference$name == 'BEST'){ + vars <- 'tas' + var_dir_obs <- reference_descrip[[store.freq]][vars] + } + } + obs <- Start(dat = obs.path, var = vars, var_dir = var_dir_obs, @@ -341,6 +348,20 @@ load_datasets <- function(recipe) { # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) + + ## Combine tas and tos data into one variable: tas-tos + if(recipe$Analysis$Variables$name == 'tas tos'){ + if(recipe$Analysis$Datasets$Reference$name != 'HadCRUT5' || recipe$Analysis$Datasets$Reference$name != 'BEST'){ + 
source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') + obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), + grid = 'r360x181', + lon = obs$coords$longitude, + lat = obs$coords$latitude, + lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) + + obs$dims[['var']] <- dim(obs$data)[['var']] + } + } # Check for consistency between hcst and obs grid if (!(recipe$Analysis$Regrid$type == 'none')) { diff --git a/modules/Loading/R/mask_tas_tos.R b/modules/Loading/R/mask_tas_tos.R index c3c0ba8b..a2eeb0b6 100644 --- a/modules/Loading/R/mask_tas_tos.R +++ b/modules/Loading/R/mask_tas_tos.R @@ -2,7 +2,7 @@ library(multiApply) library(startR) library(s2dv) -mask_tas_tos <- function(input_data, grid, lon, lat, region, +mask_tas_tos <- function(input_data, grid, lon, lat, region = region , lon_dim = 'lon', lat_dim = 'lat', ncores = NULL){ @@ -34,10 +34,10 @@ mask_tas_tos <- function(input_data, grid, lon, lat, region, } .load_mask <- function(grid, mask_path = NULL, land_value = 0, sea_value = 1, - lon_dim = 'lon', lat_dim = 'lat', region){ + lon_dim = 'lon', lat_dim = 'lat', region = region){ if (is.null(mask_path)){ - mask_sea_land_path <- '/esarchive/recon/ecmwf/era5land/constant/lsm-r3600x1801cds/lsm.nc' ##'/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc' + mask_sea_land_path <- '/esarchive/exp/ecmwf/system5c3s/constant/lsm/lsm.nc' ## /esarchive/recon/ecmwf/era5land/constant/lsm-r3600x1801cds/lsm.nc' } else if (is.character(mask_path)){ mask_sea_land_path <- mask_path } else { @@ -50,11 +50,17 @@ mask_tas_tos <- function(input_data, grid, lon, lat, region, lats.min <- region[3] lats.max <- region[4] + ## TO DO: + ## Fix region filter for lat and lon + ## Fix 'number' parameter for mask + + data <- startR::Start(dat = mask_sea_land_path, var = 'lsm', lon = 'all', lat = 'all', - # lon = values(list(lons.min, lons.max)), + number = 1, ## needed to add for ensemble member dimension of lsm.nc + # lon = values(list(lons.min, lons.max)), # lat = values(list(lats.min, lats.max)), transform = CDORemapper, transform_extra_cells = 2, transform_params = list(grid = grid, method = 'con', crop = region), diff --git a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml index 022211bf..f13d895e 100644 --- a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml @@ -11,12 +11,12 @@ Analysis: name: ECMWF-SEAS5 Multimodel: False Reference: - name: HadCRUT5 + name: ERA5 Time: sdate: '0101' fcst_year: - hcst_start: '2012' - hcst_end: '2015' + hcst_start: '2014' + hcst_end: '2016' ftime_min: 1 ftime_max: 1 Region: diff --git a/tas-tos_scorecards_data_loading.R b/tas-tos_scorecards_data_loading.R index f7bf36e7..52475c9f 100644 --- a/tas-tos_scorecards_data_loading.R +++ b/tas-tos_scorecards_data_loading.R @@ -3,19 +3,17 @@ rm(list = ls()); gc() #args <- commandArgs(trailingOnly = TRUE) - setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/") #source("modules/Loading/Loading.R") source("modules/Loading/Dev_Loading.R") source("modules/Anomalies/Anomalies.R") -# #source("modules/Calibration/Calibration.R") +#source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") #source("modules/Visualization/Visualization.R") - - source("tools/prepare_outputs.R") + recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml" recipe <- 
prepare_outputs(recipe_file) @@ -23,23 +21,19 @@ recipe <- prepare_outputs(recipe_file) #recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01') ## Load datasets -source("modules/Loading/Dev_Loading.R") data <- load_datasets(recipe) - ################################################################################ -## data checks -#dim(data$hcst$data) - +### For Testing ### lon <- attributes(data$hcst$coords$longitude) lat <- attributes(data$hcst$coords$latitude) source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') -tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(0.1, 359.95, -90, 90), +tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(20, 40, 30, 50), grid = 'r360x181', lon = lon, lat = lat, lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) -- GitLab From a003589d6721cc788e9ea47ed871da30cae5437b Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 12 May 2023 16:54:07 +0200 Subject: [PATCH 41/47] bug fix in Dev_Loading.R --- modules/Loading/Dev_Loading.R | 2 +- .../recipe_test_multivar_nadia.yml | 8 ++-- tas-tos_scorecards_data_loading.R | 42 +++++++++---------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/modules/Loading/Dev_Loading.R b/modules/Loading/Dev_Loading.R index c7a94687..fb456eb3 100644 --- a/modules/Loading/Dev_Loading.R +++ b/modules/Loading/Dev_Loading.R @@ -351,7 +351,7 @@ load_datasets <- function(recipe) { ## Combine tas and tos data into one variable: tas-tos if(recipe$Analysis$Variables$name == 'tas tos'){ - if(recipe$Analysis$Datasets$Reference$name != 'HadCRUT5' || recipe$Analysis$Datasets$Reference$name != 'BEST'){ + if(recipe$Analysis$Datasets$Reference$name != 'HadCRUT5' & recipe$Analysis$Datasets$Reference$name != 'BEST'){ source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') obs <- mask_tas_tos(input_data = obs, region = c(lons.min, lons.max,lats.min, lats.max), grid = 'r360x181', diff --git a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml index f13d895e..50cc62f7 100644 --- a/recipes/atomic_recipes/recipe_test_multivar_nadia.yml +++ b/recipes/atomic_recipes/recipe_test_multivar_nadia.yml @@ -11,14 +11,14 @@ Analysis: name: ECMWF-SEAS5 Multimodel: False Reference: - name: ERA5 + name: BEST Time: sdate: '0101' fcst_year: - hcst_start: '2014' + hcst_start: '1993' hcst_end: '2016' ftime_min: 1 - ftime_max: 1 + ftime_max: 6 Region: latmin: -90 latmax: 90 @@ -45,5 +45,5 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/nmilders/scorecards_data/test/ + output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/tas-tos/ECMWF-SEAS5/tas-tos/ code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/tas-tos_scorecards_data_loading.R b/tas-tos_scorecards_data_loading.R index 52475c9f..da124cd4 100644 --- a/tas-tos_scorecards_data_loading.R +++ b/tas-tos_scorecards_data_loading.R @@ -1,7 +1,7 @@ rm(list = ls()); gc() -#args <- commandArgs(trailingOnly = TRUE) +args <- commandArgs(trailingOnly = TRUE) setwd("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/") @@ -18,7 +18,7 @@ recipe_file <- "recipes/atomic_recipes/recipe_test_multivar_nadia.yml" recipe <- prepare_outputs(recipe_file) ## Run job for each start month -#recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01') +recipe$Analysis$Time$sdate <- paste0(sprintf("%02d", as.numeric(args)), '01') ## 
Load datasets data <- load_datasets(recipe) @@ -27,25 +27,25 @@ data <- load_datasets(recipe) ################################################################################ ### For Testing ### - -lon <- attributes(data$hcst$coords$longitude) -lat <- attributes(data$hcst$coords$latitude) - - -source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') -tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(20, 40, 30, 50), - grid = 'r360x181', lon = lon, lat = lat, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) - -tas_tos_obs <- mask_tas_tos(input_data = data$obs, region = c(0.1, 359.95, -90, 90), - grid = 'r360x181', lon = lon, lat = lat, - lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) - - -source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') -mask <- .load_mask(grid = 'r360x181', mask_path = NULL, - land_value = 0, sea_value = 1, - lon_dim = 'lon', lat_dim = 'lat', region = NULL) +# +# lon <- attributes(data$hcst$coords$longitude) +# lat <- attributes(data$hcst$coords$latitude) +# +# +# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +# tas_tos_hcst <- mask_tas_tos(input_data = data$hcst, region = c(20, 40, 30, 50), +# grid = 'r360x181', lon = lon, lat = lat, +# lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) +# +# tas_tos_obs <- mask_tas_tos(input_data = data$obs, region = c(0.1, 359.95, -90, 90), +# grid = 'r360x181', lon = lon, lat = lat, +# lon_dim = 'longitude', lat_dim = 'latitude', ncores = NULL) +# +# +# source('/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/modules/Loading/R/mask_tas_tos.R') +# mask <- .load_mask(grid = 'r360x181', mask_path = NULL, +# land_value = 0, sea_value = 1, +# lon_dim = 'lon', lat_dim = 'lat', region = NULL) ################################################################################ -- GitLab From ed9030281e43f8db5309d8b056c86a22fac9b142 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 15 May 2023 14:54:56 +0200 Subject: [PATCH 42/47] Adapt plotting functions to subset by var dimension and fix saving bug --- modules/Saving/Saving.R | 2 +- modules/Skill/Skill.R | 12 +- modules/Visualization/R/plot_ensemble_mean.R | 128 +++++----- .../R/plot_most_likely_terciles_map.R | 86 +++---- modules/Visualization/R/plot_skill_metrics.R | 225 +++++++++--------- 5 files changed, 222 insertions(+), 231 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 73f982d0..c52991d3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -90,7 +90,7 @@ save_data <- function(recipe, data, if (!is.null(probabilities$probs_fcst)) { save_probabilities(recipe = recipe, probs = probabilities$probs_fcst, - data_cube = subset_fcst, + data_cube = data$fcst, outdir = outdir[var], type = "fcst") } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index f167b2a7..f22d6b9b 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -425,11 +425,11 @@ compute_probabilities <- function(recipe, data) { named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) if (!is.null(data$fcst)) { fcst_years <- dim(data$fcst$data)[['syear']] - named_probs_fcst <- lapply(named_probs_fcst, - function(x) {Subset(x, - along = 'syear', - indices = 1:fcst_years, - drop = 'non-selected')}) + named_probs_fcst <- lapply(named_probs_fcst, function(x) {.drop_dims(x)}) + # function(x) {Subset(x, + # along = 'syear', + # indices = 
1:fcst_years, + # drop = 'non-selected')}) results <- list(probs = named_probs, probs_fcst = named_probs_fcst, percentiles = named_quantiles) @@ -466,7 +466,7 @@ compute_probabilities <- function(recipe, data) { .drop_dims <- function(metric_array) { # Define dimensions that are not essential for saving - droppable_dims <- c("dat", "sday", "sweek", "syear", "ensemble", "nobs", + droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs", "nexp", "exp_memb", "obs_memb", "bin") # Select non-essential dimensions of length 1 dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index e0fa8b84..e3d75138 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -10,80 +10,72 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { longitude <- fcst$coords$lon archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name - variable <- recipe$Analysis$Variables$name - units <- attr(fcst$Variable, "variable")$units start_date <- paste0(recipe$Analysis$Time$fcst_year, recipe$Analysis$Time$sdate) # Compute ensemble mean ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble') - # Drop extra dims, add time dim if missing: - ensemble_mean <- drop(ensemble_mean) + # Loop over variable dimension + for (var in 1:fcst$dims[['var']]) { + variable <- fcst$attrs$Variable$varName[[var]] + units <- fcst$attrs$Variable$metadata[[variable]]$units + var_ens_mean <- ClimProjDiags::Subset(ensemble_mean, + along = c("dat", "var", + "sday", "sweek"), + indices = list(1, var, 1, 1), + drop = 'selected') - if (!("time" %in% names(dim(ensemble_mean)))) { - dim(ensemble_mean) <- c("time" = 1, dim(ensemble_mean)) - } - if (!'syear' %in% names(dim(ensemble_mean))) { - ensemble_mean <- Reorder(ensemble_mean, c("time", - "longitude", - "latitude")) - } else { - ensemble_mean <- Reorder(ensemble_mean, c("syear", - "time", - "longitude", - "latitude")) - } - ## TODO: Redefine column colors, possibly depending on variable - if (variable == 'prlr') { - palette = "BrBG" - rev = F - } else { - palette = "RdBu" - rev = T - } - # Define brks, centered on in the case of anomalies - ## - if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { - variable <- paste(variable, "anomaly") - max_value <- max(abs(ensemble_mean)) - ugly_intervals <- seq(-max_value, max_value, max_value/20) - brks <- pretty(ugly_intervals, n = 12, min.n = 8) - } else { - brks <- pretty(range(ensemble_mean, na.rm = T), n = 15, min.n = 8) - } - cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) - options(bitmapType = "cairo") - - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_ensemble_mean <- ensemble_mean - outfile <- paste0(outdir, "forecast_ensemble_mean-", start_date, ".png") + var_ens_mean <- Reorder(var_ens_mean, c("syear", + "time", + "longitude", + "latitude")) + ## TODO: Redefine column colors, possibly depending on variable + if (variable == 'prlr') { + palette = "BrBG" + rev = F } else { - i_ensemble_mean <- ensemble_mean[which(start_date == i_syear), , , ] - outfile <- paste0(outdir, "forecast_ensemble_mean-", i_syear, ".png") + palette = "RdBu" + rev = T + } + # Define brks, centered on in the case of anomalies + ## + if (grepl("anomaly", + fcst$attrs$Variable$metadata[[variable]]$long_name)) { + variable <- paste(variable, "anomaly") + 
max_value <- max(abs(var_ens_mean)) + ugly_intervals <- seq(-max_value, max_value, max_value/20) + brks <- pretty(ugly_intervals, n = 12, min.n = 8) + } else { + brks <- pretty(range(var_ens_mean, na.rm = T), n = 15, min.n = 8) + } + cols <- grDevices::hcl.colors(length(brks) - 1, palette, rev = rev) + options(bitmapType = "cairo") + + for (i_syear in start_date) { + # Define name of output file and titles + i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ] + outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", i_syear, ".png") + toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, + "- Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + titles <- as.vector(months) + # Plots + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + i_var_ens_mean, longitude, latitude, + filled.continents = F, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + units = units, + cols = cols, + brks = brks, + fileout = outfile, + bar_label_digits = 4, + bar_extra_margin = rep(0.7, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) } - toptitle <- paste("Forecast Ensemble Mean -", variable, "-", system_name, - "- Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - titles <- as.vector(months) - # Plots - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - i_ensemble_mean, longitude, latitude, - filled.continents = F, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - units = units, - cols = cols, - brks = brks, - fileout = outfile, - bar_label_digits = 4, - bar_extra_margin = rep(0.7, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) + info(recipe$Run$logger, + "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } - info(recipe$Run$logger, - "##### FCST ENSEMBLE MEAN PLOT SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 2e7b0a05..f912e249 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -35,55 +35,47 @@ plot_most_likely_terciles <- function(recipe, ## TODO: Improve this section # Drop extra dims, add time dim if missing: - probs_fcst <- drop(probs_fcst) - if (!("time" %in% names(dim(probs_fcst)))) { - dim(probs_fcst) <- c("time" = 1, dim(probs_fcst)) - } - if (!'syear' %in% names(dim(probs_fcst))) { - probs_fcst <- Reorder(probs_fcst, c("time", "bin", "longitude", "latitude")) - } else { - probs_fcst <- Reorder(probs_fcst, - c("syear", "time", "bin", "longitude", "latitude")) - } + for (var in 1:fcst$dims[['var']]) { + variable <- fcst$attrs$Variable$varName[[var]] + var_probs <- ClimProjDiags::Subset(probs_fcst, + along = c("var"), + indices = var, + drop = 'selected') - for (i_syear in start_date) { - # Define name of output file and titles - if (length(start_date) == 1) { - i_probs_fcst <- probs_fcst - outfile <- paste0(outdir, "forecast_most_likely_tercile-", start_date, - ".png") - } else { - i_probs_fcst <- probs_fcst[which(start_date == i_syear), , , , ] - outfile <- paste0(outdir, "forecast_most_likely_tercile-", i_syear, ".png") + var_probs <- Reorder(var_probs, + c("syear", "time", "bin", "longitude", "latitude")) + for (i_syear in start_date) { + # Define name of output file and titles + i_var_probs <- var_probs[which(start_date == i_syear), 
, , , ] + outfile <- paste0(outdir[[var]], "forecast_most_likely_tercile-", + i_syear, ".png") + toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", + "Initialization:", i_syear) + months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], + label = T, abb = F) + ## TODO: Ensure this works for daily and sub-daily cases + titles <- as.vector(months) + # Plots + ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked + ## on. + suppressWarnings( + PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), + cat_dim = 'bin', + i_var_probs, longitude, latitude, + coast_width = 1.5, + title_scale = 0.6, + legend_scale = 0.8, #cex_bar_titles = 0.6, + toptitle = toptitle, + titles = titles, + fileout = outfile, + bar_label_digits = 2, + bar_scale = rep(0.7, 4), + bar_label_scale = 1.2, + axes_label_scale = 1.3, + triangle_ends = c(F, F), width = 11, height = 8) + ) } - toptitle <- paste("Most Likely Tercile -", variable, "-", system_name, "-", - "Initialization:", i_syear) - months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ], - label = T, abb = F) - ## TODO: Ensure this works for daily and sub-daily cases - titles <- as.vector(months) - - # Plots - ## NOTE: PlotLayout() and PlotMostLikelyQuantileMap() are still being worked - ## on. - suppressWarnings( - PlotLayout(PlotMostLikelyQuantileMap, c('bin', 'longitude', 'latitude'), - cat_dim = 'bin', - i_probs_fcst, longitude, latitude, - coast_width = 1.5, - title_scale = 0.6, - legend_scale = 0.8, #cex_bar_titles = 0.6, - toptitle = toptitle, - titles = titles, - fileout = outfile, - bar_label_digits = 2, - bar_scale = rep(0.7, 4), - bar_label_scale = 1.2, - axes_label_scale = 1.3, - triangle_ends = c(F, F), width = 11, height = 8) - ) - } - info(recipe$Run$logger, "##### MOST LIKELY TERCILE PLOT SAVED TO OUTPUT DIRECTORY #####") + } } diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index f8be19d9..e62496f6 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -47,116 +47,123 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, scores <- c("rps", "frps", "crps", "frps_specs") # Assign colorbar to each metric type ## TODO: Triangle ends - for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { - if (name %in% names(skill_metrics)) { - # Define plot characteristics and metric name to display in plot - if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90_specs", "bss10_specs", - "rmsss")) { - display_name <- toupper(strsplit(name, "_")[[1]][1]) - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name == "mean_bias_ss") { - display_name <- "Mean Bias Skill Score" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- NULL - } else if (name %in% c("enscorr", "enscorr_specs")) { - display_name <- "Ensemble Mean Correlation" - skill <- skill_metrics[[name]] - brks <- seq(-1, 1, by = 0.2) - cols <- clim.colors(length(brks) - 1, diverging_palette) - col_inf <- NULL - col_sup <- NULL - } else if (name %in% scores) { - skill <- skill_metrics[[name]] - 
display_name <- toupper(strsplit(name, "_")[[1]][1]) - brks <- seq(0, 1, by = 0.1) - colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) - cols <- colorbar[1:(length(colorbar) - 1)] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "enssprerr") { - skill <- skill_metrics[[name]] - display_name <- "Spread-to-Error Ratio" - brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) - colorbar <- clim.colors(length(brks), diverging_palette) - cols <- colorbar[1:length(colorbar) - 1] - col_inf <- NULL - col_sup <- colorbar[length(colorbar)] - } else if (name == "mean_bias") { - skill <- skill_metrics[[name]] - display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) - brks <- max_value * seq(-1, 1, by = 0.2) - colorbar <- clim.colors(length(brks) + 1, diverging_palette) - cols <- colorbar[2:(length(colorbar) - 1)] - col_inf <- colorbar[1] - col_sup <- colorbar[length(colorbar)] - } - options(bitmapType = "cairo") - # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) - # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- skill_metrics[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", - "longitude", - "latitude")) - # Split skill significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, - dim = 'time', - level = "sublist", - names = "dots") - } else { - skill_significance <- NULL - } - # Define output file name and titles - if (tolower(recipe$Analysis$Horizon) == "seasonal") { - outfile <- paste0(outdir, name, "-", month_label, ".png") - } else { - outfile <- paste0(outdir, name, ".png") + for (var in 1:data_cube$dims[['var']]) { + var_skill <- lapply(skill_metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', + indices = var, + drop = 'selected')}) + + for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) { + if (name %in% names(skill_metrics)) { + # Define plot characteristics and metric name to display in plot + if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", + "rpss_specs", "bss90_specs", "bss10_specs", + "rmsss")) { + display_name <- toupper(strsplit(name, "_")[[1]][1]) + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name == "mean_bias_ss") { + display_name <- "Mean Bias Skill Score" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- NULL + } else if (name %in% c("enscorr", "enscorr_specs")) { + display_name <- "Ensemble Mean Correlation" + skill <- var_skill[[name]] + brks <- seq(-1, 1, by = 0.2) + cols <- clim.colors(length(brks) - 1, diverging_palette) + col_inf <- NULL + col_sup <- NULL + } else if (name %in% scores) { + skill <- var_skill[[name]] + display_name <- toupper(strsplit(name, "_")[[1]][1]) + brks <- seq(0, 1, by = 0.1) + colorbar <- grDevices::hcl.colors(length(brks), sequential_palette) + cols <- 
colorbar[1:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "enssprerr") { + skill <- var_skill[[name]] + display_name <- "Spread-to-Error Ratio" + brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) + colorbar <- clim.colors(length(brks), diverging_palette) + cols <- colorbar[1:length(colorbar) - 1] + col_inf <- NULL + col_sup <- colorbar[length(colorbar)] + } else if (name == "mean_bias") { + skill <- var_skill[[name]] + display_name <- "Mean Bias" + max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), + abs(quantile(skill, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + } + options(bitmapType = "cairo") + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- var_skill[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = 'time', + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir[var], name, "-", month_label, ".png") + } else { + outfile <- paste0(outdir[var], name, ".png") + } + toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName[var], + "-", system_name, "-", month_abbreviation, + hcst_period) + months <- unique(lubridate::month(data_cube$attrs$Dates, + label = T, abb = F)) + titles <- as.vector(months) + # Plot + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents=F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = outfile, + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3) + ) } - toptitle <- paste(display_name, "-", data_cube$attrs$Variable$varName, - "-", system_name, "-", month_abbreviation, - hcst_period) - months <- unique(lubridate::month(data_cube$attrs$Dates, - label = T, abb = F)) - titles <- as.vector(months) - # Plot - suppressWarnings( - PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list - longitude, latitude, - special_args = skill_significance, - dot_symbol = 20, - toptitle = toptitle, - title_scale = 0.6, - titles = titles, - filled.continents=F, - brks = brks, - cols = cols, - col_inf = col_inf, - col_sup = col_sup, - fileout = outfile, - bar_label_digits = 3, - bar_extra_margin = rep(0.9, 4), - bar_label_scale = 1.5, - axes_label_scale = 1.3) - ) } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } - info(recipe$Run$logger, - "##### SKILL METRIC PLOTS SAVED TO OUTPUT 
DIRECTORY #####") } -- GitLab From c57080e0170e9c253b26bca2eaab4b3b652c52a3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 15 May 2023 14:55:13 +0200 Subject: [PATCH 43/47] Fix decadal unit tests (WIP) --- tests/testthat/test-decadal_monthly_1.R | 14 +++++++------- tests/testthat/test-decadal_monthly_2.R | 22 +++++++++++----------- tests/testthat/test-decadal_monthly_3.R | 12 ++++++------ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 9b46cce8..a5c95e0c 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -187,7 +187,7 @@ class(skill_metrics$rpss), ) expect_equal( dim(skill_metrics$rpss), -c(time = 3, latitude = 5, longitude = 4) +c(var = 1, time = 3, latitude = 5, longitude = 4) ) expect_equal( dim(skill_metrics$rpss_significance), @@ -218,27 +218,27 @@ c('percentile_33', 'percentile_66', 'percentile_10', 'percentile_90') ) expect_equal( dim(probs$probs$prob_b33), -c(syear = 4, time = 3, latitude = 5, longitude = 4) +c(var = 1, syear = 4, time = 3, latitude = 5, longitude = 4) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 3, latitude = 5, longitude = 4) +c(var = 1, time = 3, latitude = 5, longitude = 4) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.5, 0.0, 1.0) ) expect_equal( -as.vector(probs$probs$prob_10_to_90[, 1, 2, 2]), +as.vector(probs$probs$prob_10_to_90[, , 1, 2, 2]), c(1.0, 1.0, 0.5, 0.5) ) expect_equal( -as.vector(probs$percentiles$percentile_33[, 1, 2]), +as.vector(probs$percentiles$percentile_33[, , 1, 2]), c(293.7496, 287.4263, 285.8295), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_10[, 1, 2]), +as.vector(probs$percentiles$percentile_10[, , 1, 2]), c(293.1772, 286.9533, 284.7887), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 04b9a419..2b605109 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -171,11 +171,11 @@ class(skill_metrics$rpss_specs), "array" ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(time = 14, latitude = 8, longitude = 5)))), +all(unlist(lapply(lapply(skill_metrics, dim), all.equal, c(var = 1, time = 14, latitude = 8, longitude = 5)))), TRUE ) expect_equal( -as.vector(skill_metrics$rpss_specs[6:8, 1, 2]), +as.vector(skill_metrics$rpss_specs[, 6:8, 1, 2]), c(-0.3333333, 0.1666667, -0.3333333), tolerance = 0.0001 ) @@ -184,26 +184,26 @@ tolerance = 0.0001 #TRUE #) expect_equal( -as.vector(skill_metrics$enscorr_specs[6:8, 1, 2]), +as.vector(skill_metrics$enscorr_specs[, 6:8, 1, 2]), c(0.4474382, 0.1026333, 0.4042823), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$frps_specs[6:8, 1, 2]), +as.vector(skill_metrics$frps_specs[, 6:8, 1, 2]), c(0.4444444, 0.2222222, 0.4444444), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$frpss_specs[4:7, 1, 5]), +as.vector(skill_metrics$frpss_specs[, 4:7, 1, 5]), c( 1.0, -0.5, -0.5, 0.5), tolerance = 0.0001 ) expect_equal( -as.vector(skill_metrics$bss10_specs[6:8, 1, 2]), +as.vector(skill_metrics$bss10_specs[, 6:8, 1, 2]), c(0.5, -0.5, -0.5), ) expect_equal( -as.vector(skill_metrics$frps[6:8, 1, 2]), +as.vector(skill_metrics$frps[, 6:8, 1, 2]), c(0.4444444, 0.2222222, 0.4444444), tolerance = 0.0001 ) @@ -223,19 +223,19 @@ c('percentile_33', 'percentile_66') ) 
expect_equal( dim(probs$probs$prob_b33), -c(syear = 3, time = 14, latitude = 8, longitude = 5) +c(var = 1, syear = 3, time = 14, latitude = 8, longitude = 5) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 14, latitude = 8, longitude = 5) +c(var = 1,time = 14, latitude = 8, longitude = 5) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.3333333, 0.6666667), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]), c(271.7508, 273.1682, 274.1937), tolerance = 0.0001 ) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 988172c6..7c3c386a 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -140,15 +140,15 @@ class(skill_metrics[[1]]), "array" ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(var = 1, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(var = 1, ensemble = 3, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( -as.vector(skill_metrics$bss10[, 1, 2]), +as.vector(skill_metrics$bss10[, , 1, 2]), c(-0.1904762, -0.1904762, -0.1904762), tolerance = 0.0001 ) @@ -157,7 +157,7 @@ any(as.vector(skill_metrics$bss10_significance)), FALSE ) expect_equal( -as.vector(skill_metrics$corr[2, , 1, 2]), +as.vector(skill_metrics$corr[1, 2, , 1, 2]), c(-0.2015265, 0.4635463, -0.1019575), tolerance = 0.0001 ) @@ -184,12 +184,12 @@ dim(probs$percentiles$percentile_33), c(time = 3, latitude = 25, longitude = 16) ) expect_equal( -as.vector(probs$probs$prob_b33[, 1, 2, 2]), +as.vector(probs$probs$prob_b33[, , 1, 2, 2]), c(0.0, 0.3333333, 0.3333333, 0.6666667), tolerance = 0.0001 ) expect_equal( -as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +as.vector(probs$percentiles$percentile_33[, 1:3, 1, 2]), c(278.1501, 279.5226, 282.0237), tolerance = 0.0001 ) -- GitLab From 2859c4a92de18ea2dfe9794444ee6a5331363f11 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 16 May 2023 10:16:53 +0200 Subject: [PATCH 44/47] Fix pipeline! 
--- tests/testthat/test-decadal_monthly_3.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R index 7c3c386a..7232ebfd 100644 --- a/tests/testthat/test-decadal_monthly_3.R +++ b/tests/testthat/test-decadal_monthly_3.R @@ -144,7 +144,7 @@ all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(var = 1, time = TRUE ) expect_equal( -all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(var = 1, ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +all(unlist(lapply(lapply(skill_metrics, dim)[3:4], all.equal, c(ensemble = 3, var = 1, time = 3, latitude = 25, longitude = 16)))), TRUE ) expect_equal( @@ -157,7 +157,7 @@ any(as.vector(skill_metrics$bss10_significance)), FALSE ) expect_equal( -as.vector(skill_metrics$corr[1, 2, , 1, 2]), +as.vector(skill_metrics$corr[2, , , 1, 2]), c(-0.2015265, 0.4635463, -0.1019575), tolerance = 0.0001 ) @@ -177,11 +177,11 @@ c('percentile_33', 'percentile_66') ) expect_equal( dim(probs$probs$prob_b33), -c(syear = 4, time = 3, latitude = 25, longitude = 16) +c(var = 1, syear = 4, time = 3, latitude = 25, longitude = 16) ) expect_equal( dim(probs$percentiles$percentile_33), -c(time = 3, latitude = 25, longitude = 16) +c(var = 1, time = 3, latitude = 25, longitude = 16) ) expect_equal( as.vector(probs$probs$prob_b33[, , 1, 2, 2]), -- GitLab From 7ce7543d7b3a57d5287d3045ba029c20a79abd92 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 18 May 2023 14:35:21 +0200 Subject: [PATCH 45/47] Clean code --- modules/Loading/Loading.R | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index c979b084..05f54091 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -27,8 +27,7 @@ load_datasets <- function(recipe) { ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]][1] - vars <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq # get sdates array @@ -60,9 +59,9 @@ load_datasets <- function(recipe) { archive <- read_yaml("conf/archive.yml")$esarchive exp_descrip <- archive$System[[exp.name]] - freq.hcst <- unlist(exp_descrip[[store.freq]][variable]) + freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) reference_descrip <- archive$Reference[[ref.name]] - freq.obs <- unlist(reference_descrip[[store.freq]][variable]) + freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) obs.dir <- reference_descrip$src fcst.dir <- exp_descrip$src hcst.dir <- exp_descrip$src @@ -76,8 +75,8 @@ load_datasets <- function(recipe) { ## accum <- FALSE ##} - var_dir_obs <- reference_descrip[[store.freq]][vars] - var_dir_exp <- exp_descrip[[store.freq]][vars] + var_dir_obs <- reference_descrip[[store.freq]][variable] + var_dir_exp <- exp_descrip[[store.freq]][variable] # ----------- obs.path <- paste0(archive$src, @@ -109,7 +108,7 @@ load_datasets <- function(recipe) { # Load hindcast #------------------------------------------------------------------- hcst <- Start(dat = hcst.path, - var = vars, + var = variable, var_dir = var_dir_exp, file_date = sdates$hcst, time = idxs$hcst, @@ -173,7 +172,7 @@ load_datasets <- function(recipe) { # multiple dims split fcst <- Start(dat = fcst.path, - var 
= vars, + var = variable, var_dir = var_dir_exp, var_dir_depends = 'var', file_date = sdates$fcst, @@ -246,7 +245,7 @@ load_datasets <- function(recipe) { dim(dates_file) <- dim(dates) obs <- Start(dat = obs.path, - var = vars, + var = variable, var_dir = var_dir_obs, var_dir_depends = 'var', file_date = dates_file, @@ -279,7 +278,7 @@ load_datasets <- function(recipe) { dim(dates) <- dim(dates_file) obs <- Start(dat = obs.path, - var = vars, + var = variable, var_dir = var_dir_obs, var_dir_depends = 'var', file_date = sort(unique(dates_file)), @@ -354,8 +353,8 @@ load_datasets <- function(recipe) { # Remove negative values in accumulative variables dictionary <- read_yaml("conf/variable-dictionary.yml") - for (var_idx in 1:length(vars)) { - var_name <- vars[var_idx] + for (var_idx in 1:length(variable)) { + var_name <- variable[var_idx] if (dictionary$vars[[var_name]]$accum) { info(recipe$Run$logger, paste0("Accumulated variable ", var_name, @@ -372,7 +371,7 @@ load_datasets <- function(recipe) { # Convert prlr from m/s to mm/day ## TODO: Make a unit conversion function - if (vars[[var_idx]] == "prlr") { + if (variable[[var_idx]] == "prlr") { # Verify that the units are m/s and the same in obs and hcst if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") || (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) && -- GitLab From 43c70d0a6fc7e3462cb1410f1b32533e9f4acad7 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 26 May 2023 15:02:32 +0200 Subject: [PATCH 46/47] Add multivar to decadal loading; can't work with multi_path yet (use both dcppA and dcppB) --- conf/archive_decadal.yml | 10 ++-- modules/Loading/Loading_decadal.R | 27 +++++---- modules/Loading/helper_loading_decadal.R | 26 +++++++-- .../recipe_test_multivar_decadal.yml | 57 +++++++++++++++++++ ...recipe_test_multivar_decadal_multipath.yml | 57 +++++++++++++++++++ 5 files changed, 154 insertions(+), 23 deletions(-) create mode 100644 recipes/atomic_recipes/recipe_test_multivar_decadal.yml create mode 100644 recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 91637024..2e0a1b29 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -91,9 +91,9 @@ esarchive: first_dcppB_syear: 2019 monthly_mean: table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "ts":"Amon", "tos":"Omon"} - grid: {"tas":"gr", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} + grid: {"tas":"gn", "psl":"gr", "pr":"gr", "ts":"gr", "tos":"gr"} #version depends on member and variable - version: {"tas":"v20200316", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} + version: {"tas":"v20200417", "psl":"v20200316", "pr":"v20200316", "ts":"v20200316", "tos":"v20200417"} daily_mean: grid: {"tas":"gn"} version: {"tasmin":"v20200101", "tasmax":"v20200101", "pr":"v20200417"} @@ -132,10 +132,10 @@ esarchive: fcst: "exp/canesm5/cmip6-dcppB-forecast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppB-forecast/" first_dcppB_syear: 2020 monthly_mean: - table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon"} + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tasmin":"Amon", "tasmax":"Amon", "tos":"Omon"} - grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn"} - version: {"tas":"v20190429", "pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429"} + grid: {"tas":"gn", "pr":"gn", "psl":"gn", "tasmin":"gn", "tasmax":"gn", "tos":"gr"} + version: {"tas":"v20190429", 
"pr":"v20190429", "psl":"v20190429", "tasmin":"v20190429", "tasmax":"v20190429", "tos":"v20190429"} daily_mean: grid: {"pr":"gn", "tas":"gn", "tasmax":"gn", "tasmin":"gn"} version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"} diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index b9a145e3..bc8baae0 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -15,8 +15,8 @@ source("tools/libs.R") #==================================================================== -# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" -# recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" load_datasets <- function(recipe) { @@ -35,7 +35,8 @@ load_datasets <- function(recipe) { exp.name <- recipe$Analysis$Datasets$System$name #'HadGEM3' ref.name <- recipe$Analysis$Datasets$Reference$name #'era5' member <- strsplit(recipe$Analysis$Datasets$System$member, ',')[[1]] #c("r1i1p1f2", "r2i1p1f2") - variable <- recipe$Analysis$Variables$name #'tas' +# variable <- recipe$Analysis$Variables$name #'tas' + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq #monthly_mean lats.min <- as.numeric(recipe$Analysis$Region$latmin) #0 lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10 @@ -64,12 +65,12 @@ load_datasets <- function(recipe) { # Read from archive: #------------------------- if (store.freq == "monthly_mean") { - table <- archive$System[[exp.name]][[store.freq]]$table[[variable]] #'Amon' + table <- archive$System[[exp.name]][[store.freq]]$table[variable] #list(tas = 'Amon') } else { table <- 'day' } - grid <- archive$System[[exp.name]][[store.freq]]$grid[[variable]] - version <- archive$System[[exp.name]][[store.freq]]$version[[variable]] + grid <- archive$System[[exp.name]][[store.freq]]$grid[variable] #list(tas = 'gr') + version <- archive$System[[exp.name]][[store.freq]]$version[variable] #list(tas = 'v20210910') if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } @@ -95,13 +96,9 @@ load_datasets <- function(recipe) { version = version, sdates = sdates_hcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -# hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, -# '$ensemble$', table, '$var$', grid, version) -# hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - Start_default_arg_list <- list( - dat = path_list, #file.path(hcst.path, hcst.files), + dat = path_list, var = variable, syear = paste0(sdates_hcst), chunk = 'all', @@ -120,7 +117,7 @@ load_datasets <- function(recipe) { transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), - path_glob_permissive = 2, # for version +# path_glob_permissive = 2, # for version synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, @@ -128,6 +125,12 @@ load_datasets <- function(recipe) { silent = !DEBUG, retrieve = T) + if (length(variable) > 1) { + Start_default_arg_list <- c(Start_default_arg_list, + list(table = table, grid = grid, version = version, + table_depends = 'var', grid_depends = 'var', version_depends = 'var')) + } + if (!multi_path) { 
Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/helper_loading_decadal.R index f4f1ec32..b93f3279 100644 --- a/modules/Loading/helper_loading_decadal.R +++ b/modules/Loading/helper_loading_decadal.R @@ -106,22 +106,36 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { #========================================== # This function generates the path for Start() call. It shouldn't be needed when Start() is improved. +# table, grid, version: A list with variables as name. E.g., list(tas = 'Amon') get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { # Define path (monthly and daily) multi_path <- FALSE if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) { # only dcppA - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { # multiple vars + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppA-hindcast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } path_list <- file.path(fcst.path, fcst.files) } else { if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid) #, version) - fcst.files <- paste0('v*/$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + if (length(table) == 1) { # only one variable + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version) + + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + } else { + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', '$table$', '$var$', '$grid$', '$version$') + fcst.files <- paste0('$var$_', '$table$', '_*_dcppB-forecast_s$syear$-$ensemble$_', '$grid$', '_$chunk$.nc') + } path_list <- file.path(fcst.path, fcst.files) } else { # have both dcppA and dcppB diff --git a/recipes/atomic_recipes/recipe_test_multivar_decadal.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml new file mode 100644 index 00000000..00563fe2 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_multivar_decadal.yml @@ -0,0 +1,57 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas tos + freq: monthly_mean + Datasets: + System: + name: CanESM5 + member: r1i1p2f1,r2i1p2f1,r3i1p2f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1993 +# season: 'Annual' + ftime_min: 2 + ftime_max: 14 + Region: + latmin: 10 + latmax: 20 + lonmin: 150 + lonmax: 170 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + 
method: bias + save: 'all' + Skill: + metric: RPSS Corr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml new file mode 100644 index 00000000..a38f81b8 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_multivar_decadal_multipath.yml @@ -0,0 +1,57 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas pr + freq: monthly_mean + Datasets: + System: + name: EC-Earth3-i4 + member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1993 +# season: 'Annual' + ftime_min: 2 + ftime_max: 14 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS Corr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + -- GitLab From e4eef2539aab1de053904316d86c8c79aa240b35 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 26 May 2023 15:35:22 +0200 Subject: [PATCH 47/47] Add stop() for multi_path + multivar case --- modules/Loading/Loading_decadal.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index bc8baae0..43b3c54f 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -97,6 +97,11 @@ load_datasets <- function(recipe) { path_list <- tmp$path_list multi_path <- tmp$multi_path + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. This case is not available for now.") + } + Start_default_arg_list <- list( dat = path_list, var = variable, @@ -192,10 +197,11 @@ load_datasets <- function(recipe) { version = version, sdates = sdates_fcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -# fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, -# '$ensemble$', table, '$var$', grid, version) -# fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + #TODO: to make this case work; enhance Start() if it's possible + if (multi_path & length(variable) > 1) { + stop("The recipe requests multiple variables and start dates from both dpccA-hindcast and dcppB-forecast. 
This case is not available for now.") + } # monthly & daily if (!multi_path) { -- GitLab