From 74126fb241e37c094fe10d2c6470bca8f4f890a4 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 29 Jul 2022 10:06:16 +0200 Subject: [PATCH 01/18] Change initial month from 13 to 1 --- conf/archive_decadal.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 28ff6028..7ac36b1a 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -129,7 +129,7 @@ archive: version: {"pr":"v20190429", "tas":"v20190429", "tasmax":"v20190429", "tasmin":"v20190429"} calendar: "365_day" member: r1i1p2f1,r2i1p2f1,r3i1p2f1,r4i1p2f1,r5i1p2f1,r6i1p2f1,r7i1p2f1,r8i1p2f1, r9i1p2f1, r10i1p2f1, r11i1p2f1,r12i1p2f1,r13i1p2f1,r14i1p2f1,r15i1p2f1,r16i1p2f1,r17i1p2f1,r18i1p2f1, r19i1p2f1, r20i1p2f1,r21i1p2f1,r22i1p2f1,r23i1p2f1,r24i1p2f1,r25i1p2f1,r26i1p2f1,r27i1p2f1,r28i1p2f1, r29i1p2f1, r30i1p2f1, r31i1p2f1,r32i1p2f1,r33i1p2f1,r34i1p2f1,r35i1p2f1,r36i1p2f1,r37i1p2f1,r38i1p2f1, r39i1p2f1, r40i1p2f1 - initial_month: 13 #next year Jan + initial_month: 1 #next year Jan reference_grid: "/esarchive/exp/canesm5/cmip6-dcppA-hindcast_i1p2/original_files/cmorfiles/DCPP/CCCma/CanESM5/dcppA-hindcast/r1i1p2f1/Amon/tas/gn/v20190429/tas_Amon_CanESM5_dcppA-hindcast_s2008-r1i1p2f1_gn_200901-201812.nc" # ---- -- GitLab From 3891dc94d688437331b3bcdf9029a0dca3b40861 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 29 Jul 2022 10:06:46 +0200 Subject: [PATCH 02/18] New recipe to test --- .../recipe_decadal_monthly_2.yml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml new file mode 100644 index 00000000..6896a541 --- /dev/null +++ b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml @@ -0,0 +1,44 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: daily_mean + Datasets: + System: + name: CanESM5 + member: r1i1p2f1,r2i1p2f1 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst: + hcst_start: 2014 + hcst_end: 2016 +# season: 'Annual' + ftime_min: 0 + ftime_max: 2 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Calibration: + method: bias + Skill: + metric: RPSS + Indicators: + index: FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + -- GitLab From 9d6427e75b093c408e0ee0207a64b3a96d88251e Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 5 Aug 2022 16:08:24 +0200 Subject: [PATCH 03/18] Revise for improved as.s2dv_cube() --- modules/Loading/Loading_decadal.R | 9 +- tests/recipes/recipe-decadal_daily_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_2.yml | 2 +- tests/testthat/test-decadal_daily_1.R | 2 +- tests/testthat/test-decadal_monthly_1.R | 2 +- tests/testthat/test-decadal_monthly_2.R | 2 +- tools/tmp/as.s2dv_cube.R | 184 +++++++++++++++++++++ 8 files changed, 198 insertions(+), 7 deletions(-) create mode 100644 tools/tmp/as.s2dv_cube.R diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 6ff9023a..7739cf9a 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -6,12 +6,14 @@ ## TODO: remove paths 
to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") -#source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs source("modules/Loading/get_daily_time_ind.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") +## TODO: Remove once the fun is included in CSTools +source("tools/tmp/as.s2dv_cube.R") + #==================================================================== @@ -221,6 +223,11 @@ load_datasets <- function(recipe_file) { dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) } + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]] + } + # Change class from startR_array to s2dv_cube suppressWarnings( fcst <- as.s2dv_cube(fcst) diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index de2c9288..ba3d1f26 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -41,6 +41,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + output_dir: ./out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 577fdb89..78044fd9 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -41,6 +41,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + output_dir: ./out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 92f1553d..8f95f961 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -41,6 +41,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ + output_dir: ./out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/testthat/test-decadal_daily_1.R b/tests/testthat/test-decadal_daily_1.R index 84a528cd..2fbede4c 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -52,7 +52,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 51d10f1b..07e084df 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -54,7 +54,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") ) expect_equal( names(data$hcst), diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index 02aabdf1..b1e375f8 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -55,7 +55,7 @@ class(data$obs), ) expect_equal( names(data$hcst), -c("data", "Variable", 
"Datasets", "Dates", "when", "source_files", "load_parameters") +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") ) expect_equal( names(data$hcst), diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R new file mode 100644 index 00000000..019f69a2 --- /dev/null +++ b/tools/tmp/as.s2dv_cube.R @@ -0,0 +1,184 @@ +#'Conversion of 'startR_array' or 'list' objects to 's2dv_cube' +#' +#'This function converts data loaded using startR package or s2dverification Load function into a 's2dv_cube' object. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#' +#'@param object an object of class 'startR_array' generated from function \code{Start} from startR package (version 0.1.3 from earth.bsc.es/gitlab/es/startR) or a list output from function \code{Load} from s2dverification package. +#' +#'@return The function returns a 's2dv_cube' object to be easily used with functions \code{CST} from CSTools package. +#' +#'@seealso \code{\link{s2dv_cube}}, \code{\link[s2dverification]{Load}}, \code{\link[startR]{Start}} and \code{\link{CST_Load}} +#'@examples +#'\dontrun{ +#'library(startR) +#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +#'data <- Start(dat = repos, +#' var = 'tas', +#' sdate = c('20170101', '20180101'), +#' ensemble = indices(1:20), +#' time = 'all', +#' latitude = 'all', +#' longitude = indices(1:40), +#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), +#' retrieve = TRUE) +#'data <- as.s2dv_cube(data) +#'class(data) +#'startDates <- c('20001101', '20011101', '20021101', +#' '20031101', '20041101', '20051101') +#'data <- Load(var = 'tas', exp = 'system5c3s', +#' nmember = 15, sdates = startDates, +#' leadtimemax = 3, latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40, output = 'lonlat') +#'data <- as.s2dv_cube(data) +#'class(data) +#'} +#'@export +as.s2dv_cube <- function(object) { + if (is.list(object)) { + if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { + stop("The s2dverification::Load call did not return any data.") + } + obs <- object + obs$mod <- NULL + object$obs <- NULL + names(object)[[1]] <- 'data' + names(obs)[[1]] <- 'data' + remove_matches <- function(v, patterns) { + if (length(v) > 0) { + matches <- c() + for (pattern in patterns) { + matches <- c(matches, which(grepl(pattern, v))) + } + if (length(matches) > 0) { + v <- v[-matches] + } + } + v + } + + harmonize_patterns <- function(v) { + matches <- grepl('.*\\.nc$', v) + if (sum(!matches) > 0) { + match_indices <- which(!matches) + v[match_indices] <- sapply(v[match_indices], function(x) paste0(x, '*')) + } + v <- glob2rx(v) + v <- gsub('\\$.*\\$', '*', v) + v + } + + if (!is.null(obs$data)) { + obs$Datasets$exp <- NULL + obs$Datasets <- obs$Datasets$obs + obs_path_patterns <- sapply(obs$Datasets, function(x) attr(x, 'source')) + obs_path_patterns <- harmonize_patterns(obs_path_patterns) + } + + if (!is.null(object$data)) { + object$Datasets$obs <- NULL + object$Datasets <- object$Datasets$exp + exp_path_patterns <- sapply(object$Datasets, function(x) attr(x, 'source')) + exp_path_patterns <- harmonize_patterns(exp_path_patterns) + } + + if (!is.null(obs$data) && !is.null(object$data)) { + obs$source_files <- remove_matches(obs$source_files, + exp_path_patterns) + obs$not_found_files <- remove_matches(obs$not_found_files, + exp_path_patterns) + + object$source_files <- remove_matches(object$source_files, 
+ obs_path_patterns) + object$not_found_files <- remove_matches(object$not_found_files, + obs_path_patterns) + } + + result <- list() + if (!is.null(object$data)) { + class(object) <- 's2dv_cube' + result$exp <- object + } + if (!is.null(obs$data)) { + class(obs) <- 's2dv_cube' + result$obs <- obs + } + if (is.list(result)) { + if (is.null(result$exp)) { + result <- result$obs + } else if (is.null(result$obs)) { + result <- result$exp + } else { + warning("The output is a list of two 's2dv_cube' objects", + " corresponding to 'exp' and 'obs'.") + } + } + + } else if (class(object) == 'startR_array') { + result <- list() + result$data <- as.vector(object) + dim(result$data) <- dim(object) + + dat_attr_names <- names(attributes(object)$Variables$dat1) + common_attr_names <- names(attributes(object)$Variables$common) + # $lon + known_lon_names <- s2dv:::.KnownLonNames() + if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lon_names)]) & + !identical(dat_attr_names[which(dat_attr_names %in% known_lon_names)], character(0))) { + result$lon <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lon_names)]]] + } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lon_names)]) & + !identical(common_attr_names[which(common_attr_names %in% known_lon_names)], character(0))) { + result$lon <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lon_names)]]] + } else { + warning("'lon' is not found in this object.") + result$lon <- NULL + } + # $lat + known_lat_names <- s2dv:::.KnownLatNames() + if (!is.null(dat_attr_names[which(dat_attr_names %in% known_lat_names)]) & + !identical(dat_attr_names[which(dat_attr_names %in% known_lat_names)], character(0))) { + result$lat <- attributes(object)$Variables$dat1[[dat_attr_names[which(dat_attr_names %in% known_lat_names)]]] + } else if (!is.null(common_attr_names[which(common_attr_names %in% known_lat_names)]) & + !identical(common_attr_names[which(common_attr_names %in% known_lat_names)], character(0))) { + result$lat <- attributes(object)$Variables$common[[common_attr_names[which(common_attr_names %in% known_lat_names)]]] + } else { + warning("'lat' is not found in this object.") + result$lat <- NULL + } + + vars <- which(!common_attr_names %in% c("time", known_lon_names, known_lat_names)) + + if (length(vars) > 1) { + warning("More than one variable has been provided and ", + "only the first one '", common_attr_names[vars[1]],"' will be used.") + vars <- vars[1] + } + + Variable <- list() + Variable$varName <- names(attributes(object)$Variables$common)[vars] + attr(Variable, 'variable') <- attributes(object)$Variables$common[[vars]] + result$Variable <- Variable + dims <- dim(object) + if (any(c('sdate', 'sdates') %in% names(dims))) { + n_sdates <- dims[which(names(dims) == 'sdate' | names(dims) == 'sdates')] + sdates <- attributes(object)$Variables$common$time[1 : n_sdates] + } else { + sdates <- attributes(object)$Variables$common$time[1] + } + Dataset <- list(list(InitializationDates = list(Member_1 = sdates))) + names(Dataset) <- list(deparse(substitute(object))) + result$Datasets <- Dataset + result$Dates$start <- attributes(object)$Variables$common$time + result$when <- Sys.time() + result$source_files <- as.vector(attributes(object)$Files) + result$load_parameters <- attributes(object)$FileSelectors + class(result) <- 's2dv_cube' + } else { + stop("The class of parameter 'object' is not implemented", + " to be converted into 's2dv_cube' class yet.") + } 
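+  # 'result' is either a single 's2dv_cube' or, for a Load() output containing
+  # both experiment and observations, a list with elements 'exp' and 'obs'
+  # (see the warning above).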
+ result + +} + -- GitLab From 2517ac807164a7c6239ccbde8937d50cfe51a12c Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 5 Aug 2022 16:46:13 +0200 Subject: [PATCH 04/18] Update recipe for new dev --- modules/Loading/testing_recipes/recipe_decadal.yml | 2 ++ modules/Loading/testing_recipes/recipe_decadal_daily.yml | 2 ++ modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml | 4 ++++ tests/recipes/recipe-decadal_daily_1.yml | 2 ++ tests/recipes/recipe-decadal_monthly_1.yml | 2 ++ tests/recipes/recipe-decadal_monthly_2.yml | 2 ++ 6 files changed, 14 insertions(+) diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index bcbcc736..9f09deca 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -37,6 +37,8 @@ Analysis: percentiles: [[1/3, 2/3]] Indicators: index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 457dccf6..8e18b0fb 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -37,6 +37,8 @@ Analysis: percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml index 6896a541..8c2ef88f 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml @@ -33,8 +33,12 @@ Analysis: method: bias Skill: metric: RPSS + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index ba3d1f26..762572b7 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -37,6 +37,8 @@ Analysis: percentiles: [[1/10, 9/10]] Indicators: index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 78044fd9..dae79ef5 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -37,6 +37,8 @@ Analysis: percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 8f95f961..ce832131 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -37,6 +37,8 @@ Analysis: percentiles: [[1/3, 2/3]] Indicators: index: FALSE + 
ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: Loglevel: INFO -- GitLab From 32c2f11c1a903846fa0c1aba55c5e09999b0b9d5 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 9 Aug 2022 16:41:11 +0200 Subject: [PATCH 05/18] First modification on save_forecast() to work with decadal --- modules/Saving/Saving.R | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 1c56e459..314c09d3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -67,7 +67,9 @@ get_times <- function(store.freq, fcst.horizon, leadtimes, sdate) { "seasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "subseasonal" = {len <- 4; ref <- 'hours since '; - stdname <- ''}) + stdname <- ''}, + "decadal" = {time <- leadtimes; ref <- 'hours since '; + stdname <- paste(strtoi(leadtimes), collapse=", ")}) dim(time) <- length(time) sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting @@ -127,19 +129,21 @@ save_forecast <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) + if (fcst.horizon == 'decadal') { + init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in syears) { # Select year from array and rearrange dimensions - fcst <- data_cube$data[ , , , , i, , , , ] # Select year + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) if (!("time" %in% names(dim(fcst)))) { dim(fcst) <- c("time" = 1, dim(fcst)) @@ -153,7 +157,7 @@ save_forecast <- function(data_cube, # Add metadata var.sdname <- dictionary$vars[[variable]]$standard_name if (tolower(agg) == "country") { - dims <- c('Country', 'time') + dims <- c('Country', 'ensemble', 'time') var.expname <- paste0(variable, '_country') var.longname <- paste0("Country-Aggregated ", var.longname) var.units <- attr(data_cube$Variable, 'variable')$units @@ -175,7 +179,11 @@ save_forecast <- function(data_cube, attr(fcst[[1]], 'global_attrs') <- global_attributes # Select start date - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + if (fcst.horizon == 'decadal') { + fcst.sdate <- format(as.Date(data_cube$Dates$start[1]), '%Y%m%d') + } else { + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + } # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -226,9 +234,7 @@ save_observations <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? 
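  # (A minimal sanity check of the unit convention used below, assuming
  #  lubridate is attached as elsewhere in this module:
  #    interval(as.Date("1990-11-01"), as.Date("1990-12-01")) %/% hours(1)
  #  returns 720 -- i.e. the lead times are hours since initialization.)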
- dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), format = '%Y%m%d') @@ -238,7 +244,7 @@ save_observations <- function(data_cube, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) for (i in syears) { # Select year from array and rearrange dimensions - fcst <- data_cube$data[ , , , , i, , , , ] # Select year + fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) if (!("time" %in% names(dim(fcst)))) { dim(fcst) <- c("time" = 1, dim(fcst)) -- GitLab From 86575910ec81d8fe6757a29c53a6d28e52d2b436 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 10 Aug 2022 14:37:53 +0200 Subject: [PATCH 06/18] Change 'fcst' to 'fcst_year' --- modules/Loading/testing_recipes/recipe_decadal.yml | 6 +++--- modules/Loading/testing_recipes/recipe_decadal_daily.yml | 2 +- .../Loading/testing_recipes/recipe_decadal_monthly_2.yml | 2 +- tests/recipes/recipe-decadal_daily_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_2.yml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 9f09deca..6b21d555 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -14,11 +14,11 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - fcst: [2020,2021] + fcst_year: [2020,2021] hcst_start: 1990 - hcst_end: 1992 + hcst_end: 1993 # season: 'Annual' - ftime_min: 0 + ftime_min: 1 ftime_max: 13 Region: latmin: 10 #-90 diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 8e18b0fb..8884ac7f 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -14,7 +14,7 @@ Analysis: Reference: name: ERA5 Time: - fcst: [2020,2021] + fcst_year: [2020,2021] hcst_start: 2016 hcst_end: 2019 season: 'Annual' diff --git a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml index 8c2ef88f..f9130e16 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_monthly_2.yml @@ -14,7 +14,7 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - fcst: + fcst_year: hcst_start: 2014 hcst_end: 2016 # season: 'Annual' diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index 762572b7..d0d0726d 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -14,7 +14,7 @@ Analysis: Reference: name: ERA5 Time: - fcst: [2017,2018] + fcst_year: [2017,2018] hcst_start: 1990 hcst_end: 1992 season: 'Annual' diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index dae79ef5..25ca1320 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -14,7 +14,7 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - fcst: 2021 + fcst_year: 2021 hcst_start: 1991 hcst_end: 1994 # season: 'Annual' diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index ce832131..3410c3c5 
100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -14,7 +14,7 @@ Analysis: Reference: name: ERA5 #JRA-55 Time: - fcst: [2020,2021] + fcst_year: [2020,2021] hcst_start: 1990 hcst_end: 1992 # season: 'Annual' -- GitLab From bee8026340bffc9bf1b55f21458938c5738cd913 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 10 Aug 2022 16:07:20 +0200 Subject: [PATCH 07/18] Manually correct time attr if fcst has multiple path. --- modules/Loading/Loading_decadal.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 7739cf9a..2624cb8d 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -208,20 +208,28 @@ load_datasets <- function(recipe_file) { dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) - } + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- diff(sdates_fcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') - tmp_time_attr <- attr(fcst, 'Variables')$common$time + } # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) tmp_time_attr <- attr(fcst, 'Variables')$common$time - #TODO: This check cannot pass if multi_path - if (!multi_path) { +# #TODO: This check cannot pass if multi_path +# if (!multi_path) { if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { stop("fcst has problem in matching data and time attr dimension.") } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) - } +# } #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { -- GitLab From e8b0e44bdb5f71f6f953083e434f0fb005261c3b Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 10 Aug 2022 16:08:05 +0200 Subject: [PATCH 08/18] Correct saving decadal data date folder name --- modules/Saving/Saving.R | 28 ++++++++++++++++++++-------- modules/Saving/paths2save.R | 16 +++++++++++++--- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 314c09d3..aee758ac 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -126,11 +126,13 @@ save_forecast <- function(data_cube, global_attributes <- get_global_attributes(recipe) fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq + # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) if (fcst.horizon == 'decadal') { + #NOTE: Use the first date as init_date. 
But it may be better to use the real initialized date (ask users) init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') } else { init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, @@ -180,7 +182,7 @@ save_forecast <- function(data_cube, # Select start date if (fcst.horizon == 'decadal') { - fcst.sdate <- format(as.Date(data_cube$Dates$start[1]), '%Y%m%d') + fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') } else { fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] } @@ -235,9 +237,13 @@ save_observations <- function(data_cube, # Generate vector containing leadtimes ## TODO: Move to a separate function? dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + if (fcst.horizon == 'decadal') { + init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) @@ -283,12 +289,18 @@ save_observations <- function(data_cube, # the same name pattern. ## Because observations are loaded differently in the daily vs. monthly ## cases, different approaches are necessary. - if (store.freq == "monthly_mean") { - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] - fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + if (fcst.horizon == 'decadal') { + fcst.sdate <- format(as.Date(data_cube$Dates$start[1]), '%Y%m%d') } else { - fcst.sdate <- as.Date(data_cube$Dates$start[i]) + + if (store.freq == "monthly_mean") { + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d') + } else { + fcst.sdate <- as.Date(data_cube$Dates$start[i]) + } } + # Ensure the year is correct if the first leadtime goes to the next year if (lubridate::month(fcst.sdate) < lubridate::month(init_date)) { lubridate::year(fcst.sdate) <- lubridate::year(fcst.sdate) + 1 diff --git a/modules/Saving/paths2save.R b/modules/Saving/paths2save.R index 4974cbd8..0975f087 100644 --- a/modules/Saving/paths2save.R +++ b/modules/Saving/paths2save.R @@ -41,10 +41,20 @@ get_dir <- function(recipe, agg = "global") { outdir <- recipe$Run$output_dir variable <- recipe$Analysis$Variables$name if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, collapse = '_') + } else { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } } else { - fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + if (tolower(recipe$Analysis$Horizon) == 'decadal') { + #PROBLEM: decadal doesn't have sdate + fcst.sdate <- paste0("hcst-", paste(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$hcst_end, sep = '_')) + } else { + fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) + } } calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) -- GitLab From ec7e42b1f4fd532978c1bbd5b830b1681b18f383 Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 10 Aug 2022 17:56:01 +0200 Subject: [PATCH 09/18] Create a test script for decadal --- modules/test_decadal.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 
100644 modules/test_decadal.R diff --git a/modules/test_decadal.R b/modules/test_decadal.R new file mode 100644 index 00000000..7923c028 --- /dev/null +++ b/modules/test_decadal.R @@ -0,0 +1,20 @@ + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "modules/Loading/testing_recipes/recipe_decadal.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +data <- load_datasets(recipe_file) +# Calibrate datasets +calibrated_data <- calibrate_datasets(data, recipe) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(calibrated_data$hcst, recipe) +# Export all data to netCDF +save_data(recipe, data, calibrated_data, skill_metrics, probabilities, archive = archive) -- GitLab From ae263cb3d3bc6969b122d0acb066efb6f01a649e Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 10 Aug 2022 17:56:44 +0200 Subject: [PATCH 10/18] Change the filename sdate to initial month instead of the first date in data --- modules/Saving/Saving.R | 61 +++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index aee758ac..89a5c344 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -4,7 +4,8 @@ source("modules/Saving/paths2save.R") save_data <- function(recipe, data, calibrated_data, skill_metrics = NULL, - probabilities = NULL) { + probabilities = NULL, + archive = NULL) { # Wrapper for the saving functions. # recipe: The auto-s2s recipe # data: output of load_datasets() @@ -19,24 +20,24 @@ save_data <- function(recipe, data, calibrated_data, dir.create(outdir, showWarnings = FALSE, recursive = TRUE) # Export hindcast, forecast and observations onto outfile - save_forecast(calibrated_data$hcst, recipe, dict, outdir) + save_forecast(calibrated_data$hcst, recipe, dict, outdir, archive = archive, type = 'hcst') if (!is.null(calibrated_data$fcst)) { - save_forecast(calibrated_data$fcst, recipe, dict, outdir) + save_forecast(calibrated_data$fcst, recipe, dict, outdir, archive = archive, type = 'fcst') } - save_observations(data$obs, recipe, dict, outdir) + save_observations(data$obs, recipe, dict, outdir, archive = archive) # Export skill metrics onto outfile if (!is.null(skill_metrics)) { - save_metrics(skill_metrics, recipe, dict, data$hcst, outdir) + save_metrics(skill_metrics, recipe, dict, data$hcst, outdir, archive = archive) if ("corr" %in% names(skill_metrics)) { - save_corr(skill_metrics, recipe, dict, data$hcst, outdir) + save_corr(skill_metrics, recipe, dict, data$hcst, outdir, archive = archive) } } # Export probabilities onto outfile if (!is.null(probabilities)) { - save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir) - save_probabilities(probabilities$probs, recipe, data$hcst, outdir) + save_percentiles(probabilities$percentiles, recipe, data$hcst, outdir, archive = archive) + save_probabilities(probabilities$probs, recipe, data$hcst, outdir, archive = archive) } } @@ -111,7 +112,9 @@ save_forecast <- function(data_cube, recipe, dictionary, outdir, - agg="global") { + agg="global", + archive = NULL, + type = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports each year to a netCDF file. 
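  # (Here 'archive' is the configuration list read from conf/archive_decadal.yml,
  #  used to look up the system's initial_month in the decadal case, and 'type'
  #  is 'hcst' or 'fcst', selecting the year used to build init_date.)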
# data_cube: s2dv_cube containing the data and metadata @@ -132,8 +135,18 @@ save_forecast <- function(data_cube, ## TODO: Move to a separate function? dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) if (fcst.horizon == 'decadal') { - #NOTE: Use the first date as init_date. But it may be better to use the real initialized date (ask users) - init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + if (type == 'hcst') { +#PROBLEM for fcst!!!!!!!!!!!! + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else if (type == 'fcst') { + init_date <- as.Date(paste0(recipe$Analysis$Time$fcst_year[1], '-', + sprintf('%02d', init_month), '-01')) + } } else { init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -143,6 +156,7 @@ save_forecast <- function(data_cube, leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -182,7 +196,13 @@ save_forecast <- function(data_cube, # Select start date if (fcst.horizon == 'decadal') { - fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + #NOTE: Not good to use data_cube$load_parameters$dat1 because decadal data has been reshaped +# fcst.sdate <- format(as.Date(data_cube$Dates$start[i]), '%Y%m%d') + + # init_date is like "1990-11-01" + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] } @@ -218,7 +238,8 @@ save_observations <- function(data_cube, recipe, dictionary, outdir, - agg="global") { + agg="global", + archive = NULL) { # Loops over the years in the s2dv_cube containing the observations and # exports each year to a netCDF file. # data_cube: s2dv_cube containing the data and metadata @@ -290,7 +311,7 @@ save_observations <- function(data_cube, ## Because observations are loaded differently in the daily vs. monthly ## cases, different approaches are necessary. if (fcst.horizon == 'decadal') { - fcst.sdate <- format(as.Date(data_cube$Dates$start[1]), '%Y%m%d') + fcst.sdate <- as.Date(data_cube$Dates$start[1]) } else { if (store.freq == "monthly_mean") { @@ -349,7 +370,8 @@ save_metrics <- function(skill, dictionary, data_cube, outdir, - agg="global") { + agg="global", + archive = NULL) { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. @@ -438,7 +460,8 @@ save_corr <- function(skill, dictionary, data_cube, outdir, - agg="global") { + agg="global", + archive = NULL) { # This function adds metadata to the ensemble correlation in 'skill' # and exports it to a netCDF file inside 'outdir'. 
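  # ('archive' is accepted here for the same reason as in save_metrics(): in the
  #  decadal case the nominal start date is derived from the system's
  #  initial_month rather than from a recipe sdate.)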
@@ -525,7 +548,8 @@ save_percentiles <- function(percentiles, recipe, data_cube, outdir, - agg="global") { + agg="global", + archive = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. @@ -604,7 +628,8 @@ save_probabilities <- function(probs, recipe, data_cube, outdir, - agg="global") { + agg="global", + archive = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. # probs: array containing the probability data -- GitLab From fd38d7d35126087e8542d2cb0ac097c5fc27b969 Mon Sep 17 00:00:00 2001 From: aho Date: Thu, 11 Aug 2022 18:22:15 +0200 Subject: [PATCH 11/18] Modify save_metrics(), save_corr(), save_percentiles() for decadal case --- modules/Saving/Saving.R | 134 ++++++++++++++++++++++++++++------------ 1 file changed, 96 insertions(+), 38 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 89a5c344..e2ecf38b 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -112,7 +112,7 @@ save_forecast <- function(data_cube, recipe, dictionary, outdir, - agg="global", + agg = "global", archive = NULL, type = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast @@ -238,7 +238,7 @@ save_observations <- function(data_cube, recipe, dictionary, outdir, - agg="global", + agg = "global", archive = NULL) { # Loops over the years in the s2dv_cube containing the observations and # exports each year to a netCDF file. @@ -259,7 +259,13 @@ save_observations <- function(data_cube, ## TODO: Move to a separate function? dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) if (fcst.horizon == 'decadal') { - init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # Method 1: Use the first date as init_date. But it may be better to use the real initialized date (ask users) +# init_date <- as.Date(data_cube$Dates$start[1], format = '%Y%m%d') + # Method 2: use initial month + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else { init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -269,6 +275,7 @@ save_observations <- function(data_cube, leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] for (i in syears) { # Select year from array and rearrange dimensions fcst <- ClimProjDiags::Subset(data_cube$data, 'syear', i, drop = T) @@ -311,7 +318,8 @@ save_observations <- function(data_cube, ## Because observations are loaded differently in the daily vs. monthly ## cases, different approaches are necessary. if (fcst.horizon == 'decadal') { - fcst.sdate <- as.Date(data_cube$Dates$start[1]) + # init_date is like "1990-11-01" + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) } else { if (store.freq == "monthly_mean") { @@ -370,7 +378,7 @@ save_metrics <- function(skill, dictionary, data_cube, outdir, - agg="global", + agg = "global", archive = NULL) { # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. 
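  # (A hedged example of the decadal branch added below: with init_month = 11
  #  and fcst_year = c(2020, 2021), fcst.sdate becomes "20201101"; when no
  #  fcst_year is given it falls back to "19701101".)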
@@ -415,20 +423,37 @@ save_metrics <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes - dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } + # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) + + # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } } times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -460,7 +485,7 @@ save_corr <- function(skill, dictionary, data_cube, outdir, - agg="global", + agg = "global", archive = NULL) { # This function adds metadata to the ensemble correlation in 'skill' # and exports it to a netCDF file inside 'outdir'. @@ -504,20 +529,37 @@ save_corr <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes - dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } + # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) + + # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
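  # (1970 is only a placeholder reference year used to label the skill output
  #  file when no forecast year is requested.)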
- if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } } times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -548,7 +590,7 @@ save_percentiles <- function(percentiles, recipe, data_cube, outdir, - agg="global", + agg = "global", archive = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. @@ -583,21 +625,37 @@ save_percentiles <- function(percentiles, fcst.horizon <- tolower(recipe$Analysis$Horizon) store.freq <- recipe$Analysis$Variables$freq # Generate vector containing leadtimes - dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } + # Get time difference in hours leadtimes <- interval(init_date, dates) %/% hours(1) + # Select start date # If a fcst is provided, use that as the ref. year. Otherwise use 1970. - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) + if (fcst.horizon == 'decadal') { + if (!is.null(recipe$Analysis$Time$fcst_year)) { + #PROBLEM: May be more than one fcst_year + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], + sprintf('%02d', init_month), '01') + } else { + fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') + } } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) + } } times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) @@ -628,7 +686,7 @@ save_probabilities <- function(probs, recipe, data_cube, outdir, - agg="global", + agg = "global", archive = NULL) { # Loops over the years in the s2dv_cube containing a hindcast or forecast # and exports the corresponding category probabilities to a netCDF file. 
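  # (The per-year selection below is moved from positional indexing,
  #  x[i, , , ], to ClimProjDiags::Subset(x, 'syear', i, drop = TRUE), which
  #  selects by dimension name and so does not assume a fixed dimension order.)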
-- GitLab From 24b66019421fe55f3bcf26d28b4eddd8a209c6d7 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 12 Aug 2022 10:20:16 +0200 Subject: [PATCH 12/18] Modify save_probabilities() for decadal cases --- modules/Saving/Saving.R | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index e2ecf38b..2764bee3 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -707,20 +707,26 @@ save_probabilities <- function(probs, # Generate vector containing leadtimes ## TODO: Move to a separate function? - dates <- sort(as.Date(data_cube$Dates$start)) - n_steps <- dim(data_cube$data)['time'][[1]] # number of time steps - dates <- dates[1:n_steps] - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) + if (fcst.horizon == 'decadal') { + init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, '-', + sprintf('%02d', init_month), '-01')) + } else { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } + # Get time difference in hours leadtimes <- interval(init_date, dates) %/% hours(1) syears <- seq(1:dim(data_cube$data)['syear'][[1]]) + syears_val <- lubridate::year(data_cube$Dates$start[1, 1, , 1]) # expect dim = [sday = 1, sweek = 1, syear, time] for (i in syears) { # Select year from array and rearrange dimensions - probs_syear <- lapply(probs, function(x) { - x[i, , , ]}) + probs_syear <- lapply(probs, ClimProjDiags::Subset,'syear', i, drop = T) + # Restore time dimension if the arrays are missing it if (!("time" %in% names(dim(probs_syear[[1]])))) { probs_syear <- lapply(probs_syear, function(x) { @@ -753,7 +759,13 @@ save_probabilities <- function(probs, attr(probs_syear[[1]], 'global_attrs') <- global_attributes # Select start date - fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + if (fcst.horizon == 'decadal') { + # init_date is like "1990-11-01" + fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) + fcst.sdate <- format(fcst.sdate, '%Y%m%d') + } else { + fcst.sdate <- data_cube$load_parameters$dat1$file_date[[1]][i] + } # Get time dimension values and metadata times <- get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate) -- GitLab From afe0448b73f3535ee4750c41393b43a17135f4b8 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 12 Aug 2022 10:59:21 +0200 Subject: [PATCH 13/18] Change ftime to start from 1 --- modules/Loading/Loading_decadal.R | 3 +-- modules/Loading/get_daily_time_ind.R | 14 +++++++------- modules/Loading/testing_recipes/recipe_decadal.yml | 4 ++-- .../testing_recipes/recipe_decadal_daily.yml | 4 ++-- tests/recipes/recipe-decadal_daily_1.yml | 4 ++-- tests/recipes/recipe-decadal_monthly_1.yml | 4 ++-- tests/recipes/recipe-decadal_monthly_2.yml | 4 ++-- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 2624cb8d..2fdd03a0 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -43,9 +43,8 @@ load_datasets <- function(recipe_file) { sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015 sdates_fcst <- recipe$Analysis$Time$fcst - #NOTE: time in recipe starts from 
0, but in Start() it starts from 1 if (store.freq == "monthly_mean") { - time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) + 1 + time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max)) } else if (store.freq == "daily_mean") { time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min), diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/get_daily_time_ind.R index f7625690..c3d9599c 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/get_daily_time_ind.R @@ -1,8 +1,8 @@ # This function is for decadal-daily data loading. # The time input (ftimemin & ftimemax) units in the recipe is month. For example, -# ftimemin = 0 means the first month, and ftimemax = 2 means the 3rd month. -# For daily data, if initial month is November, ftimemin = 0 equals to c(1:30), -# and ftimemax = 2, which means January next year, equals to c(31+31:31+31+30). +# ftimemin = 1 means the first month, and ftimemax = 3 means the 3rd month. +# For daily data, if initial month is November, ftimemin = 1 equals to c(1:30), +# and ftimemax = 3, which means January next year, equals to c(31+31:31+31+30). # #NOTE: For the datasets that having one file per sdate, we can use array [syear, time] to define time since # the Start() call doesn't need time_across = 'chunk' (there is only one chunk). BUT to @@ -16,20 +16,20 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend 'gregorian')) stop("The calendar is not recognized. Please contact maintainers.") - total_months <- (initial_month + ftimemin):(initial_month + ftimemax) + total_months <- (initial_month + ftimemin - 1):(initial_month + ftimemax - 1) total_months <- ifelse(total_months > 12, total_months %% 12, total_months) total_months <- ifelse(total_months == 0, 12, total_months) if (calendar == '360-day') { - daily_time_ind <- (ftimemin * 30 + 1): ((ftimemin * 30) + (ftimemax - ftimemin + 1) * 30) + daily_time_ind <- ((ftimemin - 1) * 30 + 1): (((ftimemin - 1) * 30) + (ftimemax - ftimemin + 1) * 30) } else { total_days_sum <- sum(days_in_month(total_months)) - if (ftimemin == 0) { + if (ftimemin == 1) { daily_time_ind <- 1:total_days_sum } else { - before_start_months <- initial_month:(initial_month + ftimemin - 1) + before_start_months <- initial_month:(initial_month + ftimemin - 2) before_start_months <- ifelse(before_start_months > 12, before_start_months %% 12, before_start_months) before_start_months <- ifelse(before_start_months == 0, 12, before_start_months) before_start_total_days_sum <- sum(days_in_month(before_start_months)) diff --git a/modules/Loading/testing_recipes/recipe_decadal.yml b/modules/Loading/testing_recipes/recipe_decadal.yml index 6b21d555..de4f9591 100644 --- a/modules/Loading/testing_recipes/recipe_decadal.yml +++ b/modules/Loading/testing_recipes/recipe_decadal.yml @@ -18,8 +18,8 @@ Analysis: hcst_start: 1990 hcst_end: 1993 # season: 'Annual' - ftime_min: 1 - ftime_max: 13 + ftime_min: 2 + ftime_max: 14 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 8884ac7f..6dca8f43 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -18,8 +18,8 @@ Analysis: hcst_start: 2016 hcst_end: 2019 season: 'Annual' - ftime_min: 2 - ftime_max: 4 + ftime_min: 3 + ftime_max: 5 Region: 
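 # (ftime is 1-based after this patch: ftime_min: 3 / ftime_max: 5 select the
 #  3rd to 5th forecast months, consistent with the get_daily_time_ind() change
 #  above)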
latmin: 10 #-90 latmax: 20 #90 diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index d0d0726d..ef626ce3 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -18,8 +18,8 @@ Analysis: hcst_start: 1990 hcst_end: 1992 season: 'Annual' - ftime_min: 2 - ftime_max: 4 + ftime_min: 3 + ftime_max: 5 Region: latmin: 10 #-90 latmax: 20 #90 diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index 25ca1320..fd948915 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -18,8 +18,8 @@ Analysis: hcst_start: 1991 hcst_end: 1994 # season: 'Annual' - ftime_min: 0 - ftime_max: 2 + ftime_min: 1 + ftime_max: 3 Region: latmin: 17 latmax: 20 diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 3410c3c5..83027a0f 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -18,8 +18,8 @@ Analysis: hcst_start: 1990 hcst_end: 1992 # season: 'Annual' - ftime_min: 0 - ftime_max: 13 + ftime_min: 1 + ftime_max: 14 Region: latmin: -60 #-90 latmax: -55 #90 -- GitLab From b3853fac370ecf210f14432a026dd22af67d5278 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 12 Aug 2022 16:08:05 +0200 Subject: [PATCH 14/18] Include unit tests for saving module --- .../testing_recipes/recipe_decadal_daily.yml | 2 +- tests/recipes/recipe-decadal_daily_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_1.yml | 2 +- tests/recipes/recipe-decadal_monthly_2.yml | 2 +- tests/testthat/test-decadal_daily_1.R | 3 +- tests/testthat/test-decadal_monthly_1.R | 30 ++++++++++++++++++- tests/testthat/test-decadal_monthly_2.R | 4 +-- 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_decadal_daily.yml b/modules/Loading/testing_recipes/recipe_decadal_daily.yml index 6dca8f43..f362329e 100644 --- a/modules/Loading/testing_recipes/recipe_decadal_daily.yml +++ b/modules/Loading/testing_recipes/recipe_decadal_daily.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: qmap Skill: - metric: RPSS FRPSS + metric: RPSS FRPSS EnsCorr Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10]] Indicators: diff --git a/tests/recipes/recipe-decadal_daily_1.yml b/tests/recipes/recipe-decadal_daily_1.yml index ef626ce3..90048009 100644 --- a/tests/recipes/recipe-decadal_daily_1.yml +++ b/tests/recipes/recipe-decadal_daily_1.yml @@ -43,6 +43,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: ./out-logs/ + output_dir: ./tests/out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/recipes/recipe-decadal_monthly_1.yml b/tests/recipes/recipe-decadal_monthly_1.yml index fd948915..cedb07f0 100644 --- a/tests/recipes/recipe-decadal_monthly_1.yml +++ b/tests/recipes/recipe-decadal_monthly_1.yml @@ -43,6 +43,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: ./out-logs/ + output_dir: ./tests/out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/recipes/recipe-decadal_monthly_2.yml b/tests/recipes/recipe-decadal_monthly_2.yml index 83027a0f..6c33d302 100644 --- a/tests/recipes/recipe-decadal_monthly_2.yml +++ b/tests/recipes/recipe-decadal_monthly_2.yml @@ -43,6 +43,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: ./out-logs/ + output_dir: ./tests/out-logs/ code_dir: /esarchive/scratch/aho/git/auto-s2s/ diff --git a/tests/testthat/test-decadal_daily_1.R 
b/tests/testthat/test-decadal_daily_1.R index 2fbede4c..720662d4 100644 --- a/tests/testthat/test-decadal_daily_1.R +++ b/tests/testthat/test-decadal_daily_1.R @@ -8,13 +8,14 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_daily_1.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( data <- load_datasets(recipe_file) ))}) -#recipe <- read_yaml(recipe_file) ## Calibrate datasets #suppressWarnings({invisible(capture.output( # calibrated_data <- calibrate_datasets(data, recipe) diff --git a/tests/testthat/test-decadal_monthly_1.R b/tests/testthat/test-decadal_monthly_1.R index 07e084df..6f43aaa1 100644 --- a/tests/testthat/test-decadal_monthly_1.R +++ b/tests/testthat/test-decadal_monthly_1.R @@ -8,13 +8,14 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_1.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( data <- load_datasets(recipe_file) ))}) -recipe <- read_yaml(recipe_file) # Calibrate datasets suppressWarnings({invisible(capture.output( calibrated_data <- calibrate_datasets(data, recipe) @@ -28,6 +29,14 @@ suppressWarnings({invisible(capture.output( probs <- compute_probabilities(calibrated_data$hcst, recipe) ))}) +# Saving +suppressWarnings({invisible(capture.output( +save_data(recipe = recipe, data = data, calibrated_data = calibrated_data, + skill_metrics = skill_metrics, probabilities = probs, archive = archive) +))}) + +outdir <- get_dir(recipe) + #====================================== test_that("1. Loading", { @@ -236,4 +245,23 @@ tolerance = 0.0001 }) +#====================================== + +test_that("4. Saving", { + +expect_equal( +list.files(outdir), +c("tas_19911101.nc", "tas_19921101.nc", "tas_19931101.nc", "tas_19941101.nc", "tas_20211101.nc", + "tas-obs_19911101.nc", "tas-obs_19921101.nc", "tas-obs_19931101.nc", "tas-obs_19941101.nc", + "tas-percentiles_month11.nc", "tas-probs_19911101.nc", "tas-probs_19921101.nc", + "tas-probs_19931101.nc", "tas-probs_19941101.nc", "tas-skill_month11.nc") +) +# open the files and check values/attributes? 
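
One way to follow up the question above (a sketch, not part of the patch): open one of the saved files with the ncdf4 package and inspect its values and attributes. It assumes ncdf4 is installed and that outdir contains the files listed in the expectation; the exact values to assert depend on the recipe.

library(ncdf4)

nc <- nc_open(file.path(outdir, "tas_19911101.nc"))
tas <- ncvar_get(nc, "tas")                 # the saved hindcast field
tas_units <- ncatt_get(nc, "tas", "units")  # a variable attribute, e.g. "K"
nc_close(nc)

# These could then feed checks such as:
# expect_equal(tas_units$value, "K")
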
+#expect_equal( +#) + + +}) +# Delete files +unlink(paste0(outdir, list.files(outdir))) diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R index b1e375f8..8fd3525d 100644 --- a/tests/testthat/test-decadal_monthly_2.R +++ b/tests/testthat/test-decadal_monthly_2.R @@ -8,14 +8,14 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") recipe_file <- "tests/recipes/recipe-decadal_monthly_2.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive # Load datasets suppressWarnings({invisible(capture.output( data <- load_datasets(recipe_file) ))}) -recipe <- read_yaml(recipe_file) - # Calibrate datasets suppressWarnings({invisible(capture.output( calibrated_data <- calibrate_datasets(data, recipe) -- GitLab From a7b2e70be6f63a0075d990f9077a4b9d93897cc1 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 19 Aug 2022 17:24:23 +0200 Subject: [PATCH 15/18] Separate path finding into a function (will be used in hcst too) --- modules/Loading/Loading_decadal.R | 46 ++++-------------- ...ly_time_ind.R => helper_loading_decadal.R} | 48 +++++++++++++++++++ 2 files changed, 58 insertions(+), 36 deletions(-) rename modules/Loading/{get_daily_time_ind.R => helper_loading_decadal.R} (64%) diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 2fdd03a0..fa59b9b3 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -7,7 +7,7 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source("modules/Loading/get_daily_time_ind.R") +source("modules/Loading/helper_loading_decadal.R") source("modules/Loading/dates2load.R") source("modules/Loading/check_latlon.R") source("tools/libs.R") @@ -139,40 +139,12 @@ load_datasets <- function(recipe_file) { # Step 2: Load the fcst #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { - - # Define path (monthly and daily) - multi_path <- FALSE - if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppA - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - - } else { - if (all(sdates_fcst >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB - fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') - - } else { - # Create one path for each sdate - #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ - multi_path <- TRUE - path_list <- vector('list', length = length(sdates_fcst)) - for (i_sdate in 1:length(sdates_fcst)) { - if (sdates_fcst[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { - path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version, - paste0('$var$_', table, '_*_dcppB-forecast_s', sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) - } else { - path_list[[i_sdate]] <- - list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version, - paste0('$var$_', table, '_*_dcppA-hindcast_s', 
sdates_fcst[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) - } - } - } - } +#-----------NEW------------ + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_fcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path +#--------NEW_END------------- # fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, # '$ensemble$', table, '$var$', grid, version) @@ -183,7 +155,9 @@ load_datasets <- function(recipe_file) { if (!multi_path) { #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst Start_fcst_arg_list <- Start_hcst_arg_list - Start_fcst_arg_list[['dat']] <- file.path(fcst.path, fcst.files) +#-----------NEW------------ + Start_fcst_arg_list[['dat']] <- path_list +#--------NEW_END------------- Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) fcst <- do.call(Start, Start_fcst_arg_list) diff --git a/modules/Loading/get_daily_time_ind.R b/modules/Loading/helper_loading_decadal.R similarity index 64% rename from modules/Loading/get_daily_time_ind.R rename to modules/Loading/helper_loading_decadal.R index c3d9599c..14ceebee 100644 --- a/modules/Loading/get_daily_time_ind.R +++ b/modules/Loading/helper_loading_decadal.R @@ -66,6 +66,8 @@ get_daily_time_ind <- function(ftimemin, ftimemax, initial_month, sdates, calend return(daily_time_ind) } +#========================================== + # This function is used to pick out 0229 after loading data. But it is not used in the # script now. correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { @@ -101,3 +103,49 @@ correct_daily_for_leap <- function(data = NULL, time_attr, return_time = TRUE) { return(list(data = new_data)) } } + +#========================================== +# This function generates the path for Start() call. It shouldn't be needed when Start() is improved. 
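
Before the full helper below, a minimal sketch of the three cases it distinguishes (illustrative code with assumed names, not the repository implementation): if first_dcppB_syear is unset or all start dates fall before it, one dcppA-hindcast path template suffices; if all start dates fall at or after it, one dcppB-forecast template; a mix of the two forces one explicit path per start date (multi_path).

classify_dcpp <- function(sdates, first_dcppB_syear = NULL) {
  if (is.null(first_dcppB_syear) || all(sdates < first_dcppB_syear)) {
    list(multi_path = FALSE, experiment = "dcppA-hindcast")
  } else if (all(sdates >= first_dcppB_syear)) {
    list(multi_path = FALSE, experiment = "dcppB-forecast")
  } else {
    # Mixed case: each start date is labelled individually
    list(multi_path = TRUE,
         experiment = ifelse(sdates >= first_dcppB_syear,
                             "dcppB-forecast", "dcppA-hindcast"))
  }
}

classify_dcpp(2015:2018, first_dcppB_syear = 2017)
# multi_path = TRUE: 2015-2016 resolve to dcppA-hindcast, 2017-2018 to dcppB-forecast
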
+get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { + + # Define path (monthly and daily) + multi_path <- FALSE + if (is.null(archive$System[[exp.name]]$src$first_dcppB_syear) | + isTRUE(all(sdates < archive$System[[exp.name]]$src$first_dcppB_syear))) { # only dcppA + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppA-hindcast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + path_list <- file.path(fcst.path, fcst.files) + + } else { + if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB + fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version) + fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + path_list <- file.path(fcst.path, fcst.files) + + } else { # have both dcppA and dcppB + # Create one path for each sdate + #TODO: When *_depends = 'syear' can be more than one, use one path with $dcpp$ + multi_path <- TRUE + path_list <- vector('list', length = length(sdates)) + for (i_sdate in 1:length(sdates)) { + if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppB-forecast_s', sdates[i_sdate], + '-$ensemble$_', grid, '_$chunk$.nc'))) + } else { + path_list[[i_sdate]] <- + list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, + '$ensemble$', table, '$var$', grid, version, + paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates[i_sdate], + '-$ensemble$_', grid, '_$chunk$.nc'))) + } + } + } + } + return(list(path_list = path_list, multi_path = multi_path)) +} + -- GitLab From dd89d8ab28be6335cbc21b3ddbc2c4deeb889b22 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 22 Aug 2022 15:13:02 +0200 Subject: [PATCH 16/18] Fix time attributes in save_forecast() for the fcst in the seasonal case --- modules/Saving/Saving.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index 2764bee3..fccf2ef4 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -132,7 +132,6 @@ save_forecast <- function(data_cube, # Generate vector containing leadtimes - ## TODO: Move to a separate function? dates <- ClimProjDiags::Subset(as.Date(data_cube$Dates$start), 'syear', 1) if (fcst.horizon == 'decadal') { # Method 1: Use the first date as init_date. 
But it may be better to use the real initialized date (ask users) @@ -148,9 +147,15 @@ save_forecast <- function(data_cube, sprintf('%02d', init_month), '-01')) } } else { - init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d') + if (type == 'hcst') { + init_date <- as.Date(paste0(recipe$Analysis$Time$hcst_start, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } else if (type == 'fcst') { + init_date <- as.Date(paste0(recipe$Analysis$Time$fcst_year, + recipe$Analysis$Time$sdate), + format = '%Y%m%d') + } } # Get time difference in months leadtimes <- interval(init_date, dates) %/% hours(1) -- GitLab From b00f00c07d56b301796c1229ccd333e4a979b04c Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 22 Aug 2022 17:59:19 +0200 Subject: [PATCH 17/18] Add FGOALS and supplement ERA5 --- conf/archive_decadal.yml | 26 ++++++---- modules/Loading/Loading_decadal.R | 79 ++++++++++++++++++++----------- 2 files changed, 67 insertions(+), 38 deletions(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index 7ac36b1a..03e41425 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -141,6 +141,7 @@ archive: table: {"tas":"Amon", "pr":"Amon"} grid: {"tas":"gn", "pr":"gn"} version: {"tas":"v20200101", "pr":"v20200101"} +#Prepared daily_mean: grid: version: @@ -168,20 +169,23 @@ archive: reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/CMCC-CM2-SR5/DCPP/CMCC/CMCC-CM2-SR5/dcppA-hindcast/r1i1p1f1/Amon/tas/gn/v20210312/tas_Amon_CMCC-CM2-SR5_dcppA-hindcast_s2008-r1i1p1f1_gn_200811-201812.nc" # ---- -#QUESTION: missing in spreadsheet FGOALS-f3-L: src: + hcst: "exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/" + fcst: "exp/CMIP6/dcppB-forecast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppB-forecast/" + first_dcppB_syear: 2017 monthly_mean: - table: - grid: - version: + table: {"tas":"Amon", "pr":"Amon", "psl":"Amon", "tos":"Omon"} + grid: {"tas":"gr", "pr":"gr", "psl":"gr","tos":"gn"} + version: {"tas":"v20220212", "pr":"v20220212", "psl":"v20220212", "tos":"v20220214"} +#Prepared daily_mean: grid: version: calendar: - member: - initial_month: - reference_grid: + member: r1i1p1f1,r2i1p1f1,r3i1p1f1 + initial_month: 11 + reference_grid: "/esarchive/exp/CMIP6/dcppA-hindcast/FGOALS-f3-L/DCPP/CAS/FGOALS-f3-L/dcppA-hindcast/r1i1p1f1/Amon/tas/gr/v20220212/tas_Amon_FGOALS-f3-L_dcppA-hindcast_s1960-r1i1p1f1_gr_196011-197012.nc" # ---- IPSL-CM6A-LR: @@ -316,10 +320,11 @@ archive: # ---- ERA5: src: "recon/ecmwf/era5/" - monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds"} + monthly_mean: {"tas":"_f1h-r1440x721cds", "prlr":"_f1h-r1440x721cds", "psl":"_f1h-r1440x721cds", "tos":"_f1h-r1440x721cds"} daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"} - calendar: "360-day" + "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", + "tos":"_f1h-r1440x721cds"} + calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" # ---- @@ -331,6 +336,7 @@ archive: src: "recon/jma/jra55/" monthly_mean: {"tas":"_f6h", "psl":"_f6h", "tos":"", "pr":"_s0-3h", "prlr":"_s0-3h"} daily_mean: {"tas":"_f6h", "psl":"_f6h", "prlr":"_s0-3h", "sfcWind":"_f6h"} + calendar: "proleptic_gregorian" reference_grid: "/esarchive/recon/jma/jra55/monthly_mean/tas_f6h/tas_200811.nc" # ---- diff --git a/modules/Loading/Loading_decadal.R 
b/modules/Loading/Loading_decadal.R index fa59b9b3..6f4de29f 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -88,12 +88,17 @@ load_datasets <- function(recipe_file) { # Step 1: Load the hcst #------------------------------------------- #monthly and daily - hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version) - hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, + version = version, sdates = sdates_hcst) + path_list <- tmp$path_list + multi_path <- tmp$multi_path +# hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst, +# '$ensemble$', table, '$var$', grid, version) +# hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + - Start_hcst_arg_list <- list( - dat = file.path(hcst.path, hcst.files), + Start_default_arg_list <- list( + dat = path_list, #file.path(hcst.path, hcst.files), var = variable, ensemble = member, syear = paste0(sdates_hcst), @@ -118,10 +123,37 @@ load_datasets <- function(recipe_file) { time = c('syear', 'chunk')), retrieve = T) - hcst <- do.call(Start, Start_hcst_arg_list) + if (!multi_path) { + Start_hcst_arg_list <- Start_default_arg_list + hcst <- do.call(Start, Start_hcst_arg_list) - tmp_time_attr <- attr(hcst, 'Variables')$common$time + } else { + Start_hcst_arg_list <- Start_default_arg_list + Start_hcst_arg_list[['syear']] <- NULL + Start_hcst_arg_list[['chunk_depends']] <- NULL + remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear') + Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] + + hcst <- do.call(Start, Start_hcst_arg_list) + + # Reshape and reorder dimensions + ## dat should be 1, syear should be length of dat; reorder dimensions + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'ensemble', 'syear', 'time', 'latitude', 'longitude')) + + # Manipulate time attr because Start() cannot read it correctly + wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only + tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp[1, ] <- wrong_time_attr + yr_diff <- diff(sdates_hcst) + for (i_syear in 1:length(yr_diff)) { + tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) + } + attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC') + } + + tmp_time_attr <- attr(hcst, 'Variables')$common$time # change syear to c(sday, sweek, syear) dim(hcst) <- c(dim(hcst)[1:3], sday = 1, sweek = 1, dim(hcst)[4:7]) @@ -139,13 +171,11 @@ load_datasets <- function(recipe_file) { # Step 2: Load the fcst #------------------------------------------- if (!is.null(recipe$Analysis$Time$fcst)) { -#-----------NEW------------ + tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, version = version, sdates = sdates_fcst) path_list <- tmp$path_list multi_path <- tmp$multi_path -#--------NEW_END------------- - # fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, # '$ensemble$', table, '$var$', grid, version) # fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc') @@ -154,26 +184,21 @@ load_datasets <- function(recipe_file) { # monthly & daily if (!multi_path) { #NOTE: the 
adjustment for two cases (multiple files per sdate or not) has been made in hcst - Start_fcst_arg_list <- Start_hcst_arg_list -#-----------NEW------------ + Start_fcst_arg_list <- Start_default_arg_list Start_fcst_arg_list[['dat']] <- path_list -#--------NEW_END------------- Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst) fcst <- do.call(Start, Start_fcst_arg_list) } else { # multi_path - #TODO: time attribute is not correct - Start_fcst_arg_list <- Start_hcst_arg_list + #TODO: time attribute is not correct. Improve Start(). + Start_fcst_arg_list <- Start_default_arg_list Start_fcst_arg_list[['dat']] <- path_list Start_fcst_arg_list[['syear']] <- NULL Start_fcst_arg_list[['chunk_depends']] <- NULL - if ('syear' %in% Start_fcst_arg_list[['return_vars']][['time']]) { - remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] - } - + remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') + Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] fcst <- do.call(Start, Start_fcst_arg_list) # Reshape and reorder dimensions @@ -193,16 +218,14 @@ load_datasets <- function(recipe_file) { } + tmp_time_attr <- attr(fcst, 'Variables')$common$time + # change syear to c(sday, sweek, syear) dim(fcst) <- c(dim(fcst)[1:3], sday = 1, sweek = 1, dim(fcst)[4:7]) - tmp_time_attr <- attr(fcst, 'Variables')$common$time -# #TODO: This check cannot pass if multi_path -# if (!multi_path) { - if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { - stop("fcst has problem in matching data and time attr dimension.") - } - dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) -# } + if (!identical(dim(tmp_time_attr), dim(fcst)[6:7])) { + stop("fcst has problem in matching data and time attr dimension.") + } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(fcst)[6:7]) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { -- GitLab From d41d64cdd89c87780e43074b10b72ba81ccc062d Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 22 Aug 2022 18:01:38 +0200 Subject: [PATCH 18/18] Use global expression for version folder; make hcst able to use dcppB --- modules/Loading/Loading_decadal.R | 16 +- modules/Loading/helper_loading_decadal.R | 12 +- tests/recipes/recipe-decadal_monthly_3.yml | 48 +++++ tests/test_decadal.R | 8 + tests/testthat/test-decadal_monthly_3.R | 199 +++++++++++++++++++++ 5 files changed, 275 insertions(+), 8 deletions(-) create mode 100644 tests/recipes/recipe-decadal_monthly_3.yml create mode 100644 tests/testthat/test-decadal_monthly_3.R diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R index 6f4de29f..f188c198 100644 --- a/modules/Loading/Loading_decadal.R +++ b/modules/Loading/Loading_decadal.R @@ -26,6 +26,9 @@ load_datasets <- function(recipe_file) { recipe$filename <- recipe_file archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + # Print Start() info or not + DEBUG <- FALSE + #------------------------- # Read from recipe: #------------------------- @@ -117,10 +120,12 @@ load_datasets <- function(recipe_file) { transform_params = list(grid = regrid_params$fcst.gridtype, method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), + path_glob_permissive = 2, # for version synonims = list(longitude = 
c('lon', 'longitude'), latitude = c('lat', 'latitude')), return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), + silent = !DEBUG, retrieve = T) if (!multi_path) { @@ -145,7 +150,7 @@ load_datasets <- function(recipe_file) { wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr - yr_diff <- diff(sdates_hcst) + yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) for (i_syear in 1:length(yr_diff)) { tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) } @@ -162,6 +167,11 @@ load_datasets <- function(recipe_file) { } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(hcst)[6:7]) + #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 + if (multi_path) { + attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]] + } + # Change class from startR_array to s2dv_cube suppressWarnings( hcst <- as.s2dv_cube(hcst) @@ -210,7 +220,7 @@ load_datasets <- function(recipe_file) { wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr - yr_diff <- diff(sdates_fcst) + yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) for (i_syear in 1:length(yr_diff)) { tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) } @@ -290,6 +300,7 @@ load_datasets <- function(recipe_file) { longitude = c('lon','longitude')), return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + silent = !DEBUG, retrieve = TRUE) } else if (store.freq == "monthly_mean") { @@ -314,6 +325,7 @@ load_datasets <- function(recipe_file) { longitude = c('lon','longitude')), return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), + silent = !DEBUG, retrieve = TRUE) } diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/helper_loading_decadal.R index 14ceebee..f4f1ec32 100644 --- a/modules/Loading/helper_loading_decadal.R +++ b/modules/Loading/helper_loading_decadal.R @@ -120,8 +120,8 @@ get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { } else { if (all(sdates >= archive$System[[exp.name]]$src$first_dcppB_syear)) { # only dcppB fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version) - fcst.files <- paste0('$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') + '$ensemble$', table, '$var$', grid) #, version) + fcst.files <- paste0('v*/$var$_', table, '_*_dcppB-forecast_s$syear$-$ensemble$_', grid, '_$chunk$.nc') path_list <- file.path(fcst.path, fcst.files) } else { # have both dcppA and dcppB @@ -133,14 +133,14 @@ get_dcpp_path <- function(archive, exp.name, table, grid, version, sdates) { if (sdates[i_sdate] >= archive$System[[exp.name]]$src$first_dcppB_syear) { path_list[[i_sdate]] <- list(path = file.path(archive$src, archive$System[[exp.name]]$src$fcst, - '$ensemble$', table, '$var$', grid, version, - paste0('$var$_', table, '_*_dcppB-forecast_s', sdates[i_sdate], + '$ensemble$', table, '$var$', grid, #version, + paste0('v*/$var$_', table, '_*_dcppB-forecast_s', sdates[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) } else { path_list[[i_sdate]] <- list(path = file.path(archive$src, archive$System[[exp.name]]$src$hcst, - '$ensemble$', table, '$var$', grid, version, 
- paste0('$var$_', table, '_*_dcppA-hindcast_s', sdates[i_sdate], + '$ensemble$', table, '$var$', grid, #version, + paste0('v*/$var$_', table, '_*_dcppA-hindcast_s', sdates[i_sdate], '-$ensemble$_', grid, '_$chunk$.nc'))) } } diff --git a/tests/recipes/recipe-decadal_monthly_3.yml b/tests/recipes/recipe-decadal_monthly_3.yml new file mode 100644 index 00000000..87197af6 --- /dev/null +++ b/tests/recipes/recipe-decadal_monthly_3.yml @@ -0,0 +1,48 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: FGOALS-f3-L + member: 'all' # 3 in total + Multimodel: no + Reference: + name: JRA-55 + Time: + fcst_year: + hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB + hcst_end: 2018 +# season: 'Annual' + ftime_min: 6 # Apr + ftime_max: 8 + Region: + latmin: 40 + latmax: 65 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: 'evmos' + Skill: + metric: BSS10 Corr + Probabilities: + percentiles: [[1/3, 2/3]] + Indicators: + index: FALSE + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/aho/git/auto-s2s/ + diff --git a/tests/test_decadal.R b/tests/test_decadal.R index 84a23b62..75b7fca7 100644 --- a/tests/test_decadal.R +++ b/tests/test_decadal.R @@ -6,3 +6,11 @@ files_testthat <- list.files('./tests/testthat/', pattern = 'decadal') for (i_file in 1:length(files_testthat)) { source(paste0('./tests/testthat/', files_testthat[i_file])) } + +#================ +#--- recipe-decadal_monthly_1.yml --- +# +#--- recipe-decadal_monthly_2.yml --- +# +#--- recipe-decadal_monthly_3.yml --- +# FGOALS-f3-L, hcst contains dcppA and dcppB diff --git a/tests/testthat/test-decadal_monthly_3.R b/tests/testthat/test-decadal_monthly_3.R new file mode 100644 index 00000000..b6be7f2b --- /dev/null +++ b/tests/testthat/test-decadal_monthly_3.R @@ -0,0 +1,199 @@ +context("Decadal monthly data - 3") + +########################################### + +source("modules/Loading/Loading_decadal.R") +source("modules/Calibration/Calibration.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") + +recipe_file <- "tests/recipes/recipe-decadal_monthly_3.yml" +recipe <- read_yaml(recipe_file) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- load_datasets(recipe_file) +))}) + +# Calibrate datasets +suppressWarnings({invisible(capture.output( + calibrated_data <- calibrate_datasets(data, recipe) +))}) + +# Compute skill metrics +suppressWarnings({invisible(capture.output( +skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe) +))}) +suppressWarnings({invisible(capture.output( +probs <- compute_probabilities(calibrated_data$hcst, recipe) +))}) + +#====================================== + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +data$fcst, +NULL +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "lon", "lat", "Variable", "Datasets", "Dates", "when", "source_files", "load_parameters") +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, ensemble = 3, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 25, longitude = 16) +) +expect_equal( +dim(data$hcst$Dates$start), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +# hcst data +expect_equal( +as.vector(drop(data$hcst$data)[3, , 2, 2, 2]), +c(278.4305, 279.5065, 280.4110, 278.7608), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +284.3765, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(263.3929, 300.4329), +tolerance = 0.0001 +) + +expect_equal( +(data$hcst$Dates$start)[1], +as.POSIXct("2016-04-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[2], +as.POSIXct("2017-04-16", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[5], +as.POSIXct("2016-05-16 12:00:00", tz = 'UTC') +) +expect_equal( +(data$hcst$Dates$start)[12], +as.POSIXct("2019-06-16", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Calibration", { + +expect_equal( +names(calibrated_data), +c("hcst", "fcst") +) +expect_equal( +as.vector(drop(calibrated_data$hcst$data)[3, , 2, 2, 2]), +c(279.0648, 281.0578, 282.6535, 280.3137), +tolerance = 0.0001 +) +expect_equal( +calibrated_data$fcst, +NULL +) +}) + + +##====================================== +test_that("3. Metrics", { + +expect_equal( +is.list(skill_metrics), +TRUE +) +expect_equal( +names(skill_metrics), +c("bss10", "bss10_significance", "corr", "corr_p.value", "corr_conf.low", "corr_conf.up") +) +expect_equal( +class(skill_metrics[[1]]), +"array" +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim)[1:2], all.equal, c(time = 3, latitude = 25, longitude = 16)))), +TRUE +) +expect_equal( +all(unlist(lapply(lapply(skill_metrics, dim)[3:6], all.equal, c(ensemble = 3, time = 3, latitude = 25, longitude = 16)))), +TRUE +) +expect_equal( +as.vector(skill_metrics$bss10[, 1, 2]), +c(-0.1904762, -0.1904762, -0.1904762), +tolerance = 0.0001 +) +expect_equal( +any(as.vector(skill_metrics$bss10_significance)), +FALSE +) +expect_equal( +as.vector(skill_metrics$corr[2, , 1, 2]), +c(-0.2015265, 0.4635463, -0.1019575), +tolerance = 0.0001 +) + +# Probs +expect_equal( +names(probs), +c('probs', 'percentiles') +) +expect_equal( +names(probs$probs), +c('prob_b33', 'prob_33_to_66', 'prob_a66') +) +expect_equal( +names(probs$percentiles), +c('percentile_33', 'percentile_66') +) +expect_equal( +dim(probs$probs$prob_b33), +c(syear = 4, time = 3, latitude = 25, longitude = 16) +) +expect_equal( +dim(probs$percentiles$percentile_33), +c(time = 3, latitude = 25, longitude = 16) +) +expect_equal( +as.vector(probs$probs$prob_b33[, 1, 2, 2]), +c(0.0, 0.3333333, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(probs$percentiles$percentile_33[1:3, 1, 2]), +c(278.1501, 279.5226, 282.0237), +tolerance = 0.0001 +) + +}) + + -- GitLab
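
A minimal sketch of the leadtime computation that patch 16 above fixes in save_forecast(): init_date must come from fcst_year rather than hcst_start when saving the forecast, and the elapsed time is then counted in whole hours. Dates are illustrative; only the lubridate package is assumed.

library(lubridate)

dates <- as.Date(c("2020-11-16", "2020-12-16", "2021-01-16"))   # forecast time steps
init_date <- as.Date(paste0("2020", "1101"), format = "%Y%m%d") # fcst_year + sdate

# Whole hours since initialization, as in save_forecast()
leadtimes <- interval(init_date, dates) %/% hours(1)
leadtimes  # 360 1080 1824
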
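
Patch 18 above also repairs the time attribute of multi-path loads, since in that case Start() returns the time coordinate of the first start date only. A reduced sketch of that repair with assumed example values (the real code operates on attr(hcst, 'Variables')$common$time):

library(lubridate)

sdates_hcst <- c(2015, 2016, 2017, 2018)
# What Start() returns: the time steps of the first start date only
wrong_time_attr <- as.POSIXct(c("2016-04-16", "2016-05-16", "2016-06-16"), tz = "UTC")

tmp <- array(dim = c(syear = 4, time = 3))
tmp[1, ] <- wrong_time_attr
yr_diff <- (sdates_hcst - sdates_hcst[1])[-1]  # offsets in years: 1 2 3
for (i_syear in seq_along(yr_diff)) {
  tmp[i_syear + 1, ] <- wrong_time_attr + years(yr_diff[i_syear])
}
time_attr <- as.POSIXct(tmp, origin = "1970-01-01", tz = "UTC")
dim(time_attr) <- dim(tmp)  # restore [syear, time]
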