From ed6a5fabcc0f71bf212b4768d5c3431fd43b4118 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 7 Apr 2022 09:11:38 +0200
Subject: [PATCH 01/29] Add grid description files and systems to the archive

---
 conf/archive.yml                             | 25 ++++++++++++++++----
 conf/grid_description/griddes_eccc1.txt      | 19 +++++++++++++++
 conf/grid_description/griddes_system2c3s.txt | 19 +++++++++++++++
 3 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 conf/grid_description/griddes_eccc1.txt
 create mode 100644 conf/grid_description/griddes_system2c3s.txt

diff --git a/conf/archive.yml b/conf/archive.yml
index 57574919..73379d5d 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -16,20 +16,37 @@ archive:
             reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc"
         system7c3s:
             src: "exp/meteofrance/system7c3s/"
-            monthly_mean: {"tas":"_f6h/","g500":"_f12h/",
-                           "prlr":"_f24h/", "sfcWind": "_f6h/"}
+            monthly_mean: {"tas":"_f6h/", "g500":"_f12h/",
+                           "prlr":"_f24h/", "sfcWind": "_f6h/",
+                           "tasmax":"_f6h/", "tasmin": "_f6h/"}
             nmember:
               fcst: 51
               hcst: 25
             reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt"
         system35c3s:
             src: "exp/cmcc/system35c3s/"
-            monthly_mean: {"tas":"_f6h/","g500":"_f12h/",
-                          "prlr":"_f24h/", "sfcWind": "_f6h/"}
+            monthly_mean: {"tas":"_f6h/", "g500":"_f12h/",
+                           "prlr":"_f24h/", "sfcWind": "_f6h/"}
             nmember:
               fcst: 50
               hcst: 40
             reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt"
+        system2c3s:
+            src: "exp/jma/system2c3s/"
+            monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
+                           "tasmax":"_f6h/", "tasmin":"_f6h/"}
+            nmember:
+              fcst: 10
+              hcst: 10
+            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt"
+        eccc1:
+            src: "exp/eccc/eccc1/"
+            monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
+                           "tasmax":"_f6h/", "tasmin":"_f6h/"}
+            nmember:
+              fcst: 10
+              hcst: 10
+            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_eccc1.txt"
     Reference:
         era5: 
             src: "recon/ecmwf/era5/"
diff --git a/conf/grid_description/griddes_eccc1.txt b/conf/grid_description/griddes_eccc1.txt
new file mode 100644
index 00000000..58a9810c
--- /dev/null
+++ b/conf/grid_description/griddes_eccc1.txt
@@ -0,0 +1,19 @@
+#
+# Grid description file for ECCC1
+#
+# gridID 2
+#
+gridtype  = lonlat
+gridsize  = 64800
+xsize     = 360
+ysize     = 180
+xname     = lon
+xlongname = "longitude"
+xunits    = "degrees_east"
+yname     = lat
+ylongname = "latitude"
+yunits    = "degrees_north"
+xfirst    = 0.5
+xinc      = 1
+yfirst    = 89.5
+yinc      = -1
diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt
new file mode 100644
index 00000000..cf1e3515
--- /dev/null
+++ b/conf/grid_description/griddes_system2c3s.txt
@@ -0,0 +1,19 @@
+#
+# Grid description file for JMA System3c3s
+# 
+# gridID 2
+#
+gridtype  = lonlat
+gridsize  = 10512
+xsize     = 144
+ysize     = 73
+xname     = lon
+xlongname = "longitude"
+xunits    = "degrees_east"
+yname     = lat
+ylongname = "latitude"
+yunits    = "degrees_north"
+xfirst    = 0
+xinc      = 2.5
+yfirst    = 90
+yinc      = -2.5
-- 
GitLab


From 68ccbd7cb91f26b9d54de10296665971f6b1cc8d Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 7 Apr 2022 13:06:35 +0200
Subject: [PATCH 02/29] Fix for issue #11

---
 modules/data_load/load.R                      |  2 +-
 .../data_load/testing_recipes/recipe_4.yml    | 43 +++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 modules/data_load/testing_recipes/recipe_4.yml

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 0083e9a9..1fbf623d 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -230,7 +230,7 @@ load_datasets <- function(recipe_file){
                   split_multiselected_dims = split_multiselected_dims,
     	          retrieve = TRUE)
   
-    names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_year"
+    names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_syear"
     fcst <- startR_to_s2dv(fcst)
 
   } else {
diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml
new file mode 100644
index 00000000..128560d4
--- /dev/null
+++ b/modules/data_load/testing_recipes/recipe_4.yml
@@ -0,0 +1,43 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: system7c3s
+    Multimodel: no
+    Reference:
+      name: era5
+  Time:
+    sdate:
+      fcst_syear: '2020'
+      fcst_sday: '1101'
+    hcst_start: '1993'
+    hcst_end: '2016'
+    leadtimemin: 2
+    leadtimemax: 4
+  Region:
+    latmin: -10
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Calibration:
+      method: SBC
+    Skill:
+      metric: RPSS
+    Indicators:
+      index: no
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
-- 
GitLab


From 2254f0786b6597a35b49ef05c35960731cfc9742 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 12 Apr 2022 16:32:46 +0200
Subject: [PATCH 03/29] format fixes

---
 modules/data_load/load.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 1fbf623d..6aec0614 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -19,12 +19,12 @@ attr2array <- function(attr){
 startR_to_s2dv <- function(startR_array){
 
   dates_dims <- dim(Subset(startR_array, 
-                           along=c('dat','var',
+                           along=c('dat', 'var',
                            'latitude', 'longitude', 'ensemble'), 
                            list(1,1,1,1,1), drop="selected")) 
 
   #sdates_dims <- dim(Subset(startR_array, 
-  #                         along=c('dat','var','time','sweek','sday',
+  #                         along=c('dat', 'var', 'time', 'sweek', 'sday',
   #                         'latitude', 'longitude', 'ensemble'), 
   #                         list(1,1,1,1,1,1,1,1), drop="selected")) 
 
@@ -44,18 +44,18 @@ startR_to_s2dv <- function(startR_array){
                                             'Variables')$dat1$longitude),
                            lat = attr2array(attr(startR_array,
                                             'Variables')$dat1$latitude),
-                           Variable = list(varName=names(attr(startR_array,
-							                            'Variables')$common)[2],
+                           Variable = list(varName=names(attr(startR_array, 
+							 'Variables')$common)[2],
                                            level = NULL),
                            Dates = list(start = dates_start,
                                         end = dates_end),
                                         #sdate = sdates),
-			               time_dims = time_dims,
+			   time_dims = time_dims,
                            when = Sys.time(),
                            source_files = attr2array(attr(startR_array, "Files"))
-          #Datasets=list(exp1=list(InitializationsDates=list(Member_1="01011990", 
-          #                                                  Members="Member_1")))
-                          ) 
+          #                Datasets=list(exp1=list(InitializationsDates=list(                                        Member_1="01011990", 
+          #                                        Members="Member_1")))
+                          )
   return(s2dv_object)
 }
 
-- 
GitLab


From 5f98c0dd0319a258343e6ba7e68603622d53c96b Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 12 Apr 2022 16:33:27 +0200
Subject: [PATCH 04/29] Add testing recipe for meteofrance system 7 monthly

---
 modules/data_load/testing_recipes/recipe_4.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml
index 128560d4..a82c4f2c 100644
--- a/modules/data_load/testing_recipes/recipe_4.yml
+++ b/modules/data_load/testing_recipes/recipe_4.yml
@@ -30,7 +30,7 @@ Analysis:
     type: to_system
   Workflow:
     Calibration:
-      method: SBC
+      method: mse_min
     Skill:
       metric: RPSS
     Indicators:
-- 
GitLab


From cf240f215e897c6a82f9e6f0b98716ec42c520ef Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 19 Apr 2022 14:47:05 +0200
Subject: [PATCH 05/29] added file to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 5faf5e63..752f5cf9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 out-logs/
 *.swp
 *.swo
+/modules/Calibration/test_victoria.R
-- 
GitLab


From 73ecc687f21033b16f1833e64391fddf36e98006 Mon Sep 17 00:00:00 2001
From: lpalma <lluis.palma@bsc.es>
Date: Tue, 19 Apr 2022 16:10:29 +0200
Subject: [PATCH 06/29] fixed leadtime selection, now the 0 leadmonth
 corresponds to the month starting when the forecast is initialized

---
 modules/data_load/dates2load.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R
index 28d7905e..f6a5e27f 100644
--- a/modules/data_load/dates2load.R
+++ b/modules/data_load/dates2load.R
@@ -97,8 +97,8 @@ get_timeidx <- function(sdates, ltmin, ltmax,
 
   } else if (time_freq == "monthly_mean") {
   
-    idx_min <- ltmin
-    idx_max <- ltmax
+    idx_min <- ltmin + 1
+    idx_max <- ltmax + 1
     indxs <- indices(idx_min:idx_max)
 
   }
-- 
GitLab


From 8b99aa6d4d486b5721a546aa577242d960580d29 Mon Sep 17 00:00:00 2001
From: lpalma <lluis.palma@bsc.es>
Date: Tue, 19 Apr 2022 18:06:03 +0200
Subject: [PATCH 07/29] Added @aho suggestions

---
 modules/data_load/load.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 6aec0614..25a3b82b 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -244,8 +244,7 @@ load_datasets <- function(recipe_file){
   # Separate Start() call for monthly vs daily data
   if (store.freq == "monthly_mean") {
   
-    dates_file <- sapply(dates, format, '%Y%m%d')
-    dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m")
+    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
     dim(dates_file) <- dim(Subset(hcst$data, 
                                   along=c('dat','var',
                                   'latitude', 'longitude', 'ensemble'), 
@@ -254,8 +253,6 @@ load_datasets <- function(recipe_file){
     obs <- Start(dat = obs.path,
   	             var = variable,
   	             file_date = dates_file,
-  	             merge_across_dims = TRUE,
-  	             merge_across_dims_narm = TRUE,
   	             latitude = values(list(lats.min, lats.max)),
   	             latitude_reorder = Sort(),
   	             longitude = values(list(lons.min, lons.max)),
-- 
GitLab


From e36ae023b518d9e8e44d070ad7ce52593c3a288b Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 21 Apr 2022 09:29:27 +0200
Subject: [PATCH 08/29] Adapt hcst calibration script to work with s2dv_cube

---
 modules/Calibration/Calibration.R             | 55 ++++++-----
 modules/Calibration/test_calib.R              | 94 +++++++++++++++++++
 .../data_load/testing_recipes/recipe_4.yml    |  2 +-
 3 files changed, 124 insertions(+), 27 deletions(-)
 create mode 100644 modules/Calibration/test_calib.R

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 295bb406..c57e4867 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -48,12 +48,12 @@
 #'@param obs A numeric array with named dimensions,
 #'           representing the observational data used
 #'           in the bias-adjustment method.
-#'           'sday','syear','member' are mandatory dims
+#'           'sday','syear','ensemble' are mandatory dims
 
 #'@param hcst A numeric array with named dimensions,
 #'           representing the system hindcast data used
 #'           in the bias-adjustment method.
-#'           'sday','syear','member' are mandatory dims
+#'           'sday','syear','ensemble' are mandatory dims
 
 #'@param fun bias adjustment function 
 
@@ -69,7 +69,7 @@
 #'@return A numeric array with the bias adjusted hindcast,
 #'        
 
-CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based")
+CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
 
 hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
                        split_factor=1, ncores=32)
@@ -77,10 +77,11 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 
   # Replicates observations for bias adjusting each 
   # system of the multi-model separately
+  ## TODO: Adapt code for multi-model case
   if(mm){
     obs.mm <- obs
     for(dat in 1:(dim(hcst)['dat'][[1]]-1)){
-        obs.mm <- abind(obs2,obs,
+        obs.mm <- abind(obs2, obs,
                         along = which(names(dim(obs)) == 'dat'))
     }
     names(dim(obs.mm)) <- names(dim(obs)) 
@@ -91,7 +92,7 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
   if (method %in% CST_CALIB_METHODS) {
 
     # Hcst calibration 
-    hcst <- CSTools::CST_Calibration(hcst,obs,
+    hcst <- CSTools::CST_Calibration(hcst, obs,
                                      cal.method = method,
                                      eval.method = "leave-one-out",
                                      multi.model = mm,
@@ -99,32 +100,34 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
                                      na.rm = na.rm,
                                      apply_to = NULL,
                                      alpha = NULL,
-                                     memb_dim = "member",
+                                     memb_dim = "ensemble",
                                      sdate_dim = "syear",
                                      ncores = ncores)
 
   } else {
-    #error(logger, 
-    #      "Calibration method is not implemented in CSTools")
+    ## TODO: Implement other calibration methods
+    error(logger, 
+          "Calibration method is not implemented in CSTools")
   }
 
-  hcst[!is.finite(hcst)] <- NA
-  remove(hcst)
+  hcst$data[!is.finite(hcst$data)] <- NA
+  ## TODO: Is this line necessary?
+  # remove(hcst)
 
   # Merges all the ensembles from the different systems into
   # one single ensemble
   if(mm){
     hcst <- MergeDims(hcst, 
-                      merge_dims=c('dat','member'), 
-                      rename_dim='member')
-    hcst <- drop_na_dim(hcst,'member')
+                      merge_dims = c('dat', 'ensemble'), 
+                      rename_dim = 'ensemble')
+    hcst <- drop_na_dim(hcst, 'ensemble')
   }
 
-  # removes dat and var dimensions if needed
-  try(hcst<-Subset(hcst,
-                   c('var'),list(1),drop='selected'))
-  try(hcst<-Subset(hcst,
-                   c('dat'),list(1),drop='selected'))
+# # removes dat and var dimensions if needed
+#  try(hcst<-Subset(hcst,
+#                  c('var'), list(1), drop='selected'))
+#  try(hcst<-Subset(hcst,
+#                   c('dat'), list(1), drop='selected'))
 
   return(hcst)
 
@@ -138,17 +141,17 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 #'@param obs A numeric array with named dimensions,
 #'           representing the observational data used
 #'           in the bias-adjustment method.
-#'           'sday','syear','member' are mandatory dims
+#'           'sday','syear','ensemble' are mandatory dims
 
 #'@param hcst A numeric array with named dimensions,
 #'           representing the system hindcast data used
 #'           in the bias-adjustment method.
-#'           'sday','syear','member' are mandatory dims
+#'           'sday','syear','ensemble' are mandatory dims
 
 #'@param fcst A numeric array with named dimensions,
 #'           representing the system forecast to be
 #'           bias-adjusted.  
-#'           'sday','syear','member' are mandatory dims
+#'           'sday','syear','ensemble' are mandatory dims
 
 #'@param fun bias adjustment function 
 
@@ -168,8 +171,8 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 ## needs and update: look at hcst version
 #fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
 #                       na.rm=T, split_factor=1,ncores=32,
-#                       target_dims=c('sday','syear','member'),
-#                       output_dims=c('member'))
+#                       target_dims=c('sday','syear','ensemble'),
+#                       output_dims=c('ensemble'))
 #{
 #
 #  # Replicates observations for bias adjusting each 
@@ -200,9 +203,9 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 #  # one single ensemble
 #  if(mm){
 #    calibrated_fcst <- MergeDims(calibrated_fcst, 
-#                         merge_dims=c('dat','member'), 
-#                         rename_dim='member')
-#    calibrated_fcst <- drop_na_dim(calibrated_fcst,'member')
+#                         merge_dims=c('dat','ensemble'), 
+#                         rename_dim='ensemble')
+#    calibrated_fcst <- drop_na_dim(calibrated_fcst,'ensemble')
 #  }
 #
 #  # removes dat and var dimensions if needed
diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
new file mode 100644
index 00000000..18ee110f
--- /dev/null
+++ b/modules/Calibration/test_calib.R
@@ -0,0 +1,94 @@
+
+
+recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
+
+source("modules/data_load/load.R")
+
+data <- load_datasets(recipe_file)
+
+## Entry params data and recipe?
+## TODO: Instead of reading the recipe at each module, do it at the beginning?
+recipe <- read_yaml(recipe_file)
+
+calibrate_datasets <- function(data, recipe) {
+  # hcst <- data$hcst
+  # obs <- data$obs
+  # fcst <- data$fcst
+  
+  # Calibration function params
+  method <- recipe$Analysis$Workflow$Calibration$method
+  ## TODO: Make sure 'Multimodel' param in recipe is a boolean
+  mm <- recipe$Analysis$Datasets$Multimodel
+  ncores <- 4
+  na.rm <- T
+  
+  # Hcst calibration 
+  hcst_calib <- CSTools::CST_Calibration(data$hcst, data$obs,
+                                         cal.method = method,
+                                         eval.method = "leave-one-out",
+                                         multi.model = mm,
+                                         na.fill = TRUE,
+                                         na.rm = na.rm,
+                                         apply_to = NULL,
+                                         alpha = NULL,
+                                         memb_dim = "ensemble",
+                                         sdate_dim = "syear",
+                                         ncores = ncores)
+  
+  ## Alba's version
+  library(s2dv)
+  library(ClimProjDiags)
+  library(multiApply)
+  source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R")
+  hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
+                                     cal.method = method,
+                                     eval.method = "leave-one-out",
+                                     multi.model = mm,
+                                     na.fill = TRUE,
+                                     na.rm = na.rm,
+                                     apply_to = NULL,
+                                     alpha = NULL,
+                                     memb_dim = "ensemble",
+                                     sdate_dim = "syear",
+                                     ncores = ncores)
+  
+  ## In the new CST_Calibration function, the fcst must have the same number of 
+  ## dimensions as the hcst
+  dim(data$fcst$data) <- c(dim(data$fcst$data)[1:2], "sday" = 1, "sweek" = 1,
+  			 dim(data$fcst$data)[3:7])
+  ## In the new CST_Calibration function, memb_dim and sdate_dim must be the same
+  ## in the hcst and the fcst.
+  names(dim(data$fcst$data))[which(names(dim(data$fcst$data)) == 
+  				 'fcst_syear')] <- "syear"
+  
+  fcst_alba <- CST_Calibration(data$hcst, data$obs, data$fcst,
+                              cal.method = method,
+                              eval.method = "leave-one-out",
+                              multi.model = mm,
+                              na.fill = TRUE,
+                              na.rm = na.rm,
+                              apply_to = NULL,
+                              alpha = NULL,
+                              memb_dim = "ensemble",
+                              sdate_dim = "syear",
+                              ncores = ncores)
+  
+  ## Lluís' version:
+  ## Note: memb_dim is hardcoded as 'ensemble'.
+  source("/esarchive/scratch/vagudets/repos/auto-s2s/modules/Calibration/Calibration.R")
+  hcst_lluis <- hcst_calib(data$obs, data$hcst,
+  			   method = method,
+  			   mm = mm,
+  			   na.rm = na.rm,
+  			   split_factor = 1,
+  			   ncores = ncores)
+  ## TODO: Include Lluís' forecast version and test it
+  # For daily
+  # hcst <- CSTools::CST_QuantileMapping(data$hcst,
+  #                                      data$obs,
+  #                                      exp_cor = NULL,
+  #                                      sample_dims = c("syear", "time", "ensemble"),
+  #                                      sample_length = NULL,
+  #                                      method = "QUANT",
+  #                                      ncores = ncores,
+  #                                      na.rm = na.rm)
diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml
index a82c4f2c..421888b1 100644
--- a/modules/data_load/testing_recipes/recipe_4.yml
+++ b/modules/data_load/testing_recipes/recipe_4.yml
@@ -9,7 +9,7 @@ Analysis:
   Datasets:
     System:
       name: system7c3s
-    Multimodel: no
+    Multimodel: False
     Reference:
       name: era5
   Time:
-- 
GitLab


From 5dd360ab980355a37bb4c0e06df586f17eb39f16 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 22 Apr 2022 12:01:45 +0200
Subject: [PATCH 09/29] First version of wrapper fun using CST_Calibration, add
 some useful scripts

---
 modules/Calibration/Calibration.R       |  91 ++++++-------
 modules/Calibration/Calibration_fcst3.R | 155 ++++++++++++++++++++++
 modules/Calibration/test_calib.R        | 165 ++++++++++++------------
 modules/test_victoria.R                 |  14 ++
 4 files changed, 300 insertions(+), 125 deletions(-)
 create mode 100644 modules/Calibration/Calibration_fcst3.R
 create mode 100644 modules/test_victoria.R

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index c57e4867..8f21eecb 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -169,52 +169,55 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 
 ## TODO:
 ## needs and update: look at hcst version
-#fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
-#                       na.rm=T, split_factor=1,ncores=32,
-#                       target_dims=c('sday','syear','ensemble'),
-#                       output_dims=c('ensemble'))
-#{
-#
-#  # Replicates observations for bias adjusting each 
-#  # system of the multi-model separately
-#  if(mm){
-#    obs.mm <- obs
-#    for(dat in 1:(dim(hcst)['dat'][[1]]-1)){
-#        obs.mm <- abind(obs2,obs,
-#                          along=which(names(dim(obs)) == 'dat'))
-#    }
-#    names(dim(obs.mm)) <- names(dim(obs)) 
-#    obs <- obs.mm
-#    remove(obs.mm)
-#  }
-# 
-#  # Fcst Calibration
-#  calibrated_fcst <-Apply(data=list(obs=obs,hcst=hcst,fcst=fcst),
-#                          extra_info=list(na.rm=na.rm),
-#                          target_dims=target_dims,
-#                          output_dims=output_dims,
-#                          na.rm=na.rm,
-#                          ncores=ncores,
-#                          fun = .fcstcal)[[1]]
-#
-#  calibrated_fcst[!is.finite(calibrated_fcst)] <- NA
-#  
-#  # Merges all the ensembles from the different systems into
-#  # one single ensemble
-#  if(mm){
-#    calibrated_fcst <- MergeDims(calibrated_fcst, 
-#                         merge_dims=c('dat','ensemble'), 
-#                         rename_dim='ensemble')
-#    calibrated_fcst <- drop_na_dim(calibrated_fcst,'ensemble')
-#  }
-#
+fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
+                       na.rm=T, split_factor=1, ncores=32,
+		       ## TODO: remove these?
+                       target_dims=c('sday', 'syear', 'ensemble'),
+                       output_dims=c('ensemble'))
+{
+
+  # Replicates observations for bias adjusting each 
+  # system of the multi-model separately
+  ## TODO: Adapt multi-model case
+  if(mm){
+    obs.mm <- obs
+    for(dat in 1:(dim(hcst)['dat'][[1]]-1)){
+        obs.mm <- abind(obs2,obs,
+                          along=which(names(dim(obs)) == 'dat'))
+    }
+    names(dim(obs.mm)) <- names(dim(obs)) 
+    obs <- obs.mm
+    remove(obs.mm)
+  }
+ 
+  # Fcst Calibration
+  calibrated_fcst <- Apply(data=list(obs = obs, hcst = hcst, fcst = fcst),
+                          extra_info = list(na.rm = na.rm),
+                          target_dims = target_dims,
+                          output_dims = output_dims,
+                          na.rm = na.rm,
+                          ncores = ncores,
+                          fun = .fcstcal)[[1]] ## Need to source this fun?
+
+  calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA
+  
+  # Merges all the ensembles from the different systems into
+  # one single ensemble
+  ## TODO: Adapt multi-model case
+  if(mm){
+    calibrated_fcst <- MergeDims(calibrated_fcst, 
+                         merge_dims=c('dat', 'ensemble'), 
+                         rename_dim = 'ensemble')
+    calibrated_fcst <- drop_na_dim(calibrated_fcst, 'ensemble')
+  }
+
 #  # removes dat and var dimensions if needed
 #  try(calibrated_fcst<-Subset(calibrated_fcst,
 #                              c('var'),list(1),drop='selected'))
 #  try(calibrated_fcst<-Subset(calibrated_fcst,
 #                              c('dat'),list(1),drop='selected'))
-#
-#  return(calibrated_fcst)
-#
-#
-#}
+
+  return(calibrated_fcst)
+
+
+}
diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R
new file mode 100644
index 00000000..530081f2
--- /dev/null
+++ b/modules/Calibration/Calibration_fcst3.R
@@ -0,0 +1,155 @@
+library(ClimProjDiags)
+
+# Performs the hcst calibration for a centered time
+# window of sdays. Objects must have the dimensions;
+# sday, ensemble & syear. 
+# If sday dim = 1; no time window calibration is performed.
+.cal <- function(var_obs, var_exp, na.rm=F) {
+
+  ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][]
+  nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][]
+  ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][]
+  
+  # selects only the centered day of time window 
+  if ( ndays == 1 ){
+    fcst.index <- 1
+  } else {
+    fcst.index <- (ndays + 1)/2
+  }
+  
+  ind <- 1:ntime
+  climObs <- sapply(ind, function(x) {mean(
+              Subset(var_obs, along='syear',indices=-x), na.rm=na.rm)}) 
+  climPred <- sapply(ind, function(x) {mean(
+              Subset(var_exp, along='syear',indices=-x), na.rm=na.rm)}) 
+
+  var_obs.ano <- var_obs - climObs
+  var_exp.ano <- var_exp - climPred
+  
+  calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index,0))
+  # loop over the hindcast years
+  for (t in 1 : ntime) {
+
+    # defining forecast,hindcast in cross-validation
+    fcst.ano <- Subset(var_exp.ano, along=c('syear','sday'),
+                                indices=list(t,fcst.index)) 
+    hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t))
+
+    obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) 
+
+    # computes ensemble mean of the fcst & hcst
+    em_fcst <- mean(fcst.ano, na.rm=na.rm)
+    em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
+                                             na.rm=na.rm)$output1
+
+    # Changes the correlation method in case of NAs 
+    if (na.rm){
+      corr.method <- "pairwise.complete.obs"
+    } else {
+      corr.method <- "everything"
+    }
+    corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method)
+    sd_obs <- sd(obs.ano, na.rm=na.rm)
+    sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm)
+    
+    # Computes difference between fcst anomaly and ensemble mean
+    fcst_diff <- fcst.ano - em_fcst
+    hcst_diff <- array(numeric(), c(ndays,ntime-1,0))
+    names(dim(hcst_diff)) <- names(dim(hcst.ano))
+    for (n in 1 : nmembers) {
+      diff <-  Subset(hcst.ano, along='ensemble', indices=n,
+                      drop='selected') - em_hcst
+      hcst_diff <- abind(hcst_diff, diff,
+                        along=which(names(dim(var_exp)) == 'ensemble'),
+                        hier.names=F)
+    }
+
+    sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm)
+    
+    a <- abs(corr) * (sd_obs / sd_em_hcst)
+    b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
+    
+    # Calibrates hcst sdate
+    calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t])
+    # Appends calibrated sdate to the hcst array
+    calib.hcst <- abind(calib.hcst, calib.syear,
+                        along=which(names(dim(var_exp)) == 'syear'),
+                        hier.names=F)
+  }
+
+  names(dim(calib.hcst)) <- names(dim(calib.syear))
+  # removes NAs added in array init
+  calib.hcst <- Subset(calib.hcst, along='syear', indices=-1)
+
+  return(calib.hcst)
+
+}
+
+# Performs the fcst calibration for a centered time
+# window of sdays. Objects must have the dimensions;
+# sday, ensemble & syear. 
+# If sday dim = 1; no time window calibration is performed.
+.fcstcal <- function(obs, hcst, fcst, na.rm=F) {
+
+  ntime <- dim(hcst)['syear']
+  nmembers <- dim(hcst)['ensemble']
+  fcst.nmembers <- dim(fcst)['ensemble']
+  ndays <- dim(hcst)['sday']
+  
+  # selects only the centered day of time window 
+  if ( ndays == 1 ){
+    fcst.index <- 1
+  } else {
+    fcst.index <- (ndays + 1)/2
+  }
+  
+  ind <- 1:ntime
+  climObs <- mean(obs, na.rm=na.rm) 
+  climPred <- mean(hcst, na.rm=na.rm)
+
+  obs.ano <- obs - climObs
+  hcst.ano <- hcst - climPred
+  fcst.ano <- fcst - climPred
+  
+  calib.fcst <- NA * fcst
+
+  # computes ensemble mean of the fcst & hcst
+  em_fcst <- mean(fcst.ano, na.rm=na.rm)
+  em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
+                                           na.rm=na.rm)$output1
+
+  # Changes the correlation method in case of NAs allowed
+  if (na.rm){
+    corr.method <- "pairwise.complete.obs"
+  } else {
+    corr.method <- "everything"
+  }
+
+  corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method)
+  sd_obs <- sd(obs.ano, na.rm=na.rm)
+  sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm)
+  
+  # Computes difference between fcst anomaly and ensemble mean
+  fcst_diff <- fcst.ano - em_fcst
+  hcst_diff <- array(numeric(), c(ndays,ntime,0))
+  names(dim(hcst_diff)) <- names(dim(hcst.ano))
+  for (n in 1 : nmembers) {
+    diff <-  Subset(hcst.ano, along='ensemble', indices=n,
+                    drop='selected') - em_hcst
+    hcst_diff <- abind(hcst_diff, diff,
+                      along=which(names(dim(hcst)) == 'ensemble'),
+                      hier.names=F)
+  }
+
+  sd_hcst_diff <- sd(hcst_diff, na.rm)
+  
+  a <- abs(corr) * (sd_obs / sd_em_hcst)
+  b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
+  
+  # Calibrates hcst sdate
+  fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs)
+  return(Subset(fcst.calib, c('sday','syear'), list(1,1), drop='selected'))
+
+}
+
+
diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index 18ee110f..c24bcecf 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -1,19 +1,16 @@
 
-
-recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
-
-source("modules/data_load/load.R")
-
-data <- load_datasets(recipe_file)
+## TODO: Replace with library(CSTools) once Alba's fun is merged
+library(s2dv)
+library(ClimProjDiags)
+library(multiApply)
+source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R")
 
 ## Entry params data and recipe?
-## TODO: Instead of reading the recipe at each module, do it at the beginning?
-recipe <- read_yaml(recipe_file)
-
 calibrate_datasets <- function(data, recipe) {
-  # hcst <- data$hcst
-  # obs <- data$obs
-  # fcst <- data$fcst
+  hcst <- data$hcst
+  obs <- data$obs
+  fcst <- data$fcst
+  ## TODO: Daily case
   
   # Calibration function params
   method <- recipe$Analysis$Workflow$Calibration$method
@@ -22,73 +19,79 @@ calibrate_datasets <- function(data, recipe) {
   ncores <- 4
   na.rm <- T
   
-  # Hcst calibration 
-  hcst_calib <- CSTools::CST_Calibration(data$hcst, data$obs,
-                                         cal.method = method,
-                                         eval.method = "leave-one-out",
-                                         multi.model = mm,
-                                         na.fill = TRUE,
-                                         na.rm = na.rm,
-                                         apply_to = NULL,
-                                         alpha = NULL,
-                                         memb_dim = "ensemble",
-                                         sdate_dim = "syear",
-                                         ncores = ncores)
-  
-  ## Alba's version
-  library(s2dv)
-  library(ClimProjDiags)
-  library(multiApply)
-  source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R")
-  hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
-                                     cal.method = method,
-                                     eval.method = "leave-one-out",
-                                     multi.model = mm,
-                                     na.fill = TRUE,
-                                     na.rm = na.rm,
-                                     apply_to = NULL,
-                                     alpha = NULL,
-                                     memb_dim = "ensemble",
-                                     sdate_dim = "syear",
-                                     ncores = ncores)
-  
-  ## In the new CST_Calibration function, the fcst must have the same number of 
-  ## dimensions as the hcst
-  dim(data$fcst$data) <- c(dim(data$fcst$data)[1:2], "sday" = 1, "sweek" = 1,
-  			 dim(data$fcst$data)[3:7])
-  ## In the new CST_Calibration function, memb_dim and sdate_dim must be the same
-  ## in the hcst and the fcst.
-  names(dim(data$fcst$data))[which(names(dim(data$fcst$data)) == 
-  				 'fcst_syear')] <- "syear"
-  
-  fcst_alba <- CST_Calibration(data$hcst, data$obs, data$fcst,
-                              cal.method = method,
-                              eval.method = "leave-one-out",
-                              multi.model = mm,
-                              na.fill = TRUE,
-                              na.rm = na.rm,
-                              apply_to = NULL,
-                              alpha = NULL,
-                              memb_dim = "ensemble",
-                              sdate_dim = "syear",
-                              ncores = ncores)
+  ## TODO: Change or move this check elsewhere so the error shows up earlier
+  CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based")
+  if (method %in% CST_CALIB_METHODS) {
+    ## Alba's version
+    hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
+                                       cal.method = method,
+                                       eval.method = "leave-one-out",
+                                       multi.model = mm,
+                                       na.fill = TRUE,
+                                       na.rm = na.rm,
+                                       apply_to = NULL,
+                                       alpha = NULL,
+                                       memb_dim = "ensemble",
+                                       sdate_dim = "syear",
+                                       ncores = ncores)
+    ## TODO: Resolve these workarounds
+    ## In the new CST_Calibration function, the fcst must have the same number of 
+    ## dimensions as the hcst
+    dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1,
+    			dim(fcst$data)[3:7])
+    ## In the new CST_Calibration function, memb_dim and sdate_dim must be the 
+    ## same in the hcst and the fcst.
+    names(dim(fcst$data))[which(names(dim(fcst$data)) == 
+    		          'fcst_syear')] <- "syear"
+    
+    fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+                                       cal.method = method,
+                                       eval.method = "leave-one-out",
+                                       multi.model = mm,
+                                       na.fill = TRUE,
+                                       na.rm = na.rm,
+                                       apply_to = NULL,
+                                       alpha = NULL,
+                                       memb_dim = "ensemble",
+                                       sdate_dim = "syear",
+                                       ncores = ncores)
+  } else {
+    stop("Calibration method is not implemented in CSTools")
+  }
+  print("###### CALIBRATION COMPLETE ######")
+  return(list(hcst_calibrated, fcst_calibrated))
+}
+
+#    ## Lluís' version:
+#    ## Note: memb_dim is hardcoded as 'ensemble'.
+#    source("modules/Calibration/Calibration.R")
+#    ## TODO: Change path
+#    # source("https://earth.bsc.es/gitlab/api/v4/projects/446/repository/blobs/bfe5848c06536218f1fbed6e9356cc6aebe27762/raw?ref=TFM&private_token=pExJt3V4FYwj4yzvy9ps")
+#    source("modules/Calibration/Calibration_fcst3.R")
+#    ## TODO: move this elsewhere
+#    library(abind)
+#    hcst_lluis <- hcst_calib(obs, hcst,
+#    			   method = method,
+#    			   mm = mm,
+#    			   na.rm = na.rm,
+#    			   split_factor = 1,
+#    			   ncores = ncores)
+#  
+#    fcst_lluis <- fcst_calib(obs$data, hcst$data, fcst$data,
+#  			     # method = method,
+#  			     fun = .fcstcal, ## ?
+#  			     mm = mm,
+#  			     na.rm = na.rm,
+#  			     split_factor = 1,
+#  			     ncores = ncores)
   
-  ## Lluís' version:
-  ## Note: memb_dim is hardcoded as 'ensemble'.
-  source("/esarchive/scratch/vagudets/repos/auto-s2s/modules/Calibration/Calibration.R")
-  hcst_lluis <- hcst_calib(data$obs, data$hcst,
-  			   method = method,
-  			   mm = mm,
-  			   na.rm = na.rm,
-  			   split_factor = 1,
-  			   ncores = ncores)
-  ## TODO: Include Lluís' forecast version and test it
-  # For daily
-  # hcst <- CSTools::CST_QuantileMapping(data$hcst,
-  #                                      data$obs,
-  #                                      exp_cor = NULL,
-  #                                      sample_dims = c("syear", "time", "ensemble"),
-  #                                      sample_length = NULL,
-  #                                      method = "QUANT",
-  #                                      ncores = ncores,
-  #                                      na.rm = na.rm)
+    ## TODO: Include Lluís' forecast version and test it
+    # For daily
+    # hcst <- CSTools::CST_QuantileMapping(data$hcst,
+    #                                      data$obs,
+    #                                      exp_cor = NULL,
+    #                                      sample_dims = c("syear", "time", "ensemble"),
+    #                                      sample_length = NULL,
+    #                                      method = "QUANT",
+    #                                      ncores = ncores,
+    #                                      na.rm = na.rm)
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
new file mode 100644
index 00000000..e04c8a6f
--- /dev/null
+++ b/modules/test_victoria.R
@@ -0,0 +1,14 @@
+
+
+recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
+
+source("modules/data_load/load.R")
+source("modules/Calibration/test_calib.R")
+data <- load_datasets(recipe_file)
+## TODO: Instead of reading the recipe at each module, do it at the beginning?
+recipe <- read_yaml(recipe_file)
+calibrated_data <- calibrate_datasets(data, recipe)
+
+## Entry params data and recipe?
+## TODO: Instead of reading the recipe at each module, do it at the beginning?
+calibrated_data <- calibrate_datasets(data, recipe)
-- 
GitLab


From 379f2b6a154b4822ef40de0b6956f55f16e43ea3 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 22 Apr 2022 17:03:58 +0200
Subject: [PATCH 10/29] Add condition in case fcst is NULL

---
 modules/Calibration/test_calib.R | 43 +++++++++++++++++---------------
 modules/test_victoria.R          |  4 ---
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index c24bcecf..09dc19b9 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -10,7 +10,7 @@ calibrate_datasets <- function(data, recipe) {
   hcst <- data$hcst
   obs <- data$obs
   fcst <- data$fcst
-  ## TODO: Daily case
+  ## TODO: Implement daily case
   
   # Calibration function params
   method <- recipe$Analysis$Workflow$Calibration$method
@@ -21,9 +21,10 @@ calibrate_datasets <- function(data, recipe) {
   
   ## TODO: Change or move this check elsewhere so the error shows up earlier
   CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based")
+  ## TODO: Revise output variable names
   if (method %in% CST_CALIB_METHODS) {
     ## Alba's version
-    hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
+    hcst_calibrated <- CST_Calibration(hcst, obs,
                                        cal.method = method,
                                        eval.method = "leave-one-out",
                                        multi.model = mm,
@@ -37,24 +38,26 @@ calibrate_datasets <- function(data, recipe) {
     ## TODO: Resolve these workarounds
     ## In the new CST_Calibration function, the fcst must have the same number of 
     ## dimensions as the hcst
-    dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1,
-    			dim(fcst$data)[3:7])
-    ## In the new CST_Calibration function, memb_dim and sdate_dim must be the 
-    ## same in the hcst and the fcst.
-    names(dim(fcst$data))[which(names(dim(fcst$data)) == 
-    		          'fcst_syear')] <- "syear"
-    
-    fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
-                                       cal.method = method,
-                                       eval.method = "leave-one-out",
-                                       multi.model = mm,
-                                       na.fill = TRUE,
-                                       na.rm = na.rm,
-                                       apply_to = NULL,
-                                       alpha = NULL,
-                                       memb_dim = "ensemble",
-                                       sdate_dim = "syear",
-                                       ncores = ncores)
+    if (!is.null(fcst)) {
+      dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1,
+      			dim(fcst$data)[3:7])
+      ## In the new CST_Calibration function, memb_dim and sdate_dim must be the 
+      ## same in the hcst and the fcst.
+      names(dim(fcst$data))[which(names(dim(fcst$data)) == 
+      		          'fcst_syear')] <- "syear"
+      
+      fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+                                         cal.method = method,
+                                         eval.method = "leave-one-out",
+                                         multi.model = mm,
+                                         na.fill = TRUE,
+                                         na.rm = na.rm,
+                                         apply_to = NULL,
+                                         alpha = NULL,
+                                         memb_dim = "ensemble",
+                                         sdate_dim = "syear",
+                                         ncores = ncores)
+    }
   } else {
     stop("Calibration method is not implemented in CSTools")
   }
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index e04c8a6f..686b1f13 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -8,7 +8,3 @@ data <- load_datasets(recipe_file)
 ## TODO: Instead of reading the recipe at each module, do it at the beginning?
 recipe <- read_yaml(recipe_file)
 calibrated_data <- calibrate_datasets(data, recipe)
-
-## Entry params data and recipe?
-## TODO: Instead of reading the recipe at each module, do it at the beginning?
-calibrated_data <- calibrate_datasets(data, recipe)
-- 
GitLab


From 96d384f478146fcdc0f74b40f94515d0e0102972 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 26 Apr 2022 11:20:11 +0200
Subject: [PATCH 11/29] Change path to grid description files from absolute to
 relative

---
 conf/archive.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/conf/archive.yml b/conf/archive.yml
index 73379d5d..43750a1b 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -22,7 +22,7 @@ archive:
             nmember:
               fcst: 51
               hcst: 25
-            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt"
+            reference_grid: "conf/grid_description/griddes_system7c3s.txt"
         system35c3s:
             src: "exp/cmcc/system35c3s/"
             monthly_mean: {"tas":"_f6h/", "g500":"_f12h/",
@@ -30,7 +30,7 @@ archive:
             nmember:
               fcst: 50
               hcst: 40
-            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt"
+            reference_grid: "conf/grid_description/griddes_system35c3s.txt"
         system2c3s:
             src: "exp/jma/system2c3s/"
             monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
@@ -38,7 +38,7 @@ archive:
             nmember:
               fcst: 10
               hcst: 10
-            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt"
+            reference_grid: "conf/grid_description/griddes_system35c3s.txt"
         eccc1:
             src: "exp/eccc/eccc1/"
             monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
@@ -46,7 +46,7 @@ archive:
             nmember:
               fcst: 10
               hcst: 10
-            reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_eccc1.txt"
+            reference_grid: "conf/grid_description/griddes_eccc1.txt"
     Reference:
         era5: 
             src: "recon/ecmwf/era5/"
-- 
GitLab


From b3520bcb9c47edd811a02743f6ca2071861d80f2 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 26 Apr 2022 12:02:24 +0200
Subject: [PATCH 12/29] Adapt forecast calibration to work with s2dv_cube
 objects

---
 modules/Calibration/Calibration.R       | 23 +++++++++++++----------
 modules/Calibration/Calibration_fcst3.R | 18 ++++++++++--------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 8f21eecb..4945c333 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -88,9 +88,8 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
     obs <- obs.mm
     remove(obs.mm)
   }
-
+  
   if (method %in% CST_CALIB_METHODS) {
-
     # Hcst calibration 
     hcst <- CSTools::CST_Calibration(hcst, obs,
                                      cal.method = method,
@@ -105,9 +104,10 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
                                      ncores = ncores)
 
   } else {
-    ## TODO: Implement other calibration methods
-    error(logger, 
-          "Calibration method is not implemented in CSTools")
+    ## TODO: Implement other calibration methods, implement logger
+    stop("Calibration method is not implemented in CSTools")
+    # error(logger, 
+    #       "Calibration method is not implemented in CSTools")
   }
 
   hcst$data[!is.finite(hcst$data)] <- NA
@@ -171,9 +171,9 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
 ## needs and update: look at hcst version
 fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
                        na.rm=T, split_factor=1, ncores=32,
-		       ## TODO: remove these?
+		       ## TODO: establish target dims and output dims
                        target_dims=c('sday', 'syear', 'ensemble'),
-                       output_dims=c('ensemble'))
+                       output_dims=c('sday', 'syear', 'ensemble'))
 {
 
   # Replicates observations for bias adjusting each 
@@ -191,15 +191,18 @@ fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
   }
  
   # Fcst Calibration
-  calibrated_fcst <- Apply(data=list(obs = obs, hcst = hcst, fcst = fcst),
+  ## TODO: Return s2dv_cube
+  calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data),
                           extra_info = list(na.rm = na.rm),
                           target_dims = target_dims,
                           output_dims = output_dims,
                           na.rm = na.rm,
                           ncores = ncores,
                           fun = .fcstcal)[[1]] ## Need to source this fun?
-
-  calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA
+  
+  ## TODO: Restore this line
+  # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA
+  calibrated_fcst[!is.finite(calibrated_fcst)] <- NA
   
   # Merges all the ensembles from the different systems into
   # one single ensemble
diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R
index 530081f2..e57f05c4 100644
--- a/modules/Calibration/Calibration_fcst3.R
+++ b/modules/Calibration/Calibration_fcst3.R
@@ -19,20 +19,20 @@ library(ClimProjDiags)
   
   ind <- 1:ntime
   climObs <- sapply(ind, function(x) {mean(
-              Subset(var_obs, along='syear',indices=-x), na.rm=na.rm)}) 
+              Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) 
   climPred <- sapply(ind, function(x) {mean(
-              Subset(var_exp, along='syear',indices=-x), na.rm=na.rm)}) 
+              Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) 
 
   var_obs.ano <- var_obs - climObs
   var_exp.ano <- var_exp - climPred
   
-  calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index,0))
+  calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index, 0))
   # loop over the hindcast years
   for (t in 1 : ntime) {
 
     # defining forecast,hindcast in cross-validation
-    fcst.ano <- Subset(var_exp.ano, along=c('syear','sday'),
-                                indices=list(t,fcst.index)) 
+    fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'),
+                                indices=list(t, fcst.index)) 
     hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t))
 
     obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) 
@@ -40,7 +40,7 @@ library(ClimProjDiags)
     # computes ensemble mean of the fcst & hcst
     em_fcst <- mean(fcst.ano, na.rm=na.rm)
     em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
-                                             na.rm=na.rm)$output1
+                     na.rm=na.rm)$output1
 
     # Changes the correlation method in case of NAs 
     if (na.rm){
@@ -117,7 +117,7 @@ library(ClimProjDiags)
   em_fcst <- mean(fcst.ano, na.rm=na.rm)
   em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
                                            na.rm=na.rm)$output1
-
+  
   # Changes the correlation method in case of NAs allowed
   if (na.rm){
     corr.method <- "pairwise.complete.obs"
@@ -148,7 +148,9 @@ library(ClimProjDiags)
   
   # Calibrates hcst sdate
   fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs)
-  return(Subset(fcst.calib, c('sday','syear'), list(1,1), drop='selected'))
+  # fcst_cube$data <- fcst.calib
+  return(fcst.calib)
+  # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected'))
 
 }
 
-- 
GitLab


From 397a6ce59eb8fe56c33917e51805347a365b4811 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 26 Apr 2022 16:24:50 +0200
Subject: [PATCH 13/29] testing

---
 modules/data_load/testing_recipes/recipe_3.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml
index ed82f43c..b71f428d 100644
--- a/modules/data_load/testing_recipes/recipe_3.yml
+++ b/modules/data_load/testing_recipes/recipe_3.yml
@@ -30,7 +30,7 @@ Analysis:
     type: to_system
   Workflow:
     Calibration:
-      method: SBC
+      method: mse_min
     Skill:
       metric: RPSS
     Indicators:
-- 
GitLab


From 05cfd85abe4811acfe5034e00cbe3ae76854811e Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 26 Apr 2022 16:25:58 +0200
Subject: [PATCH 14/29] Make dimensions of fcst object the same as hcst object

---
 modules/Calibration/test_calib.R | 21 ++++------
 modules/data_load/dates2load.R   | 13 +++---
 modules/data_load/load.R         | 71 ++++++++++++++++++--------------
 3 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index 09dc19b9..d4d4259c 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -14,16 +14,15 @@ calibrate_datasets <- function(data, recipe) {
   
   # Calibration function params
   method <- recipe$Analysis$Workflow$Calibration$method
-  ## TODO: Make sure 'Multimodel' param in recipe is a boolean
   mm <- recipe$Analysis$Datasets$Multimodel
   ncores <- 4
   na.rm <- T
   
-  ## TODO: Change or move this check elsewhere so the error shows up earlier
-  CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based")
+  CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
   ## TODO: Revise output variable names
   if (method %in% CST_CALIB_METHODS) {
-    ## Alba's version
+    ## Alba's version of CST_Calibration (pending merge) is being used
+    # Calibrate the hindcast
     hcst_calibrated <- CST_Calibration(hcst, obs,
                                        cal.method = method,
                                        eval.method = "leave-one-out",
@@ -35,17 +34,8 @@ calibrate_datasets <- function(data, recipe) {
                                        memb_dim = "ensemble",
                                        sdate_dim = "syear",
                                        ncores = ncores)
-    ## TODO: Resolve these workarounds
-    ## In the new CST_Calibration function, the fcst must have the same number of 
-    ## dimensions as the hcst
     if (!is.null(fcst)) {
-      dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1,
-      			dim(fcst$data)[3:7])
-      ## In the new CST_Calibration function, memb_dim and sdate_dim must be the 
-      ## same in the hcst and the fcst.
-      names(dim(fcst$data))[which(names(dim(fcst$data)) == 
-      		          'fcst_syear')] <- "syear"
-      
+      # Calibrate the forecast
       fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
                                          cal.method = method,
                                          eval.method = "leave-one-out",
@@ -57,7 +47,10 @@ calibrate_datasets <- function(data, recipe) {
                                          memb_dim = "ensemble",
                                          sdate_dim = "syear",
                                          ncores = ncores)
+    } else {
+      fcst_calibrated <- NULL
     }
+  ## TODO: Implement other calibration methods  
   } else {
     stop("Calibration method is not implemented in CSTools")
   }
diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R
index f6a5e27f..d9e5fa0c 100644
--- a/modules/data_load/dates2load.R
+++ b/modules/data_load/dates2load.R
@@ -49,12 +49,13 @@ dates2load <- function(recipe, logger){
 # adds the correspondent dims to each sdate array
 .add_dims <- function(data, type){
 
-  if (type == "hcst"){
-    default_dims <- c(sday = 1, sweek = 1, 
-                      syear = length(data))
-  } else {
-    default_dims <- c(fcst_syear = length(data))
-  }
+#   if (type == "hcst"){
+#     default_dims <- c(sday = 1, sweek = 1, 
+#                       syear = length(data))
+#   } else {
+#     default_dims <- c(fcst_syear = length(data))
+#   }
+  default_dims <- c(sday = 1, sweek = 1, syear = length(data))
 
   default_dims[names(dim(data))] <- dim(data)
   dim(data) <- default_dims
diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 25a3b82b..68588156 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -37,7 +37,7 @@ startR_to_s2dv <- function(startR_array){
   #dim(sdates) <- sdates_dims
 
   ## TODO: change this line when time attributes work correctly?
-  time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear")
+  time_dims <- c("time", "sday", "sweek", "syear")
 
   s2dv_object <- s2dv_cube(data = attr2array(startR_array), 
                            lon = attr2array(attr(startR_array,
@@ -193,8 +193,8 @@ load_datasets <- function(recipe_file){
     # Adjusts dims for daily case, could be removed if startR allows
     # multidim split
     names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
-    default_dims <- c(dat = 1, var = 1, sweek = 1, 
-                      sday = 1, syear = 1, time = 1, 
+    default_dims <- c(dat = 1, var = 1, sday = 1, 
+                      sweek = 1, syear = 1, time = 1, 
                       latitude = 1, longitude = 1, ensemble = 1)
     default_dims[names(dim(hcst))] <- dim(hcst)
     dim(hcst) <- default_dims
@@ -218,23 +218,34 @@ load_datasets <- function(recipe_file){
     	          longitude_reorder = circularsort,
     	          transform = regrid_params$fcst.transform,
     	          transform_params = list(grid = regrid_params$fcst.gridtype,
-    		        	                  method = regrid_params$fcst.gridmethod),
+    		                          method = regrid_params$fcst.gridmethod),
     	          transform_vars = c('latitude', 'longitude'),
     	          synonims = list(latitude = c('lat', 'latitude'),
-    		                      longitude = c('lon', 'longitude'),
-    		                      ensemble = c('member', 'ensemble')),
+    		                  longitude = c('lon', 'longitude'),
+    		                  ensemble = c('member', 'ensemble')),
     	          ensemble = indices(1:fcst.nmember),
     	          return_vars = list(latitude = 'dat',
-    		        	             longitude = 'dat',
-    		        	             time = 'file_date'),
+    		                     longitude = 'dat',
+    		                     time = 'file_date'),
                   split_multiselected_dims = split_multiselected_dims,
     	          retrieve = TRUE)
-  
-    names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_syear"
+    
+    ## TODO: Add 'sday' and 'sweek' dims to fcst
+    if (recipe$Analysis$Variables$freq == "daily_mean") {
+      # Adjusts dims for daily case, could be removed if startR allows
+      # multidim split
+      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
+      default_dims <- c(dat = 1, var = 1, sday = 1,
+                        sweek = 1, syear = 1, time = 1,
+                        latitude = 1, longitude = 1, ensemble = 1)
+      default_dims[names(dim(fcst))] <- dim(fcst)
+      dim(fcst) <- default_dims
+    }
+  #  names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
     fcst <- startR_to_s2dv(fcst)
 
   } else {
-    fcst <- NULL 
+    fcst <- NULL
   }
   
   # Load reference
@@ -251,23 +262,23 @@ load_datasets <- function(recipe_file){
                                   list(1,1,1,1,1), drop="selected"))
 
     obs <- Start(dat = obs.path,
-  	             var = variable,
-  	             file_date = dates_file,
-  	             latitude = values(list(lats.min, lats.max)),
-  	             latitude_reorder = Sort(),
-  	             longitude = values(list(lons.min, lons.max)),
-  	             longitude_reorder = circularsort,
-  	             transform = regrid_params$obs.transform,
-  	             transform_params = list(grid = regrid_params$obs.gridtype,
-  		          	                   method = regrid_params$obs.gridmethod),
-  	             transform_vars = c('latitude', 'longitude'),
-  	             synonims = list(latitude = c('lat','latitude'),
-  		                         longitude = c('lon','longitude')),
-  	             return_vars = list(latitude = 'dat',
-  		          	              longitude = 'dat',
-  		          	              time = 'file_date'),
-  	             split_multiselected_dims = TRUE,
-  	             retrieve = TRUE)
+                 var = variable,
+  	         file_date = dates_file,
+  	         latitude = values(list(lats.min, lats.max)),
+  	         latitude_reorder = Sort(),
+  	         longitude = values(list(lons.min, lons.max)),
+  	         longitude_reorder = circularsort,
+  	         transform = regrid_params$obs.transform,
+  	         transform_params = list(grid = regrid_params$obs.gridtype,
+  		                         method = regrid_params$obs.gridmethod),
+  	         transform_vars = c('latitude', 'longitude'),
+  	         synonims = list(latitude = c('lat','latitude'),
+  	                         longitude = c('lon','longitude')),
+  	         return_vars = list(latitude = 'dat',
+  		                    longitude = 'dat',
+  		                    time = 'file_date'),
+  	         split_multiselected_dims = TRUE,
+  	         retrieve = TRUE)
   
   } else if (store.freq == "daily_mean") {
 
@@ -307,8 +318,8 @@ load_datasets <- function(recipe_file){
   
   # TODO: Reorder obs dims to match hcst dims?
   # Adds ensemble dim to obs (for consistency with hcst/fcst)
-  default_dims <- c(dat = 1, var = 1, sweek = 1, 
-                    sday = 1, syear = 1, time = 1, 
+  default_dims <- c(dat = 1, var = 1, sday = 1, 
+                    sweek = 1, syear = 1, time = 1, 
                     latitude = 1, longitude = 1, ensemble = 1)
   default_dims[names(dim(obs))] <- dim(obs)
   dim(obs) <- default_dims
-- 
GitLab


From e8b6f53cb18149322070ce4827986c7ab9af598e Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 28 Apr 2022 13:25:29 +0200
Subject: [PATCH 15/29] Get hcst calibration (Lluis version) to work with
 s2dv_cube

---
 modules/Calibration/Calibration.R       | 30 +++++++++++++++++--------
 modules/Calibration/Calibration_fcst3.R |  8 ++++---
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 4945c333..2463fcbd 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -103,14 +103,26 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T,
                                      sdate_dim = "syear",
                                      ncores = ncores)
 
+    hcst$data[!is.finite(hcst$data)] <- NA
+
+  } else if (method %in% c("s2s4e")) {
+    hcst <- Apply(data=list(obs = obs$data, hcst = hcst$data),
+                            extra_info = list(na.rm = na.rm),
+                            target_dims = c('sday', 'syear', 'ensemble'),
+                            output_dims = c('sday', 'syear', 'ensemble'),
+			    na.rm = na.rm,
+			    ncores = ncores,
+			    fun = .cal)[[1]]
+
+    hcst[!is.finite(hcst)] <- NA
   } else {
     ## TODO: Implement other calibration methods, implement logger
     stop("Calibration method is not implemented in CSTools")
-    # error(logger, 
-    #       "Calibration method is not implemented in CSTools")
+   # error(logger, 
+   #       "Calibration method is not implemented in CSTools")
   }
 
-  hcst$data[!is.finite(hcst$data)] <- NA
+  # hcst$data[!is.finite(hcst$data)] <- NA
   ## TODO: Is this line necessary?
   # remove(hcst)
 
@@ -193,12 +205,12 @@ fcst_calib <- function(obs, hcst, fcst, fun, mm=F,
   # Fcst Calibration
   ## TODO: Return s2dv_cube
   calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data),
-                          extra_info = list(na.rm = na.rm),
-                          target_dims = target_dims,
-                          output_dims = output_dims,
-                          na.rm = na.rm,
-                          ncores = ncores,
-                          fun = .fcstcal)[[1]] ## Need to source this fun?
+                           extra_info = list(na.rm = na.rm),
+                           target_dims = target_dims,
+                           output_dims = output_dims,
+                           na.rm = na.rm,
+                           ncores = ncores,
+                           fun = .fcstcal)[[1]]
   
   ## TODO: Restore this line
   # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA
diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R
index e57f05c4..c67aae4c 100644
--- a/modules/Calibration/Calibration_fcst3.R
+++ b/modules/Calibration/Calibration_fcst3.R
@@ -18,15 +18,17 @@ library(ClimProjDiags)
   }
   
   ind <- 1:ntime
+ 
   climObs <- sapply(ind, function(x) {mean(
-              Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) 
+             Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)})
   climPred <- sapply(ind, function(x) {mean(
-              Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) 
+              Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)})
 
   var_obs.ano <- var_obs - climObs
   var_exp.ano <- var_exp - climPred
   
-  calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index, 0))
+  calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), 
+			    c(fcst.index, 0))
   # loop over the hindcast years
   for (t in 1 : ntime) {
 
-- 
GitLab


From 8c59d0170f40458aaaca47e5d9c5f201f187d526 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 28 Apr 2022 13:25:58 +0200
Subject: [PATCH 16/29] Add quantile mapping as calibration method for daily
 data

---
 modules/Calibration/test_calib.R | 81 ++++++++++++++++++++++----------
 modules/test_victoria.R          |  2 +-
 2 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index d4d4259c..20f067ce 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -17,26 +17,18 @@ calibrate_datasets <- function(data, recipe) {
   mm <- recipe$Analysis$Datasets$Multimodel
   ncores <- 4
   na.rm <- T
+
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
   
-  CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
-  ## TODO: Revise output variable names
-  if (method %in% CST_CALIB_METHODS) {
-    ## Alba's version of CST_Calibration (pending merge) is being used
-    # Calibrate the hindcast
-    hcst_calibrated <- CST_Calibration(hcst, obs,
-                                       cal.method = method,
-                                       eval.method = "leave-one-out",
-                                       multi.model = mm,
-                                       na.fill = TRUE,
-                                       na.rm = na.rm,
-                                       apply_to = NULL,
-                                       alpha = NULL,
-                                       memb_dim = "ensemble",
-                                       sdate_dim = "syear",
-                                       ncores = ncores)
-    if (!is.null(fcst)) {
-      # Calibrate the forecast
-      fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+    CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
+    ## TODO: implement other calibration methods
+    ## TODO: Restructure the code?
+    if (!(method %in% CST_CALIB_METHODS)) {
+      stop("Calibration method is not implemented in CSTools")
+    } else {
+      ## Alba's version of CST_Calibration (pending merge) is being used
+      # Calibrate the hindcast and forecast
+      hcst_calibrated <- CST_Calibration(hcst, obs,
                                          cal.method = method,
                                          eval.method = "leave-one-out",
                                          multi.model = mm,
@@ -47,15 +39,57 @@ calibrate_datasets <- function(data, recipe) {
                                          memb_dim = "ensemble",
                                          sdate_dim = "syear",
                                          ncores = ncores)
+      if (!is.null(fcst)) {
+        # Calibrate the forecast
+        fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+                                           cal.method = method,
+                                           eval.method = "leave-one-out",
+                                           multi.model = mm,
+                                           na.fill = TRUE,
+                                           na.rm = na.rm,
+                                           apply_to = NULL,
+                                           alpha = NULL,
+                                           memb_dim = "ensemble",
+                                           sdate_dim = "syear",
+                                           ncores = ncores)
+      } else {
+        fcst_calibrated <- NULL
+      }
+
+    } 
+
+  } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+    # Daily data calibration using Quantile Mapping
+    warning("Data is at daily frequency. Quantile Mapping will be used",
+            " as the calibration method.")
+    hcst_calibrated <- CST_QuantileMapping(hcst, obs,
+      			             exp_cor = NULL,
+      				     ## TODO: Tidy up
+      			             sample_dims = c("syear",
+      						     "time",
+      						     "ensemble"),
+      				     sample_length = NULL,
+      				     method = "QUANT",
+      				     ncores = ncores,
+      				     na.rm = na.rm)
+
+    if (!is.null(fcst)) {
+      fcst_calibrated <- CST_QuantileMapping(hcst, obs,
+      				       exp_cor = fcst,
+      				       sample_dims = c("syear",
+      						       "time",
+      						       "ensemble"),
+      				       sample_length = NULL,
+      				       method = "QUANT",
+      				       ncores = ncores,
+      				       na.rm = na.rm)
     } else {
       fcst_calibrated <- NULL
     }
-  ## TODO: Implement other calibration methods  
-  } else {
-    stop("Calibration method is not implemented in CSTools")
   }
+  
   print("###### CALIBRATION COMPLETE ######")
-  return(list(hcst_calibrated, fcst_calibrated))
+  return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated))
 }
 
 #    ## Lluís' version:
@@ -81,7 +115,6 @@ calibrate_datasets <- function(data, recipe) {
 #  			     split_factor = 1,
 #  			     ncores = ncores)
   
-    ## TODO: Include Lluís' forecast version and test it
     # For daily
     # hcst <- CSTools::CST_QuantileMapping(data$hcst,
     #                                      data$obs,
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 686b1f13..3f394e28 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,6 +1,6 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-- 
GitLab


From 4b374f9d13643fbea26d7427b4f9c842f47a1c85 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 28 Apr 2022 16:29:32 +0200
Subject: [PATCH 17/29] Add second version of legacy fcst calibration

---
 modules/Calibration/Calibration_fcst4.R | 158 ++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 modules/Calibration/Calibration_fcst4.R

diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R
new file mode 100644
index 00000000..f1866dcb
--- /dev/null
+++ b/modules/Calibration/Calibration_fcst4.R
@@ -0,0 +1,158 @@
+library(ClimProjDiags)
+
+# Performs the hcst calibration for a centered time
+# window of sdays. Objects must have the dimensions;
+# sday, ensemble & syear. 
+# If sday dim = 1; no time window calibration is performed.
+.cal <- function(var_obs, var_exp, na.rm=F) {
+
+  ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][]
+  nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][]
+  ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][]
+  
+  # selects only the centered day of time window 
+  if ( ndays == 1 ){
+    fcst.index <- 1
+  } else {
+    fcst.index <- (ndays + 1)/2
+  }
+  
+  ind <- 1:ntime
+ 
+  climObs <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)})
+  climPred <- sapply(ind, function(x) {mean(
+              Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)})
+
+  var_obs.ano <- var_obs - climObs
+  var_exp.ano <- var_exp - climPred
+  
+  calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), 
+			    c(fcst.index, 0))
+  # loop over the hindcast years
+  for (t in 1 : ntime) {
+
+    # defining forecast,hindcast in cross-validation
+    fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'),
+                                indices=list(t, fcst.index)) 
+    hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t))
+
+    obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) 
+
+    # computes ensemble mean of the fcst & hcst
+    em_fcst <- mean(fcst.ano, na.rm=na.rm)
+    em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
+                     na.rm=na.rm)$output1
+
+    # Changes the correlation method in case of NAs 
+    if (na.rm){
+      corr.method <- "pairwise.complete.obs"
+    } else {
+      corr.method <- "everything"
+    }
+    corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method)
+    sd_obs <- sd(obs.ano, na.rm=na.rm)
+    sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm)
+    
+    # Computes difference between fcst anomaly and ensemble mean
+    fcst_diff <- fcst.ano - em_fcst
+    hcst_diff <- array(numeric(), c(ndays,ntime-1,0))
+    names(dim(hcst_diff)) <- names(dim(hcst.ano))
+    for (n in 1 : nmembers) {
+      diff <-  Subset(hcst.ano, along='ensemble', indices=n,
+                      drop='selected') - em_hcst
+      hcst_diff <- abind(hcst_diff, diff,
+                        along=which(names(dim(var_exp)) == 'ensemble'),
+                        hier.names=F)
+    }
+
+    sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm)
+    
+    a <- abs(corr) * (sd_obs / sd_em_hcst)
+    b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
+    
+    # Calibrates hcst sdate
+    calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t])
+    # Appends calibrated sdate to the hcst array
+    calib.hcst <- abind(calib.hcst, calib.syear,
+                        along=which(names(dim(var_exp)) == 'syear'),
+                        hier.names=F)
+  }
+
+  names(dim(calib.hcst)) <- names(dim(calib.syear))
+  # removes NAs added in array init
+  calib.hcst <- Subset(calib.hcst, along='syear', indices=-1)
+
+  return(calib.hcst)
+
+}
+
+# Performs the fcst calibration for a centered time
+# window of sdays. Objects must have the dimensions;
+# sday, ensemble & syear. 
+# If sday dim = 1; no time window calibration is performed.
+.fcstcal <- function(obs, hcst, fcst, na.rm=F) {
+
+  ntime <- dim(hcst)['syear']
+  nmembers <- dim(hcst)['ensemble']
+  fcst.nmembers <- dim(fcst)['ensemble']
+  ndays <- dim(hcst)['sday']
+  
+  # selects only the centered day of time window 
+  if ( ndays == 1 ){
+    fcst.index <- 1
+  } else {
+    fcst.index <- (ndays + 1)/2
+  }
+  
+  ind <- 1:ntime
+  climObs <- mean(obs, na.rm=na.rm) 
+  climPred <- mean(hcst, na.rm=na.rm)
+
+  obs.ano <- obs - climObs
+  hcst.ano <- hcst - climPred
+  fcst.ano <- fcst - climPred
+  
+  calib.fcst <- NA * fcst
+
+  # computes ensemble mean of the fcst & hcst
+  em_fcst <- mean(fcst.ano, na.rm=na.rm)
+  em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean,
+                                           na.rm=na.rm)$output1
+  
+  # Changes the correlation method in case of NAs allowed
+  if (na.rm){
+    corr.method <- "pairwise.complete.obs"
+  } else {
+    corr.method <- "everything"
+  }
+
+  corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method)
+  sd_obs <- sd(obs.ano, na.rm=na.rm)
+  sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm)
+  
+  # Computes difference between fcst anomaly and ensemble mean
+  fcst_diff <- fcst.ano - em_fcst
+  hcst_diff <- array(numeric(), c(ndays,ntime,0))
+  names(dim(hcst_diff)) <- names(dim(hcst.ano))
+  for (n in 1 : nmembers) {
+    diff <-  Subset(hcst.ano, along='ensemble', indices=n,
+                    drop='selected') - em_hcst
+    hcst_diff <- abind(hcst_diff, diff,
+                      along=which(names(dim(hcst)) == 'ensemble'),
+                      hier.names=F)
+  }
+
+  sd_hcst_diff <- sd(hcst_diff, na.rm)
+  
+  a <- abs(corr) * (sd_obs / sd_em_hcst)
+  b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
+  
+  # Calibrates hcst sdate
+  fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs)
+  # fcst_cube$data <- fcst.calib
+  return(fcst.calib)
+  # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected'))
+
+}
+
+
-- 
GitLab


From 0dd29335d02315c8c7888705fec2063d68680216 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 29 Apr 2022 16:55:38 +0200
Subject: [PATCH 18/29] Move loading of libraries to tools/libs.R

---
 modules/Calibration/Calibration_fcst3.R |  1 -
 modules/Calibration/Calibration_fcst4.R |  1 -
 modules/Calibration/test_calib.R        | 39 ++-----------------------
 tools/libs.R                            |  3 +-
 4 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R
index c67aae4c..babc1922 100644
--- a/modules/Calibration/Calibration_fcst3.R
+++ b/modules/Calibration/Calibration_fcst3.R
@@ -1,4 +1,3 @@
-library(ClimProjDiags)
 
 # Performs the hcst calibration for a centered time
 # window of sdays. Objects must have the dimensions;
diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R
index f1866dcb..9359c6e8 100644
--- a/modules/Calibration/Calibration_fcst4.R
+++ b/modules/Calibration/Calibration_fcst4.R
@@ -1,4 +1,3 @@
-library(ClimProjDiags)
 
 # Performs the hcst calibration for a centered time
 # window of sdays. Objects must have the dimensions;
diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index 20f067ce..38bc08bd 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -1,8 +1,5 @@
 
-## TODO: Replace with library(CSTools) once Alba's fun is merged
-library(s2dv)
-library(ClimProjDiags)
-library(multiApply)
+## TODO: Remove once Alba's fun is merged in CSTools
 source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R")
 
 ## Entry params data and recipe?
@@ -10,7 +7,6 @@ calibrate_datasets <- function(data, recipe) {
   hcst <- data$hcst
   obs <- data$obs
   fcst <- data$fcst
-  ## TODO: Implement daily case
   
   # Calibration function params
   method <- recipe$Analysis$Workflow$Calibration$method
@@ -92,35 +88,4 @@ calibrate_datasets <- function(data, recipe) {
   return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated))
 }
 
-#    ## Lluís' version:
-#    ## Note: memb_dim is hardcoded as 'ensemble'.
-#    source("modules/Calibration/Calibration.R")
-#    ## TODO: Change path
-#    # source("https://earth.bsc.es/gitlab/api/v4/projects/446/repository/blobs/bfe5848c06536218f1fbed6e9356cc6aebe27762/raw?ref=TFM&private_token=pExJt3V4FYwj4yzvy9ps")
-#    source("modules/Calibration/Calibration_fcst3.R")
-#    ## TODO: move this elsewhere
-#    library(abind)
-#    hcst_lluis <- hcst_calib(obs, hcst,
-#    			   method = method,
-#    			   mm = mm,
-#    			   na.rm = na.rm,
-#    			   split_factor = 1,
-#    			   ncores = ncores)
-#  
-#    fcst_lluis <- fcst_calib(obs$data, hcst$data, fcst$data,
-#  			     # method = method,
-#  			     fun = .fcstcal, ## ?
-#  			     mm = mm,
-#  			     na.rm = na.rm,
-#  			     split_factor = 1,
-#  			     ncores = ncores)
-  
-    # For daily
-    # hcst <- CSTools::CST_QuantileMapping(data$hcst,
-    #                                      data$obs,
-    #                                      exp_cor = NULL,
-    #                                      sample_dims = c("syear", "time", "ensemble"),
-    #                                      sample_length = NULL,
-    #                                      method = "QUANT",
-    #                                      ncores = ncores,
-    #                                      na.rm = na.rm)
+
diff --git a/tools/libs.R b/tools/libs.R
index 7b010f54..483eda5d 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -3,12 +3,13 @@ library(startR)
 library(ClimProjDiags)
 library(multiApply)
 library(yaml)
+library(s2dv)
 # library(s2dverification)
 #  library(ncdf4)
 #  library(abind)
 #  library(easyVerification)
 #  library(easyNCDF)
-  library(CSTools)
+library(CSTools)
 #
 #  library(parallel)
 #  library(pryr) # To check mem usage.
-- 
GitLab


From 8672e2d2089e7dae6d197759bfc64f3368b32d1d Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Wed, 4 May 2022 14:30:14 +0200
Subject: [PATCH 19/29] Add multi-model calilbration

---
 modules/Calibration/test_calib.R | 14 ++++++++++++--
 tools/libs.R                     |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index 38bc08bd..fa3f1359 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -14,6 +14,18 @@ calibrate_datasets <- function(data, recipe) {
   ncores <- 4
   na.rm <- T
 
+  # Replicate observation array for the multi-model case
+  if (mm) {
+    obs.mm <- obs$data
+    for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) {
+      obs.mm <- abind(obs.mm, obs$data,
+		      along=which(names(dim(obs$data)) == 'dat'))
+    }
+    names(dim(obs.mm)) <- names(dim(obs$data))
+    obs$data <- obs.mm
+    remove(obs.mm)
+  }
+
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
   
     CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
@@ -87,5 +99,3 @@ calibrate_datasets <- function(data, recipe) {
   print("###### CALIBRATION COMPLETE ######")
   return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated))
 }
-
-
diff --git a/tools/libs.R b/tools/libs.R
index 483eda5d..3ee4c40a 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -4,9 +4,9 @@ library(ClimProjDiags)
 library(multiApply)
 library(yaml)
 library(s2dv)
+library(abind)
 # library(s2dverification)
 #  library(ncdf4)
-#  library(abind)
 #  library(easyVerification)
 #  library(easyNCDF)
 library(CSTools)
-- 
GitLab


From a2d8abe8f5b5011b09a473d9becd59c3ccdfd145 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 12 May 2022 14:35:33 +0200
Subject: [PATCH 20/29] Modify the way the cross-validation is done to
 reproduce the results from CST_Calibration()

---
 modules/Calibration/Calibration_fcst3.R | 46 ++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R
index babc1922..3d24226b 100644
--- a/modules/Calibration/Calibration_fcst3.R
+++ b/modules/Calibration/Calibration_fcst3.R
@@ -20,23 +20,30 @@
  
   climObs <- sapply(ind, function(x) {mean(
              Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)})
+
+  climObs2 <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)})
+  
   climPred <- sapply(ind, function(x) {mean(
               Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)})
 
-  var_obs.ano <- var_obs - climObs
-  var_exp.ano <- var_exp - climPred
-  
+  climPred2 <- sapply(ind, function(x) {mean(var_exp, na.rm=na.rm)})
+
+  var_obs.ano <- var_obs - climObs2
+  var_fcst.ano <- var_exp - climPred
+  var_exp.ano <- var_exp - climPred2
+
   calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), 
 			    c(fcst.index, 0))
   # loop over the hindcast years
   for (t in 1 : ntime) {
 
     # defining forecast,hindcast in cross-validation
-    fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'),
-                                indices=list(t, fcst.index)) 
+    fcst.ano <- Subset(var_fcst.ano, along=c('syear', 'sday'),
+                                indices=list(t, fcst.index))
+
     hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t))
 
-    obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) 
+    obs.ano <- Subset(var_obs.ano, along='syear', indices=-t)
 
     # computes ensemble mean of the fcst & hcst
     em_fcst <- mean(fcst.ano, na.rm=na.rm)
@@ -66,9 +73,20 @@
     }
 
     sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm)
-    
+
     a <- abs(corr) * (sd_obs / sd_em_hcst)
     b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
+    # print(paste0("climObs: ", climObs[t])) #
+    # print(paste0("sd_obs :", sd_obs)) # 
+    # print(paste0("sd_hcst_diff :", sd_hcst_diff))
+    # print(paste0("sd_em_hcst :", sd_em_hcst)) # 
+    # print(paste0("em_hcst :", em_hcst))
+    # print(paste0("Alpha - Lluís:", a)) #
+    # print(paste0("Beta - Lluís:", b)) #
+    # print(paste0("fcst_diff: ", fcst_diff))
+    # print(paste0("climPred: ", climPred))
+    # print(paste0("climCross: ", climCross[t]))
+    # print(paste0("em_fcst :", em_fcst))
     
     # Calibrates hcst sdate
     calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t])
@@ -143,10 +161,20 @@
   }
 
   sd_hcst_diff <- sd(hcst_diff, na.rm)
-  
+  # print(paste0("Sigma e:", sd_hcst_diff))
   a <- abs(corr) * (sd_obs / sd_em_hcst)
   b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2))
-  
+  ## print(paste0("climObs: ", climObs))
+  ## print(paste0("sd_obs :", sd_obs))
+  # print(paste0("sd_hcst_diff :", sd_hcst_diff))
+  # print(paste0("sd_em_hcst :", sd_em_hcst))
+  # print(paste0("em_hcst :", em_hcst))
+  ## print(paste0("Alpha - Lluís:", a))
+  ## print(paste0("Beta - Lluís:", b))
+  # print(paste0("climObs: ", climObs))
+  # print(paste0("fcst_diff: ", fcst_diff))
+  # print(paste0("em_fcst :", em_fcst))
+ 
   # Calibrates hcst sdate
   fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs)
   # fcst_cube$data <- fcst.calib
-- 
GitLab


From 56d1e99cbd6ba4acb4656125a89f12bafbb7cd77 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 12 May 2022 16:55:00 +0200
Subject: [PATCH 21/29] formatting

---
 modules/Skill/s2s.metrics.R | 242 ++++++++++++++++++++++++++++++++++++
 modules/Skill/s2s.probs.R   |  42 +++++++
 2 files changed, 284 insertions(+)
 create mode 100644 modules/Skill/s2s.metrics.R
 create mode 100644 modules/Skill/s2s.probs.R

diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R
new file mode 100644
index 00000000..5497a885
--- /dev/null
+++ b/modules/Skill/s2s.metrics.R
@@ -0,0 +1,242 @@
+
+source("s2s.probs.R")
+
+
+# MERGES verification dimns int single sdate dim along which the 
+# verification metrics will be computed
+mergedims <- function(data, indims, outdim) {
+
+  old.dims <- names(dim(data))
+  new.dims <- c(indims, old.dims[! old.dims %in% indims])
+  data <- Reorder(data, new.dims)
+
+  for (step in 1:(length(indims) - 1)) {
+      if (step == 1) {
+      data <- MergeDims(data, 
+                        merge_dims=c(indims[step], indims[step+1]), 
+                        rename_dim=outdim) 
+      } else {
+      data <- MergeDims(data,
+                        merge_dims=c(outdim, indims[step+1]), 
+                        rename_dim=outdim) 
+      }
+  }
+
+  return(data)
+
+}
+
+Compute_verif_metrics <- function(exp, obs, skill_metrics,
+                                  verif.dims=c("syear", "sday", "sweek"),
+                                  na.rm=T,ncores=1) {
+
+  exp <- mergedims(exp, verif.dims, 'sdate')
+  obs <- mergedims(obs, verif.dims, 'sdate')
+  ## obs already contains ensemble dimension
+  # obs <- InsertDim(obs, 2, 1, name='member') 
+  
+  ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE 
+  if (na.rm) {
+    nsdates <- dim(exp)[which(names(dim(exp)) == 'sdate')][]
+    not_missing_dates <- c() 
+    for (sdate in 1:nsdates){
+      if(!all(is.na(Subset(exp,along='sdate', list(sdate), drop=F)))){
+        not_missing_dates <- c(not_missing_dates, sdate)
+      }
+    }
+    obs <- Subset(obs, along='sdate', not_missing_dates, drop=F)
+    exp <- Subset(exp, along='sdate', not_missing_dates, drop=F)
+  }
+  
+  return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm))
+
+}
+
+.compute_metrics <- function(exp, obs, metrics, ncores=1, split_factor=1, na.rm=FALSE) {
+ 
+    veriUnwrap <- easyVerification:::veriUnwrap
+
+    ## SPECS VERIFICATION PARAMETERS
+    SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss")
+
+    FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss",
+		bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr")
+
+    PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), 
+		 bss10=c(1/10), bss90=c(9/10), enscorr=NULL)
+
+    metrics_data <- list()
+    for (metric in metrics) {
+
+      if (metric %in% SPECSVER_METRICS) { 
+
+        data <- Apply(data=list(exp, obs),
+                        target_dims=list(c('sdate', 'ensemble'), 
+					 c('sdate', 'ensemble')),
+                        fun="veriApply",
+                        verifun=FUN[[metric]],
+                        prob=PROB[[metric]],
+                        ensdim=2,
+                        ncores=ncores,
+                        split_factor=split_factor,
+                        tdim=1,
+                        na.rm=na.rm)[[1]] #* 100
+        
+        data <- Subset(data, c('member'),list(1),drop='selected')
+        data[!is.finite(data)] <- NaN
+        metrics_data[[ metric ]] <- list(data)
+
+      } else if (metric == "corr_eno") {
+        # computes ensemble mean
+        data <- multiApply::Apply(data = exp, 
+                                  target_dims = 'ensemble', 
+                                  fun = mean, ncores = ncores)$output1
+
+        data <- multiApply::Apply(data = list(exp = data, 
+                                              obs = Subset(obs, c('ensemble'),
+					            list(1), drop='selected')),
+                                  target_dims = 'sdate', fun = .correlation_eno, 
+                                  time_dim = 'sdate', ncores = ncores)
+
+        # append both corr_r and corr_sign to metrics list
+        for (i in 1:length(data)) {
+          coeff <- data[[i]]
+          coeff[!is.finite(coeff)] <- NaN
+          if (names(data)[i] %in% c("r", "sign")){
+            metrics_data[[ paste0("corr_", names(data)[i]) ]] <- list(coeff)
+          }
+        }
+
+      } else if (metric == "frpss_sign") {
+
+        terciles_obs <- Compute_probs(obs, c(1/3,2/3), 
+				      quantile_dims=c('sdate'),
+                                      ncores=ncores,
+				      split_factor=1,
+				      na.rm=na.rm)
+
+        terciles_exp <- Compute_probs(exp, c(1/3,2/3), 
+				      quantile_dims=c('sdate', 'ensemble'),
+                                      ncores=ncores,
+				      split_factor=1,
+				      na.rm=na.rm)
+
+        probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs))
+
+        frps <- NULL
+        n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][]
+        frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim,
+						   probs_obs = terciles_obs$probs),
+                                      target_dims = 'bin', 
+                                      fun = .rps_from_probs, 
+                                      n_categories = 3, 
+                                      n_members = n_members,
+                                      Fair = TRUE, ncores = ncores)$output1
+
+        frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs,
+						  probs_obs = terciles_obs$probs), 
+                                      target_dims = 'bin', 
+                                      fun = .rps_from_probs, 
+                                      n_categories = 3, 
+                                      n_members=n_members,
+                                      Fair = TRUE, ncores = ncores)$output1
+
+        frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate',
+                                            fun=mean, ncores=ncores)$output1
+        frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims='sdate',
+                                            fun=mean, ncores=ncores)$output1
+
+        frpss_respect2clim <- NULL
+        frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean
+        frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp,
+							skill_B=frps$clim, 
+                                                        time_dim='sdate',
+							ncores=ncores)$signif
+          
+        frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN
+        frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN 
+
+        frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, 
+					  c('ensemble'), list(1), 
+					  drop='selected')
+        frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign,
+					  c('ensemble'), list(1),
+					  drop='selected')
+
+        metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss)
+        metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign)
+
+      }
+
+    }
+    
+    return(metrics_data)
+
+}
+
+.correlation_eno = function(exp, obs, time_dim = 'syear', alpha = 0.05, ncores = 1){
+  
+  cor = NULL
+  cor$r = cor(exp, obs) # Correlation coefficient
+  
+  n_eff = s2dv::Eno(data = obs, time_dim = time_dim,
+		    na.action = na.pass, ncores = ncores)
+
+  t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE)
+  t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2)
+  
+  if (anyNA(c(t, t_alpha2_n2)) == FALSE & t >= t_alpha2_n2) {
+    cor$sign = TRUE
+  } else {
+    cor$sign = FALSE
+  }
+
+  z_a2 = qnorm(p = alpha/2, lower.tail = FALSE)
+  conf_int = c()
+  conf_int[1] = tanh(atanh(cor$r) - z_a2 / sqrt(n_eff-3))
+  conf_int[2] = tanh(atanh(cor$r) + z_a2 / sqrt(n_eff-3))
+  cor$conf_int = conf_int
+  
+  cor$n_eff <- n_eff
+  
+  return(cor)
+}
+
+
+.rps_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members = NULL){
+  
+  ## Checkings
+  if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){
+    stop('The number of probabilities has to be the same as the number of categories')
+  }
+  if (!is.numeric(n_categories)){
+    stop('n_categories must be an integer')
+  }
+  if (!is.numeric(n_members) & Fair == TRUE){
+    stop('n_members must be an integer')
+  }
+  
+  ## RPS (Wilks 2011, pp.348-350)
+  rps <- NULL
+  for (i in 1:n_categories){
+    rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2
+  }
+  rps <- sum(rps)
+  
+  ## FairRPS
+  if (Fair == TRUE){ 
+    
+    ## formula of SpecsVerification::EnsRps
+    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
+    R <- n_members
+    R_new <- Inf 
+    probs_cum <- cumsum(probs_exp)
+    adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum)
+    adjustment <- sum(adjustment)
+    
+    rps <- rps + adjustment
+    
+  }
+  
+  return(rps)
+}
diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R
new file mode 100644
index 00000000..650a4bc5
--- /dev/null
+++ b/modules/Skill/s2s.probs.R
@@ -0,0 +1,42 @@
+
+
+Compute_probs <- function(data, thresholds,
+                          ncores=1, quantile_dims=c('syear', 'member'),
+                          probs_dims=list('member', 'bin'),
+                          split_factor=1, na.rm=FALSE) {
+                            
+  quantiles <- Apply(data,
+                     quantile_dims,
+                     function(x, na.rm) {quantile(as.vector(x), 
+				         thresholds,na.rm=na.rm)},
+                     output_dims="bin",
+                     ncores=ncores, 
+                     na.rm=na.rm,
+                     split_factor=split_factor)[[1]]
+  
+  if (na.rm == FALSE) {
+    c2p <- function(x, t) {
+    colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
+    }
+  } else {
+    c2p <- function(x, t) {
+      if (any(!is.na(x))){
+        colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
+      } else {
+        c(NA, NA, NA)
+      }
+    }
+  }
+
+  probs <- Apply(data = list(x = data, t = quantiles),
+                             target_dims = probs_dims,
+                             c2p,
+                             output_dims = "bin",
+                             split_factor = split_factor,
+                             ncores = ncores)[[1]]
+
+  return(list(probs=probs, quantiles=quantiles))
+
+}
+
+
-- 
GitLab


From c4c7b1786791055e857d5bebc2ae2caee67e4b9e Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 12 May 2022 17:04:17 +0200
Subject: [PATCH 22/29] Slight modification for testing

---
 modules/data_load/testing_recipes/recipe_4.yml | 2 +-
 modules/test_victoria.R                        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml
index 421888b1..2cfcff37 100644
--- a/modules/data_load/testing_recipes/recipe_4.yml
+++ b/modules/data_load/testing_recipes/recipe_4.yml
@@ -19,7 +19,7 @@ Analysis:
     hcst_start: '1993'
     hcst_end: '2016'
     leadtimemin: 2
-    leadtimemax: 4
+    leadtimemax: 2
   Region:
     latmin: -10
     latmax: 10
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 3f394e28..686b1f13 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,6 +1,6 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-- 
GitLab


From 1694df406e1e8fd93d5363884c76e7529a31af1f Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 13 May 2022 12:02:01 +0200
Subject: [PATCH 23/29] Change ensemble dim name, formatting

---
 modules/Skill/s2s.metrics.R | 44 ++++++++++++++++++++-----------------
 modules/Skill/s2s.probs.R   |  4 ++--
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R
index 5497a885..32243310 100644
--- a/modules/Skill/s2s.metrics.R
+++ b/modules/Skill/s2s.metrics.R
@@ -1,5 +1,5 @@
 
-source("s2s.probs.R")
+source("modules/Skill/s2s.probs.R")
 
 
 # MERGES verification dimns int single sdate dim along which the 
@@ -52,7 +52,9 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
 
 }
 
-.compute_metrics <- function(exp, obs, metrics, ncores=1, split_factor=1, na.rm=FALSE) {
+.compute_metrics <- function(exp, obs, metrics, 
+			     ncores=1, split_factor=1,
+			     na.rm=FALSE) {
  
     veriUnwrap <- easyVerification:::veriUnwrap
 
@@ -71,18 +73,18 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
       if (metric %in% SPECSVER_METRICS) { 
 
         data <- Apply(data=list(exp, obs),
-                        target_dims=list(c('sdate', 'ensemble'), 
-					 c('sdate', 'ensemble')),
-                        fun="veriApply",
-                        verifun=FUN[[metric]],
-                        prob=PROB[[metric]],
-                        ensdim=2,
-                        ncores=ncores,
-                        split_factor=split_factor,
-                        tdim=1,
-                        na.rm=na.rm)[[1]] #* 100
+                      target_dims=list(c('sdate', 'ensemble'), 
+		      		       c('sdate', 'ensemble')),
+                      fun="veriApply",
+                      verifun=FUN[[metric]],
+                      prob=PROB[[metric]],
+                      ensdim=2,
+                      ncores=ncores,
+                      split_factor=split_factor,
+                      tdim=1,
+                      na.rm=na.rm)[[1]] #* 100
         
-        data <- Subset(data, c('member'),list(1),drop='selected')
+        data <- Subset(data, c('ensemble'), list(1), drop='selected')
         data[!is.finite(data)] <- NaN
         metrics_data[[ metric ]] <- list(data)
 
@@ -127,19 +129,21 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
         n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][]
         frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim,
 						   probs_obs = terciles_obs$probs),
-                                      target_dims = 'bin', 
-                                      fun = .rps_from_probs, 
-                                      n_categories = 3, 
-                                      n_members = n_members,
-                                      Fair = TRUE, ncores = ncores)$output1
+                                       target_dims = 'bin', 
+                                       fun = .rps_from_probs, 
+                                       n_categories = 3, 
+                                       n_members = n_members,
+                                       Fair = TRUE, 
+				       ncores = ncores)$output1
 
         frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs,
 						  probs_obs = terciles_obs$probs), 
                                       target_dims = 'bin', 
                                       fun = .rps_from_probs, 
                                       n_categories = 3, 
-                                      n_members=n_members,
-                                      Fair = TRUE, ncores = ncores)$output1
+                                      n_members = n_members,
+                                      Fair = TRUE,
+				      ncores = ncores)$output1
 
         frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate',
                                             fun=mean, ncores=ncores)$output1
diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R
index 650a4bc5..a8cab57c 100644
--- a/modules/Skill/s2s.probs.R
+++ b/modules/Skill/s2s.probs.R
@@ -1,8 +1,8 @@
 
 
 Compute_probs <- function(data, thresholds,
-                          ncores=1, quantile_dims=c('syear', 'member'),
-                          probs_dims=list('member', 'bin'),
+                          ncores=1, quantile_dims=c('syear', 'ensemble'),
+                          probs_dims=list('ensemble', 'bin'),
                           split_factor=1, na.rm=FALSE) {
                             
   quantiles <- Apply(data,
-- 
GitLab


From 5f51a06e873e64f2a71dead9a014888dac1a171c Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 17 May 2022 14:57:58 +0200
Subject: [PATCH 24/29] Add wrapper fun for skill computations, small changes
 for testing

---
 modules/Skill/Skill.R                         | 109 +++++++++++++-----
 modules/Skill/s2s.metrics.R                   |  15 ++-
 .../data_load/testing_recipes/recipe_4.yml    |   4 +-
 modules/test_victoria.R                       |   7 ++
 4 files changed, 95 insertions(+), 40 deletions(-)

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index e7cad0cb..9976d622 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -7,37 +7,82 @@
 # - reliability diagram
 # - ask Carlos which decadal metrics he is currently using
 
+source("modules/Skill/s2s.metrics.R")
+## TODO: Remove once the new version of s2dv is released
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPS.R")
+source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPSS.R")
+
+## TODO: Implement this in the future
 ## Which parameter are required?
-if (!("obs" %in% ls()) || is.null(obs)) {
-  error(logger,
-        "There is no object 'obs' in the global environment or it is NULL")
-}
-if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) {
-  error(logger,
-        "There is no object 'fcst' in the global environment or it is NULL")
-}
-if (!("hcst" %in% ls()) || is.null(hcst)) {
-  error(logger,
-        "There is no object 'hcst' in the global environment or it is NULL")
-}
-if (!("metric" %in% ls()) || is.null(metric)) {
-  warn(logger,
-       "Verification metric not found and it is set as 'EnsCorr'.")
-  metric <- 'EnsCorr'
-}
-if (metric %in% c('FRPSS', 'RPSS')) {
-  metric_fun <- "veriApply"
-  metric_method <- "FairRpss"
-} else if (metric %in% c("FCRPSS", "CRPSS")) {
-  metric_fun <- "veriApply"
-} else if (metric %in% c("EnsCorr", "EnsCor")) {
-  metric_fun <- "veriApply"
-  metric_method <- "EnsCorr"
-#...
-} else {
-  error(logger, "Unknown verification metric defined in the recipe.")
-  metric_fun <- 'NotFound'
+# if (!("obs" %in% ls()) || is.null(obs)) {
+#   error(logger,
+#         "There is no object 'obs' in the global environment or it is NULL")
+# }
+# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) {
+#   error(logger,
+#         "There is no object 'fcst' in the global environment or it is NULL")
+# }
+# if (!("hcst" %in% ls()) || is.null(hcst)) {
+#   error(logger,
+#         "There is no object 'hcst' in the global environment or it is NULL")
+# }
+# if (!("metric" %in% ls()) || is.null(metric)) {
+#   warn(logger,
+#        "Verification metric not found and it is set as 'EnsCorr'.")
+#   metric <- 'EnsCorr'
+# }
+# if (metric %in% c('FRPSS', 'RPSS')) {
+#   metric_fun <- "veriApply"
+#   metric_method <- "FairRpss"
+# } else if (metric %in% c("FCRPSS", "CRPSS")) {
+#   metric_fun <- "veriApply"
+# } else if (metric %in% c("EnsCorr", "EnsCor")) {
+#   metric_fun <- "veriApply"
+#   metric_method <- "EnsCorr"
+# #...
+# } else {
+#   error(logger, "Unknown verification metric defined in the recipe.")
+#   metric_fun <- 'NotFound'
+# }
+# info(logger, paste("#-------------------------- ", "\n",
+#                    " running Skill module ", "\n",
+#                    " it can call ", metric_fun ))
+
+compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) {
+  # exp: s2dv_cube containing the hindcast
+  # obs: s2dv_cube containing the observations
+  # recipe: auto-s2s recipe as provided by read_yaml
+  
+  ## TODO: Adapt time_dims to subseasonal case?
+  time_dim <- 'syear'
+  memb_dim <- 'ensemble'
+  metric <- recipe$Analysis$Workflow$Skill$metric
+  # Whether the fair version of the metric is to be computed
+  if (metric %in% c('FRPS', 'FRPSS', 'BSS10', 'BSS90')) {
+    Fair <- T
+  } else {
+    Fair <- F
+  }
+
+  if (metric %in% c('RPS', 'FRPS')) {
+    skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+		 Fair = Fair, ncores = ncores)
+
+  } else if (metric %in% c('RPSS', 'FRPSS')) {
+    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+		  Fair = Fair, ncores = ncores)
+  } else if (metric == 'BSS10') {
+    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+		  prob_thresholds = 0.1, Fair = Fair, ncores = ncores)
+  } else if (metric == 'BSS90') {
+    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+		  prob_thresholds = 0.9, Fair = Fair, ncores = ncores)
+  }
+
+  return(skill)
+
 }
-info(logger, paste("#-------------------------- ", "\n",
-                   " running Skill module ", "\n",
-                   " it can call ", metric_fun ))
+
+  ## TODO: Add specsVerification funs
+
diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R
index 32243310..1f5b8a9d 100644
--- a/modules/Skill/s2s.metrics.R
+++ b/modules/Skill/s2s.metrics.R
@@ -26,6 +26,8 @@ mergedims <- function(data, indims, outdim) {
 
 }
 
+## TODO: New function to split sdate dim back into 'sday', 'sweek' and 'syear'?
+
 Compute_verif_metrics <- function(exp, obs, skill_metrics,
                                   verif.dims=c("syear", "sday", "sweek"),
                                   na.rm=T,ncores=1) {
@@ -83,15 +85,16 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
                       split_factor=split_factor,
                       tdim=1,
                       na.rm=na.rm)[[1]] #* 100
-        
+
+        ## TODO: Keep 'ensemble' dimension to maintain flexibility?
         data <- Subset(data, c('ensemble'), list(1), drop='selected')
         data[!is.finite(data)] <- NaN
-        metrics_data[[ metric ]] <- list(data)
+        metrics_data[[ metric ]] <- data ## previously: list(data)
 
       } else if (metric == "corr_eno") {
         # computes ensemble mean
         data <- multiApply::Apply(data = exp, 
-                                  target_dims = 'ensemble', 
+                                  target_dims = 'ensemble',
                                   fun = mean, ncores = ncores)$output1
 
         data <- multiApply::Apply(data = list(exp = data, 
@@ -105,19 +108,19 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
           coeff <- data[[i]]
           coeff[!is.finite(coeff)] <- NaN
           if (names(data)[i] %in% c("r", "sign")){
-            metrics_data[[ paste0("corr_", names(data)[i]) ]] <- list(coeff)
+            metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff)
           }
         }
 
       } else if (metric == "frpss_sign") {
 
-        terciles_obs <- Compute_probs(obs, c(1/3,2/3), 
+        terciles_obs <- Compute_probs(obs, c(1/3, 2/3), 
 				      quantile_dims=c('sdate'),
                                       ncores=ncores,
 				      split_factor=1,
 				      na.rm=na.rm)
 
-        terciles_exp <- Compute_probs(exp, c(1/3,2/3), 
+        terciles_exp <- Compute_probs(exp, c(1/3, 2/3), 
 				      quantile_dims=c('sdate', 'ensemble'),
                                       ncores=ncores,
 				      split_factor=1,
diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml
index 2cfcff37..33c3a74c 100644
--- a/modules/data_load/testing_recipes/recipe_4.yml
+++ b/modules/data_load/testing_recipes/recipe_4.yml
@@ -19,7 +19,7 @@ Analysis:
     hcst_start: '1993'
     hcst_end: '2016'
     leadtimemin: 2
-    leadtimemax: 2
+    leadtimemax: 3
   Region:
     latmin: -10
     latmax: 10
@@ -32,7 +32,7 @@ Analysis:
     Calibration:
       method: mse_min
     Skill:
-      metric: RPSS
+      metric: BSS90
     Indicators:
       index: no
   Output_format: S2S4E
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 686b1f13..a38edbc2 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -4,7 +4,14 @@ recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
+source("modules/Skill/Skill.R")
+
 data <- load_datasets(recipe_file)
 ## TODO: Instead of reading the recipe at each module, do it at the beginning?
 recipe <- read_yaml(recipe_file)
+# Calibrate datasets
 calibrated_data <- calibrate_datasets(data, recipe)
+# Compute skill metric
+skill_metric <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs,
+				      recipe, na.rm = T, ncores = 4)
+
-- 
GitLab


From aef22ebcda44b819957f53f7cfdce9323259688b Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 17 May 2022 16:33:17 +0200
Subject: [PATCH 25/29] Add some systems and grid description files to archive

---
 conf/archive.yml                             | 20 ++++++++++++++------
 conf/grid_description/griddes_ncep-cfsv2.txt | 18 ++++++++++++++++++
 conf/grid_description/griddes_system2c3s.txt | 16 +++++++---------
 3 files changed, 39 insertions(+), 15 deletions(-)
 create mode 100644 conf/grid_description/griddes_ncep-cfsv2.txt

diff --git a/conf/archive.yml b/conf/archive.yml
index 43750a1b..0caa78bf 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -5,11 +5,11 @@ archive:
     System:
         system5c3s: 
             src: "exp/ecmwf/system5c3s/"
-            daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/",
-                         "prlr":"_s0-24h/","sfcWind":"_f6h/"}
-            monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/",
-                           "prlr":"_s0-24h/","sfcWind":"_f6h/",
-                           "tasmin":"_f24h/","tasmax":"_f24h/"}
+            daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/",
+                         "prlr":"_s0-24h/", "sfcWind":"_f6h/"}
+            monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/",
+                           "prlr":"_s0-24h/", "sfcWind":"_f6h/",
+                           "tasmin":"_f24h/", "tasmax":"_f24h/"}
             nmember:
               fcst: 51
               hcst: 25
@@ -38,7 +38,7 @@ archive:
             nmember:
               fcst: 10
               hcst: 10
-            reference_grid: "conf/grid_description/griddes_system35c3s.txt"
+            reference_grid: "conf/grid_description/griddes_system2c3s.txt"
         eccc1:
             src: "exp/eccc/eccc1/"
             monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
@@ -47,6 +47,14 @@ archive:
               fcst: 10
               hcst: 10
             reference_grid: "conf/grid_description/griddes_eccc1.txt"
+        ncep-cfsv2:
+             src: "exp/ncep/cfs-v2/"
+             monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
+                            "tasmax":"_f6h/", "tasmin":"_f6h/"}
+             nmember:
+               fcst: 20
+               hcst: 20
+            reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt"
     Reference:
         era5: 
             src: "recon/ecmwf/era5/"
diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt
new file mode 100644
index 00000000..6d8abe86
--- /dev/null
+++ b/conf/grid_description/griddes_ncep-cfsv2.txt
@@ -0,0 +1,18 @@
+#
+# Grid description file for NCEP CFSv2
+#
+gridtype  = lonlat
+gridsize  = 64800
+xname     = lon
+xlongname = Longitude
+xunits    = degrees_east
+yname     = lat
+ylongname = Latitude
+yunits    = degrees_north
+xsize     = 360
+ysize     = 180
+xfirst    = 0.5
+xinc      = 1
+yfirst    = 89.5
+yinc      = -1
+#
diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt
index cf1e3515..0827b7c0 100644
--- a/conf/grid_description/griddes_system2c3s.txt
+++ b/conf/grid_description/griddes_system2c3s.txt
@@ -1,18 +1,16 @@
 #
-# Grid description file for JMA System3c3s
-# 
-# gridID 2
+# Grid description file for JMA System 2 C3S
 #
 gridtype  = lonlat
 gridsize  = 10512
-xsize     = 144
-ysize     = 73
 xname     = lon
-xlongname = "longitude"
-xunits    = "degrees_east"
+xlongname = longitude
+xunits    = degrees_east
 yname     = lat
-ylongname = "latitude"
-yunits    = "degrees_north"
+ylongname = latitude
+yunits    = degrees_north
+xsize     = 144
+ysize     = 73
 xfirst    = 0
 xinc      = 2.5
 yfirst    = 90
-- 
GitLab


From 506018beeab7ec5c06c5b0b0c0430bf5fa656b96 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Wed, 18 May 2022 12:27:07 +0200
Subject: [PATCH 26/29] Improvements to archive

---
 conf/archive.yml                              | 33 ++++++++++++-------
 conf/grid_description/griddes_system21_m1.txt | 16 ++++-----
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/conf/archive.yml b/conf/archive.yml
index 0caa78bf..e869cb9c 100644
--- a/conf/archive.yml
+++ b/conf/archive.yml
@@ -23,10 +23,19 @@ archive:
               fcst: 51
               hcst: 25
             reference_grid: "conf/grid_description/griddes_system7c3s.txt"
+        system21_m1:
+            src: "exp/dwd/system21_m1/"
+            monthly_mean: {"tas":"_f6h/", "prlr":"_f24h",
+                           "tasmin":"_f24h/", "tasmax":"_f24h/"}
+            nmember:
+              fcst: 50
+              hcst: 30
+            reference_grid: "conf/grid_description/griddes_system21_m1.txt"
         system35c3s:
             src: "exp/cmcc/system35c3s/"
             monthly_mean: {"tas":"_f6h/", "g500":"_f12h/",
-                           "prlr":"_f24h/", "sfcWind": "_f6h/"}
+                           "prlr":"_f24h/", "sfcWind": "_f6h/",
+                           "tasmax":"_f24h/", "tasmin":"_f24h"}
             nmember:
               fcst: 50
               hcst: 40
@@ -48,24 +57,24 @@ archive:
               hcst: 10
             reference_grid: "conf/grid_description/griddes_eccc1.txt"
         ncep-cfsv2:
-             src: "exp/ncep/cfs-v2/"
-             monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
-                            "tasmax":"_f6h/", "tasmin":"_f6h/"}
-             nmember:
-               fcst: 20
-               hcst: 20
+            src: "exp/ncep/cfs-v2/"
+            monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/",
+                           "tasmax":"_f6h/", "tasmin":"_f6h/"}
+            nmember:
+              fcst: 20
+              hcst: 20
             reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt"
     Reference:
         era5: 
             src: "recon/ecmwf/era5/"
-            daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/",
-                         "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"}
-            monthly_mean: {"tas":"_f1h-r1440x721cds/"}
+            daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/",
+                         "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"}
+            monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/"}
             reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc"
         era5land: 
             src: "recon/ecmwf/era5land/"
-            daily_mean: {"tas":"_f1h/","rsds":"_f1h/",
-                         "prlr":"_f1h/","sfcWind":"_f1h/"}
+            daily_mean: {"tas":"_f1h/", "rsds":"_f1h/",
+                         "prlr":"_f1h/", "sfcWind":"_f1h/"}
             monthly_mean: {"tas":"_f1h/"}
             reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc"
         uerra: 
diff --git a/conf/grid_description/griddes_system21_m1.txt b/conf/grid_description/griddes_system21_m1.txt
index bf9ac52b..954438f8 100644
--- a/conf/grid_description/griddes_system21_m1.txt
+++ b/conf/grid_description/griddes_system21_m1.txt
@@ -1,16 +1,16 @@
-# Grid description file for DWD GCFS2.1 (CDS)
-# gridID 2
+#
+# Grid description file for DWD GCFS2.1
 #
 gridtype  = lonlat
 gridsize  = 64800
-xsize     = 360
-ysize     = 180
 xname     = lon
-xlongname = "longitude"
-xunits    = "degrees_east"
+xlongname = longitude
+xunits    = degrees_east
 yname     = lat
-ylongname = "latitude"
-yunits    = "degrees_north"
+ylongname = latitude
+yunits    = degrees_north
+xsize     = 360
+ysize     = 180
 xfirst    = 0.5
 xinc      = 1
 yfirst    = 89.5
-- 
GitLab


From e74dc6027cd6ae86356b57cd762f70c0a1099ac1 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 19 May 2022 17:29:47 +0200
Subject: [PATCH 27/29] Add SpecsVerification metrics, add testing recipes

---
 modules/Skill/Skill.R                         | 37 ++++++++++++----
 .../data_load/testing_recipes/recipe_3.yml    |  2 +-
 .../data_load/testing_recipes/recipe_5.yml    | 43 +++++++++++++++++++
 modules/test_victoria.R                       |  2 +-
 4 files changed, 74 insertions(+), 10 deletions(-)
 create mode 100644 modules/data_load/testing_recipes/recipe_5.yml

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 9976d622..9ed0b9ae 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -57,29 +57,50 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) {
   ## TODO: Adapt time_dims to subseasonal case?
   time_dim <- 'syear'
   memb_dim <- 'ensemble'
-  metric <- recipe$Analysis$Workflow$Skill$metric
+  metric <- tolower(recipe$Analysis$Workflow$Skill$metric)
   # Whether the fair version of the metric is to be computed
-  if (metric %in% c('FRPS', 'FRPSS', 'BSS10', 'BSS90')) {
+  if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) {
     Fair <- T
   } else {
     Fair <- F
   }
-
-  if (metric %in% c('RPS', 'FRPS')) {
+  # Ranked Probability Score and Fair version
+  if (metric %in% c('rps', 'frps')) {
     skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
 		 Fair = Fair, ncores = ncores)
-
-  } else if (metric %in% c('RPSS', 'FRPSS')) {
+  # Ranked Probability Skill Score and Fair version
+  } else if (metric %in% c('rpss', 'frpss')) {
     skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
 		  Fair = Fair, ncores = ncores)
-  } else if (metric == 'BSS10') {
+  # Brier Skill Score - 10th percentile
+  } else if (metric == 'bss10') {
     skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
 		  prob_thresholds = 0.1, Fair = Fair, ncores = ncores)
-  } else if (metric == 'BSS90') {
+  # Brier Skill Score - 90th percentile
+  } else if (metric == 'bss90') {
     skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
 		  prob_thresholds = 0.9, Fair = Fair, ncores = ncores)
+  # Ensemble mean correlation
+  } else if (metric == 'enscorr') {
+    ## TODO: Implement option for Kendall and Spearman methods?
+    skill <- s2dv::Corr(exp$data, obs$data, 
+			dat_dim = 'dat', 
+			time_dim = time_dim, 
+			method = 'pearson', 
+			memb_dim = memb_dim,
+			ncores = ncores)
+  } else if (grepl("specs", metric, fixed = TRUE)) {
+    # Compute SpecsVerification version of the metrics
+    metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name
+    skill <- Compute_verif_metrics(exp$data, obs$data,
+				   skill_metrics = metric,
+				   verif.dims=c("syear", "sday", "sweek"),
+				   na.rm = na.rm,
+				   ncores = ncores)
   }
 
+  print("##### SKILL METRIC COMPUTATION COMPLETE #####")
+
   return(skill)
 
 }
diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml
index b71f428d..da1bd78a 100644
--- a/modules/data_load/testing_recipes/recipe_3.yml
+++ b/modules/data_load/testing_recipes/recipe_3.yml
@@ -17,7 +17,7 @@ Analysis:
       fcst_syear: '2020'
       fcst_sday: '1101'
     hcst_start: '1993'
-    hcst_end: '1995'
+    hcst_end: '2016'
     leadtimemin: 0
     leadtimemax: 0
   Region:
diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml
new file mode 100644
index 00000000..5e02eb1f
--- /dev/null
+++ b/modules/data_load/testing_recipes/recipe_5.yml
@@ -0,0 +1,43 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: prlr
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: system2c3s
+    Multimodel: False
+    Reference:
+      name: era5
+  Time:
+    sdate:
+      fcst_syear: #
+      fcst_sday: '0301'
+    hcst_start: '1993'
+    hcst_end: '2015'
+    leadtimemin: 0
+    leadtimemax: 1
+  Region:
+    latmin: -10
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Calibration:
+      method: evmos
+    Skill:
+      metric: RPSS_Specs
+    Indicators:
+      index: no
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index a38edbc2..4383eb6d 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,6 +1,6 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_5.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-- 
GitLab


From 568d414745ffde33cd065010ce9ef214d7ca766a Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Wed, 25 May 2022 12:20:55 +0200
Subject: [PATCH 28/29] Return list with multiple skill metric arrays; testing

---
 modules/Skill/Skill.R                         | 95 ++++++++++---------
 .../data_load/testing_recipes/recipe_5.yml    |  4 +-
 modules/test_victoria.R                       |  4 +-
 3 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 9ed0b9ae..f641fa6a 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -57,53 +57,58 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) {
   ## TODO: Adapt time_dims to subseasonal case?
   time_dim <- 'syear'
   memb_dim <- 'ensemble'
-  metric <- tolower(recipe$Analysis$Workflow$Skill$metric)
-  # Whether the fair version of the metric is to be computed
-  if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) {
-    Fair <- T
-  } else {
-    Fair <- F
+  metrics <- tolower(recipe$Analysis$Workflow$Skill$metric)
+  skill_metrics <- list()
+  for (metric in strsplit(metrics, " ")[[1]]) {
+    # Whether the fair version of the metric is to be computed
+    if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) {
+      Fair <- T
+    } else {
+      Fair <- F
+    }
+    # Ranked Probability Score and Fair version
+    if (metric %in% c('rps', 'frps')) {
+      skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+          	 Fair = Fair, ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    # Ranked Probability Skill Score and Fair version
+    } else if (metric %in% c('rpss', 'frpss')) {
+      skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+          	  Fair = Fair, ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    # Brier Skill Score - 10th percentile
+    } else if (metric == 'bss10') {
+      skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+          	  prob_thresholds = 0.1, Fair = Fair, ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    # Brier Skill Score - 90th percentile
+    } else if (metric == 'bss90') {
+      skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
+          	  prob_thresholds = 0.9, Fair = Fair, ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    # Ensemble mean correlation
+    } else if (metric == 'enscorr') {
+      ## TODO: Implement option for Kendall and Spearman methods?
+      skill <- s2dv::Corr(exp$data, obs$data, 
+          		  dat_dim = 'dat', 
+          		  time_dim = time_dim, 
+          		  method = 'pearson', 
+          		  memb_dim = memb_dim,
+          		  ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    } else if (grepl("specs", metric, fixed = TRUE)) {
+      # Compute SpecsVerification version of the metrics
+      metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name
+      skill <- Compute_verif_metrics(exp$data, obs$data,
+          			   skill_metrics = metric,
+          			   verif.dims=c("syear", "sday", "sweek"),
+          			   na.rm = na.rm,
+          			   ncores = ncores)
+      skill_metrics[[ metric ]] <- skill
+    }
   }
-  # Ranked Probability Score and Fair version
-  if (metric %in% c('rps', 'frps')) {
-    skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
-		 Fair = Fair, ncores = ncores)
-  # Ranked Probability Skill Score and Fair version
-  } else if (metric %in% c('rpss', 'frpss')) {
-    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
-		  Fair = Fair, ncores = ncores)
-  # Brier Skill Score - 10th percentile
-  } else if (metric == 'bss10') {
-    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
-		  prob_thresholds = 0.1, Fair = Fair, ncores = ncores)
-  # Brier Skill Score - 90th percentile
-  } else if (metric == 'bss90') {
-    skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim,
-		  prob_thresholds = 0.9, Fair = Fair, ncores = ncores)
-  # Ensemble mean correlation
-  } else if (metric == 'enscorr') {
-    ## TODO: Implement option for Kendall and Spearman methods?
-    skill <- s2dv::Corr(exp$data, obs$data, 
-			dat_dim = 'dat', 
-			time_dim = time_dim, 
-			method = 'pearson', 
-			memb_dim = memb_dim,
-			ncores = ncores)
-  } else if (grepl("specs", metric, fixed = TRUE)) {
-    # Compute SpecsVerification version of the metrics
-    metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name
-    skill <- Compute_verif_metrics(exp$data, obs$data,
-				   skill_metrics = metric,
-				   verif.dims=c("syear", "sday", "sweek"),
-				   na.rm = na.rm,
-				   ncores = ncores)
-  }
-
   print("##### SKILL METRIC COMPUTATION COMPLETE #####")
 
-  return(skill)
+  return(skill_metrics)
 
 }
-
-  ## TODO: Add specsVerification funs
-
diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml
index 5e02eb1f..ccfebb34 100644
--- a/modules/data_load/testing_recipes/recipe_5.yml
+++ b/modules/data_load/testing_recipes/recipe_5.yml
@@ -19,7 +19,7 @@ Analysis:
     hcst_start: '1993'
     hcst_end: '2015'
     leadtimemin: 0
-    leadtimemax: 1
+    leadtimemax: 0
   Region:
     latmin: -10
     latmax: 10
@@ -32,7 +32,7 @@ Analysis:
     Calibration:
       method: evmos
     Skill:
-      metric: RPSS_Specs
+      metric: BSS90 FRPS RPSS_Specs
     Indicators:
       index: no
   Output_format: S2S4E
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 4383eb6d..96a0e6bb 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -12,6 +12,6 @@ recipe <- read_yaml(recipe_file)
 # Calibrate datasets
 calibrated_data <- calibrate_datasets(data, recipe)
 # Compute skill metric
-skill_metric <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs,
-				      recipe, na.rm = T, ncores = 4)
+skill_metrics <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs,
+				       recipe, na.rm = T, ncores = 4)
 
-- 
GitLab


From 1571a454962e78c8a23dba261d6b25c27a119272 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Wed, 25 May 2022 12:21:33 +0200
Subject: [PATCH 29/29] Add option not to merge time dimensions, format fixes

---
 modules/Skill/s2s.metrics.R | 263 +++++++++++++++++++-----------------
 1 file changed, 138 insertions(+), 125 deletions(-)

diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R
index 1f5b8a9d..b0069730 100644
--- a/modules/Skill/s2s.metrics.R
+++ b/modules/Skill/s2s.metrics.R
@@ -30,24 +30,30 @@ mergedims <- function(data, indims, outdim) {
 
 Compute_verif_metrics <- function(exp, obs, skill_metrics,
                                   verif.dims=c("syear", "sday", "sweek"),
-                                  na.rm=T,ncores=1) {
+				  merge.dims=F,
+                                  na.rm=T, ncores=1) {
 
-  exp <- mergedims(exp, verif.dims, 'sdate')
-  obs <- mergedims(obs, verif.dims, 'sdate')
+  if (merge.dims) {
+    exp <- mergedims(exp, verif.dims, 'sdate')
+    obs <- mergedims(obs, verif.dims, 'sdate')
+    time.dim <- 'sdate'
+  } else {
+    time.dim <- 'syear'
+  }
   ## obs already contains ensemble dimension
   # obs <- InsertDim(obs, 2, 1, name='member') 
   
   ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE 
   if (na.rm) {
-    nsdates <- dim(exp)[which(names(dim(exp)) == 'sdate')][]
+    nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][]
     not_missing_dates <- c() 
     for (sdate in 1:nsdates){
-      if(!all(is.na(Subset(exp,along='sdate', list(sdate), drop=F)))){
+      if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))){
         not_missing_dates <- c(not_missing_dates, sdate)
       }
     }
-    obs <- Subset(obs, along='sdate', not_missing_dates, drop=F)
-    exp <- Subset(exp, along='sdate', not_missing_dates, drop=F)
+    obs <- Subset(obs, along=time.dim, not_missing_dates, drop=F)
+    exp <- Subset(exp, along=time.dim, not_missing_dates, drop=F)
   }
   
   return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm))
@@ -56,128 +62,134 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
 
 .compute_metrics <- function(exp, obs, metrics, 
 			     ncores=1, split_factor=1,
-			     na.rm=FALSE) {
- 
-    veriUnwrap <- easyVerification:::veriUnwrap
-
-    ## SPECS VERIFICATION PARAMETERS
-    SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss")
-
-    FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss",
-		bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr")
-
-    PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), 
-		 bss10=c(1/10), bss90=c(9/10), enscorr=NULL)
-
-    metrics_data <- list()
-    for (metric in metrics) {
-
-      if (metric %in% SPECSVER_METRICS) { 
-
-        data <- Apply(data=list(exp, obs),
-                      target_dims=list(c('sdate', 'ensemble'), 
-		      		       c('sdate', 'ensemble')),
-                      fun="veriApply",
-                      verifun=FUN[[metric]],
-                      prob=PROB[[metric]],
-                      ensdim=2,
-                      ncores=ncores,
-                      split_factor=split_factor,
-                      tdim=1,
-                      na.rm=na.rm)[[1]] #* 100
-
-        ## TODO: Keep 'ensemble' dimension to maintain flexibility?
-        data <- Subset(data, c('ensemble'), list(1), drop='selected')
-        data[!is.finite(data)] <- NaN
-        metrics_data[[ metric ]] <- data ## previously: list(data)
-
-      } else if (metric == "corr_eno") {
-        # computes ensemble mean
-        data <- multiApply::Apply(data = exp, 
-                                  target_dims = 'ensemble',
-                                  fun = mean, ncores = ncores)$output1
-
-        data <- multiApply::Apply(data = list(exp = data, 
-                                              obs = Subset(obs, c('ensemble'),
-					            list(1), drop='selected')),
-                                  target_dims = 'sdate', fun = .correlation_eno, 
-                                  time_dim = 'sdate', ncores = ncores)
-
-        # append both corr_r and corr_sign to metrics list
-        for (i in 1:length(data)) {
-          coeff <- data[[i]]
-          coeff[!is.finite(coeff)] <- NaN
-          if (names(data)[i] %in% c("r", "sign")){
-            metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff)
-          }
-        }
-
-      } else if (metric == "frpss_sign") {
-
-        terciles_obs <- Compute_probs(obs, c(1/3, 2/3), 
-				      quantile_dims=c('sdate'),
-                                      ncores=ncores,
-				      split_factor=1,
-				      na.rm=na.rm)
-
-        terciles_exp <- Compute_probs(exp, c(1/3, 2/3), 
-				      quantile_dims=c('sdate', 'ensemble'),
-                                      ncores=ncores,
-				      split_factor=1,
-				      na.rm=na.rm)
-
-        probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs))
-
-        frps <- NULL
-        n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][]
-        frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim,
-						   probs_obs = terciles_obs$probs),
-                                       target_dims = 'bin', 
-                                       fun = .rps_from_probs, 
-                                       n_categories = 3, 
-                                       n_members = n_members,
-                                       Fair = TRUE, 
-				       ncores = ncores)$output1
-
-        frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs,
-						  probs_obs = terciles_obs$probs), 
-                                      target_dims = 'bin', 
-                                      fun = .rps_from_probs, 
-                                      n_categories = 3, 
-                                      n_members = n_members,
-                                      Fair = TRUE,
-				      ncores = ncores)$output1
-
-        frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate',
-                                            fun=mean, ncores=ncores)$output1
-        frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims='sdate',
-                                            fun=mean, ncores=ncores)$output1
-
-        frpss_respect2clim <- NULL
-        frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean
-        frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp,
-							skill_B=frps$clim, 
-                                                        time_dim='sdate',
-							ncores=ncores)$signif
-          
-        frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN
-        frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN 
-
-        frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, 
-					  c('ensemble'), list(1), 
-					  drop='selected')
-        frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign,
-					  c('ensemble'), list(1),
-					  drop='selected')
-
-        metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss)
-        metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign)
+			     merge.dims=FALSE, na.rm=FALSE) {
+  if (merge.dims) {
+    time.dim <- 'sdate'
+  } else {
+    time.dim <- 'syear'
+  }
 
+  veriUnwrap <- easyVerification:::veriUnwrap
+
+  ## SPECS VERIFICATION PARAMETERS
+  SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss")
+
+  FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss",
+      	bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr")
+
+  PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), 
+      	 bss10=c(1/10), bss90=c(9/10), enscorr=NULL)
+
+  metrics_data <- list()
+  for (metric in metrics) {
+
+    if (metric %in% SPECSVER_METRICS) { 
+
+      data <- Apply(data=list(exp, obs),
+                    target_dims=list(c(time.dim, 'ensemble'), 
+      	      		       c(time.dim, 'ensemble')),
+                    fun="veriApply",
+                    verifun=FUN[[metric]],
+                    prob=PROB[[metric]],
+                    ensdim=2,
+                    ncores=ncores,
+                    split_factor=split_factor,
+                    tdim=1,
+                    na.rm=na.rm)[[1]] #* 100
+
+      ## TODO: Keep 'ensemble' dimension to maintain flexibility?
+      data <- Subset(data, c('ensemble'), list(1), drop='selected')
+      data[!is.finite(data)] <- NaN
+      metric <- paste0(metric, "_specs")
+      metrics_data[[ metric ]] <- data ## previously: list(data)
+
+    } else if (metric == "corr_eno") {
+      # computes ensemble mean
+      data <- multiApply::Apply(data = exp, 
+                                target_dims = 'ensemble',
+                                fun = mean, ncores = ncores)$output1
+
+      data <- multiApply::Apply(data = list(exp = data, 
+                                            obs = Subset(obs, c('ensemble'),
+      				            list(1), drop='selected')),
+                                target_dims = time.dim, fun = .correlation_eno, 
+                                time_dim = time.dim, ncores = ncores)
+
+      # append both corr_r and corr_sign to metrics list
+      for (i in 1:length(data)) {
+        coeff <- data[[i]]
+        coeff[!is.finite(coeff)] <- NaN
+        if (names(data)[i] %in% c("r", "sign")){
+          metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff)
+        }
       }
 
+    } else if (metric == "frpss_sign") {
+
+      terciles_obs <- Compute_probs(obs, c(1/3, 2/3), 
+      			      quantile_dims=c(time.dim),
+                                    ncores=ncores,
+      			      split_factor=1,
+      			      na.rm=na.rm)
+
+      terciles_exp <- Compute_probs(exp, c(1/3, 2/3), 
+      			      quantile_dims=c(time.dim, 'ensemble'),
+                                    ncores=ncores,
+      			      split_factor=1,
+      			      na.rm=na.rm)
+
+      probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs))
+
+      frps <- NULL
+      n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][]
+      frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim,
+      					   probs_obs = terciles_obs$probs),
+                                     target_dims = 'bin', 
+                                     fun = .rps_from_probs, 
+                                     n_categories = 3, 
+                                     n_members = n_members,
+                                     Fair = TRUE, 
+      			       ncores = ncores)$output1
+
+      frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs,
+      					  probs_obs = terciles_obs$probs), 
+                                    target_dims = 'bin', 
+                                    fun = .rps_from_probs, 
+                                    n_categories = 3, 
+                                    n_members = n_members,
+                                    Fair = TRUE,
+      			      ncores = ncores)$output1
+
+      frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim,
+                                          fun=mean, ncores=ncores)$output1
+      frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim,
+                                          fun=mean, ncores=ncores)$output1
+
+      frpss_respect2clim <- NULL
+      frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean
+      frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp,
+      						skill_B=frps$clim, 
+                                                      time_dim=time.dim,
+      						ncores=ncores)$signif
+        
+      frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN
+      frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN 
+
+      frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, 
+      				  c('ensemble'), list(1), 
+      				  drop='selected')
+      frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign,
+      				  c('ensemble'), list(1),
+      				  drop='selected')
+
+      metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss)
+      metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign)
+
     }
-    
-    return(metrics_data)
+
+  }
+  
+  return(metrics_data)
 
 }
 
@@ -246,4 +258,5 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics,
   }
   
   return(rps)
+
 }
-- 
GitLab