From 3d471c1b8c20f9cb7b27555d46435ee28e06fee7 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 26 May 2022 10:59:37 +0200
Subject: [PATCH 01/12] ../../Calibration/test_calib.R

---
 .../data_load/testing_recipes/recipe_1.yml    | 42 +++++++++----------
 .../data_load/testing_recipes/recipe_3.yml    |  2 +-
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml
index 68715462..d1aa13e6 100644
--- a/modules/data_load/testing_recipes/recipe_1.yml
+++ b/modules/data_load/testing_recipes/recipe_1.yml
@@ -2,37 +2,37 @@ Description:
   Author: V. Agudetse
 
 Analysis:
-  Horizon: Seasonal
+  Horizon: Seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
   Variables:
-    name: tas
-    freq: daily_mean
+    name: tas # Mandatory, str: tas, 
+    freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean
   Datasets:
     System:
-      name: system5c3s
-    Multimodel: no
+      name: system5c3s # Mandatory, str: System codename
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
     Reference:
-      name: era5
+      name: era5 # Mandatory, str: Reference codename
   Time:
-    sdate:
-      fcst_syear: '2020'
-      fcst_sday: '1101'
-    hcst_start: '1993'
-    hcst_end: '2016'
-    leadtimemin: 1
-    leadtimemax: 4
+    sdate: 
+      fcst_syear: '2020' # Optional, int: Forecast year 'YYYY'
+      fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD'
+    hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    leadtimemin: 1 # Mandatory, int: First leadtime time step in months 
+    leadtimemax: 4 # Mandatory, int: Last leadtime time step in months
   Region:
-    latmin: -10
-    latmax: 10
-    lonmin: 0
-    lonmax: 20
+    latmin: -10 # Mandatory, int: minimum latitude
+    latmax: 10 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude 
+    lonmax: 20 # Mandatory, int: maximum longitude
   Regrid:
-    method: bilinear
-    type: to_system
+    method: bilinear # Mandatory, str: Interpolation method. See docu.
+    type: to_system # Mandatory, str: to_system, to_reference, or CDO-compatible grid.
   Workflow:
     Calibration:
-      method: SBC
+      method: qmap # Mandatory, str: Calibration method. See docu.
     Skill:
-      metric: RPSS
+      metric: RPSS #Mandatory, str: Skill metric or list of skill metrics. See docu.
     Indicators:
       index: no
   Output_format: S2S4E
diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml
index da1bd78a..4488d43d 100644
--- a/modules/data_load/testing_recipes/recipe_3.yml
+++ b/modules/data_load/testing_recipes/recipe_3.yml
@@ -30,7 +30,7 @@ Analysis:
     type: to_system
   Workflow:
     Calibration:
-      method: mse_min
+      method: qmap
     Skill:
       metric: RPSS
     Indicators:
-- 
GitLab


From aa591cb2708aeaed5257ba0e3bb057bf7d4017ce Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Thu, 26 May 2022 11:00:32 +0200
Subject: [PATCH 02/12] Return error if calibration method is not quantile
 mapping for daily data

---
 modules/Calibration/test_calib.R | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index fa3f1359..1edfb4f1 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -32,10 +32,11 @@ calibrate_datasets <- function(data, recipe) {
     ## TODO: implement other calibration methods
     ## TODO: Restructure the code?
     if (!(method %in% CST_CALIB_METHODS)) {
-      stop("Calibration method is not implemented in CSTools")
+      stop("Calibration method in the recipe is not available for monthly",
+	   " data.")
     } else {
       ## Alba's version of CST_Calibration (pending merge) is being used
-      # Calibrate the hindcast and forecast
+      # Calibrate the hindcast
       hcst_calibrated <- CST_Calibration(hcst, obs,
                                          cal.method = method,
                                          eval.method = "leave-one-out",
@@ -68,8 +69,11 @@ calibrate_datasets <- function(data, recipe) {
 
   } else if (recipe$Analysis$Variables$freq == "daily_mean") {
     # Daily data calibration using Quantile Mapping
-    warning("Data is at daily frequency. Quantile Mapping will be used",
-            " as the calibration method.")
+    if (!(method %in% c("qmap"))) {
+      stop("Calibration method in the recipe is not available at daily ",
+	   "frequency. Only quantile mapping 'qmap' is implemented.")
+    }
+    # Calibrate the hindcast
     hcst_calibrated <- CST_QuantileMapping(hcst, obs,
       			             exp_cor = NULL,
       				     ## TODO: Tidy up
@@ -82,6 +86,7 @@ calibrate_datasets <- function(data, recipe) {
       				     na.rm = na.rm)
 
     if (!is.null(fcst)) {
+      # Calibrate the forecast
       fcst_calibrated <- CST_QuantileMapping(hcst, obs,
       				       exp_cor = fcst,
       				       sample_dims = c("syear",
-- 
GitLab


From 55b7c3ccce922eb4153d135a82329d5134ab6a93 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 27 May 2022 12:52:35 +0200
Subject: [PATCH 03/12] Change hcst_calib and fcst_calib names to hcst and fcst

---
 modules/Calibration/test_calib.R | 12 +++++++++---
 modules/test_victoria.R          |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R
index 1edfb4f1..8c6600f3 100644
--- a/modules/Calibration/test_calib.R
+++ b/modules/Calibration/test_calib.R
@@ -4,6 +4,11 @@ source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationFo
 
 ## Entry params data and recipe?
 calibrate_datasets <- function(data, recipe) {
+  # Function that calibrates the hindcast using the method stated in the
+  # recipe. If the forecast is not null, it calibrates it as well.
+  #
+  # data: list of s2dv_cube objects containing the hcst, obs and fcst.
+  # recipe: object obtained when passing the .yml recipe file to read_yaml()
   hcst <- data$hcst
   obs <- data$obs
   fcst <- data$fcst
@@ -76,7 +81,6 @@ calibrate_datasets <- function(data, recipe) {
     # Calibrate the hindcast
     hcst_calibrated <- CST_QuantileMapping(hcst, obs,
       			             exp_cor = NULL,
-      				     ## TODO: Tidy up
       			             sample_dims = c("syear",
       						     "time",
       						     "ensemble"),
@@ -101,6 +105,8 @@ calibrate_datasets <- function(data, recipe) {
     }
   }
   
-  print("###### CALIBRATION COMPLETE ######")
-  return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated))
+  print("##### CALIBRATION COMPLETE #####")
+  ## TODO: Return observations too?
+  ## TODO: Change naming convention?
+  return(list(hcst = hcst_calibrated, fcst = fcst_calibrated))
 }
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 96a0e6bb..6b90d2e9 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -12,6 +12,6 @@ recipe <- read_yaml(recipe_file)
 # Calibrate datasets
 calibrated_data <- calibrate_datasets(data, recipe)
 # Compute skill metric
-skill_metrics <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs,
+skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs,
 				       recipe, na.rm = T, ncores = 4)
 
-- 
GitLab


From f471f671672a7f4ba3977cabe70f2ef43e8484f3 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Fri, 27 May 2022 15:15:30 +0200
Subject: [PATCH 04/12] Remove startR_to_s2dv() and switch to as.s2dv_cube()

---
 modules/data_load/load.R | 59 +++-------------------------------------
 1 file changed, 4 insertions(+), 55 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 68588156..574d1bdd 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -9,56 +9,6 @@ path <- "/esarchive/scratch/lpalma/git/startR-test-attrDims/R/"
 ff <- lapply(list.files(path), function(x) paste0(path, x))
 invisible(lapply(ff, source))
 
-## TODO: Move to CSOperational
-# Conversion from startR_array to s2dv_array
-#-------------------------------------------------------------------
-attr2array <- function(attr){
-  return(array(as.vector(attr), dim(attr)))
-}
-
-startR_to_s2dv <- function(startR_array){
-
-  dates_dims <- dim(Subset(startR_array, 
-                           along=c('dat', 'var',
-                           'latitude', 'longitude', 'ensemble'), 
-                           list(1,1,1,1,1), drop="selected")) 
-
-  #sdates_dims <- dim(Subset(startR_array, 
-  #                         along=c('dat', 'var', 'time', 'sweek', 'sday',
-  #                         'latitude', 'longitude', 'ensemble'), 
-  #                         list(1,1,1,1,1,1,1,1), drop="selected")) 
-
-  dates_start <- attr(startR_array, 'Variables')$common$time
-  dates_end <- attr(startR_array, 'Variables')$common$time
-  #sdates <-  unlist(attr(startR_array, 'FileSelectors')$dat1$file_date)
-
-  dim(dates_start) <- dates_dims
-  dim(dates_end) <- dates_dims
-  #dim(sdates) <- sdates_dims
-
-  ## TODO: change this line when time attributes work correctly?
-  time_dims <- c("time", "sday", "sweek", "syear")
-
-  s2dv_object <- s2dv_cube(data = attr2array(startR_array), 
-                           lon = attr2array(attr(startR_array,
-                                            'Variables')$dat1$longitude),
-                           lat = attr2array(attr(startR_array,
-                                            'Variables')$dat1$latitude),
-                           Variable = list(varName=names(attr(startR_array, 
-							 'Variables')$common)[2],
-                                           level = NULL),
-                           Dates = list(start = dates_start,
-                                        end = dates_end),
-                                        #sdate = sdates),
-			   time_dims = time_dims,
-                           when = Sys.time(),
-                           source_files = attr2array(attr(startR_array, "Files"))
-          #                Datasets=list(exp1=list(InitializationsDates=list(                                        Member_1="01011990", 
-          #                                        Members="Member_1")))
-                          )
-  return(s2dv_object)
-}
-
 # RECIPE FOR TESTING
 # --------------------------------------------------------------------------------
 # recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
@@ -199,7 +149,7 @@ load_datasets <- function(recipe_file){
     default_dims[names(dim(hcst))] <- dim(hcst)
     dim(hcst) <- default_dims
   }
-  hcst <- startR_to_s2dv(hcst)
+  hcst <- as.s2dv_cube(hcst)
   
   # Load forecast
   #-------------------------------------------------------------------
@@ -230,7 +180,6 @@ load_datasets <- function(recipe_file){
                   split_multiselected_dims = split_multiselected_dims,
     	          retrieve = TRUE)
     
-    ## TODO: Add 'sday' and 'sweek' dims to fcst
     if (recipe$Analysis$Variables$freq == "daily_mean") {
       # Adjusts dims for daily case, could be removed if startR allows
       # multidim split
@@ -241,8 +190,8 @@ load_datasets <- function(recipe_file){
       default_dims[names(dim(fcst))] <- dim(fcst)
       dim(fcst) <- default_dims
     }
-  #  names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
-    fcst <- startR_to_s2dv(fcst)
+
+    fcst <- as.s2dv_cube(fcst)
 
   } else {
     fcst <- NULL
@@ -324,7 +273,7 @@ load_datasets <- function(recipe_file){
   default_dims[names(dim(obs))] <- dim(obs)
   dim(obs) <- default_dims
   
-  obs <- startR_to_s2dv(obs)
+  obs <- as.s2dv_cube(obs)
   
   ############################################################################
   #
-- 
GitLab


From 84e94dad8d4eb0a91b870195d09b0032e59be866 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Mon, 30 May 2022 14:50:49 +0200
Subject: [PATCH 05/12] Remove lines loading startR from the repo

---
 modules/data_load/load.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 574d1bdd..3941f5dc 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -5,10 +5,6 @@ source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R")
 source("modules/data_load/dates2load.R")
 source("tools/libs.R")
 
-path <- "/esarchive/scratch/lpalma/git/startR-test-attrDims/R/"
-ff <- lapply(list.files(path), function(x) paste0(path, x))
-invisible(lapply(ff, source))
-
 # RECIPE FOR TESTING
 # --------------------------------------------------------------------------------
 # recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
-- 
GitLab


From 71cda4592eaaf63165a797328ea4d3715b22afaf Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Mon, 30 May 2022 15:24:43 +0200
Subject: [PATCH 06/12] Add documentation to recipe, small changes for testing

---
 modules/data_load/testing_recipes/recipe_1.yml | 2 +-
 modules/data_load/testing_recipes/recipe_5.yml | 4 ++--
 modules/test_victoria.R                        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml
index d1aa13e6..f9a6090d 100644
--- a/modules/data_load/testing_recipes/recipe_1.yml
+++ b/modules/data_load/testing_recipes/recipe_1.yml
@@ -1,4 +1,4 @@
-Description:
+Description: System5 seasonal forecast at daily frequency TEST
   Author: V. Agudetse
 
 Analysis:
diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml
index ccfebb34..a3da66a5 100644
--- a/modules/data_load/testing_recipes/recipe_5.yml
+++ b/modules/data_load/testing_recipes/recipe_5.yml
@@ -17,7 +17,7 @@ Analysis:
       fcst_syear: #
       fcst_sday: '0301'
     hcst_start: '1993'
-    hcst_end: '2015'
+    hcst_end: '2016'
     leadtimemin: 0
     leadtimemax: 0
   Region:
@@ -32,7 +32,7 @@ Analysis:
     Calibration:
       method: evmos
     Skill:
-      metric: BSS90 FRPS RPSS_Specs
+      metric: FRPS
     Indicators:
       index: no
   Output_format: S2S4E
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 6b90d2e9..aeed6518 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,6 +1,6 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_5.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-- 
GitLab


From 4941d9668f55b79b22e734e7df1b6cad35ae305f Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Mon, 30 May 2022 17:15:30 +0200
Subject: [PATCH 07/12] Small format changes, load easyNCDF

---
 modules/data_load/dates2load.R | 9 ++-------
 tools/libs.R                   | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R
index d9e5fa0c..001f3155 100644
--- a/modules/data_load/dates2load.R
+++ b/modules/data_load/dates2load.R
@@ -49,12 +49,6 @@ dates2load <- function(recipe, logger){
 # adds the correspondent dims to each sdate array
 .add_dims <- function(data, type){
 
-#   if (type == "hcst"){
-#     default_dims <- c(sday = 1, sweek = 1, 
-#                       syear = length(data))
-#   } else {
-#     default_dims <- c(fcst_syear = length(data))
-#   }
   default_dims <- c(sday = 1, sweek = 1, syear = length(data))
 
   default_dims[names(dim(data))] <- dim(data)
@@ -69,6 +63,7 @@ dates2load <- function(recipe, logger){
 # The leadtimes are defined by months
 # Ex. 20201101 with leadtimes 1-4 corresponds to 
 # the forecasting times covering December to March
+
 get_timeidx <- function(sdates, ltmin, ltmax, 
                         time_freq="monthly_mean"){
 
@@ -94,7 +89,7 @@ get_timeidx <- function(sdates, ltmin, ltmax,
     lubridate::hour(indxs) <- 12 
     lubridate::minute(indxs) <- 00
     dim(indxs) <- list(file_date=length(sdates),
-                       time=(as.integer(idx_max[1]-idx_min[1])+1))
+                       time=(as.integer(idx_max[1] - idx_min[1]) + 1))
 
   } else if (time_freq == "monthly_mean") {
   
diff --git a/tools/libs.R b/tools/libs.R
index 3ee4c40a..2470225f 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -8,7 +8,7 @@ library(abind)
 # library(s2dverification)
 #  library(ncdf4)
 #  library(easyVerification)
-#  library(easyNCDF)
+library(easyNCDF)
 library(CSTools)
 #
 #  library(parallel)
-- 
GitLab


From a036e3c00e1cd425037cd0b5c8bfc5bb6ebaa64f Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 31 May 2022 13:08:39 +0200
Subject: [PATCH 08/12] Testing

---
 modules/Saving/Save_data.R                    | 115 ++++
 modules/Saving/export_2_nc-s2s4e.R            | 581 ++++++++++++++++++
 .../data_load/testing_recipes/recipe_1.yml    |  10 +-
 modules/test_victoria.R                       |   2 +-
 4 files changed, 702 insertions(+), 6 deletions(-)
 create mode 100644 modules/Saving/Save_data.R
 create mode 100644 modules/Saving/export_2_nc-s2s4e.R

diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R
new file mode 100644
index 00000000..ad6906f1
--- /dev/null
+++ b/modules/Saving/Save_data.R
@@ -0,0 +1,115 @@
+library(easyNCDF)
+
+
+get_times <- function(fcst.type, leadtimes, sdate) {
+
+  ## TODO: Adapt to Auto-S2S recipe format
+  ## TODO: Is all of this necessary? Is some of it already computed by 
+  ## dates2load?
+  # fcst.type <- recipe$Analysis$Horizon
+  switch(tolower(fcst.type),
+	 ## TODO: Remove "sub_obs"?
+         "seasonal" = {len <- length(leadtimes); ref <- 'months since '; 
+                       stdname <- paste(strtoi(leadtimes), collapse=", ")},
+         "sub_obs" = {len <- 52; ref <- 'week of the year '; 
+                      stdname <- paste(strtoi(leadtimes), collapse=", ")},
+         "subseasonal" = {len <- 4; ref <- 'weeks since ';
+                          stdname <- ''}
+  ) 
+
+  time <- 1:len
+  dim(time) <- length(time)
+  #metadata <- list(time = list(standard_name = stdname,
+  metadata <- list(time = list(units = paste0(ref, sdate, ' 00:00:00')))
+  attr(time, 'variables') <- metadata
+  names(dim(time)) <- 'time'
+  
+  time_step <- 1
+  dim(time_step) <- length(time_step)
+  metadata <- list(time_step = list(units = paste0(ref, sdate, ' 00:00:00')))
+  attr(time_step, 'variables') <- metadata
+  names(dim(time_step)) <- 'time_step'
+
+  sdate <- 1:length(sdate)
+  dim(sdate) <- length(sdate)
+  metadata <- list(sdate = list(standard_name = paste(strtoi(sdate),
+						      collapse=", "),
+                                units = paste0('Init date')))
+  attr(sdate, 'variables') <- metadata
+  names(dim(sdate)) <- 'sdate'
+
+  return(list(time_step=time_step, time=time, sdate=sdate))
+
+}
+
+get_latlon <- function(lat, lon) {
+
+    longitude <- lon
+    dim(longitude) <- length(longitude)
+    metadata <- list(longitude = list(units = 'degrees_east'))
+    attr(longitude, 'variables') <- metadata
+    names(dim(longitude)) <- 'longitude'
+    
+    latitude <- lat
+    dim(latitude) <- length(latitude)
+    metadata <- list(latitude = list(units = 'degrees_north'))
+    attr(latitude, 'variables') <- metadata
+    names(dim(latitude)) <- 'latitude'
+
+    return(list(lat=latitude, lon=longitude))
+
+}
+
+save_metrics <- function(variable,
+                         skill,
+                         fcst.sdate,
+                         grid,
+                         outfile,
+                         monthnames,
+                         fcst.type,
+                         agg) {
+  
+  ## TODO: Remove variable as entry parameter
+
+  lalo <- c('longitude', 'latitude')
+
+  ## TODO: Sort out aggregation
+  if (tolower(agg) == "global") {
+    skill <-  lapply(skill, function(x){
+                     Reorder(x[[1]], c(lalo, 'time'))})
+  }
+
+  for (i in 1:length(skill)) {
+
+    metric <- names(skill[i])
+    if (tolower(agg) == "country"){
+      sdname <- paste0(names(metric), " region-aggregated metric")
+      dims <- c('Country', 'time')
+    } else {
+      sdname <- paste0(names(metric), " grid point metric")
+      dims <- c(lalo, 'time')
+    }
+    metadata <- list(name = metric, standard_name = sdname)
+
+    attr(skill[i], 'variables') <- metadata        
+    names(dim(skill[[i]])) <- dims 
+  }
+
+  times <- get_times(fcst.type, monthnames, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(append(country, time, skill, time_step), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    vars <- list(latlon$lat, latlon$lon, time)
+    vars <- c(vars, skill, list(time_step))
+    ArrayToNc(vars, outfile)
+  }
+}
+
diff --git a/modules/Saving/export_2_nc-s2s4e.R b/modules/Saving/export_2_nc-s2s4e.R
new file mode 100644
index 00000000..9adb6d80
--- /dev/null
+++ b/modules/Saving/export_2_nc-s2s4e.R
@@ -0,0 +1,581 @@
+library(easyNCDF)
+
+save_bias <- function(variable,
+                      data,
+                      fcst.sdate,
+                      outfile,
+                      leadtimes,
+                      grid,
+                      agg,
+                      fcst.type) {
+
+  lalo <- c('longitude', 'latitude') #decathlon subseasonal
+		
+  ## TODO: Sort out different aggregation cases
+  # if (tolower(agg) == "global") {
+  #   data <- Reorder(data, c(lalo,'time'))
+  # } else {
+  #   data <- Reorder(data, c('country', 'time'))
+  # }
+  
+  if (variable %in% c("tas", "tasmin", "tasmax")) {
+    obs <- data; units <- "ºC";
+    var.longname <- "Temperature bias" 
+  } else {
+    data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast")
+    obs <- data.conv$data$fcst; units <- data.conv$units;
+    var.longname <- data.conv$var.longname
+    remove(data.conv)
+  }
+
+  if (tolower(agg) == "country"){
+    dims <- c('Country', 'time')
+    var.expname <- paste0(variable, '_country') 
+    var.sdname <- paste0("Country-Aggregated ", var.longname)
+  } else {
+    dims <- c(lalo,'time')
+    var.expname <- get_outname(variable,VARS_DICT)
+    var.sdname <- var.longname
+  }
+
+  metadata <- list(obs = list(name = var.expname,
+                              standard_name = var.sdname,
+                              units = units))
+  attr(obs, 'variables') <- metadata
+  names(dim(obs)) <- dims 
+  
+  times <- get_times(fcst.type, leadtimes, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(list(country, time, time_step, obs), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile)
+
+  }
+}
+
+save_obs_country_file <-
+  function(variable,
+           obs,
+           fcst.sdate,
+           outfile,
+           fcst.type,
+           monthnames) {
+
+    if (fcst.type == 'seasonal'){
+      mask.path <- 'masks/mask_europe_system5_2.Rdata'
+    } else {
+      mask.path <- 'masks/mask_europe_S2S_ecmwf.Rdata'
+    }
+    
+    load(mask.path)
+    
+    ifelse(exists("lonlat_dctln"),
+	lalo <- c('longitude','latitude'), #decathlon subseasonal
+	lalo <- c('latitude','longitude')) #no decathlon
+
+    obs <- Reorder(obs, c(lalo,'time'))
+#    obs <- Reorder(obs, c('latitude','longitude','time'))
+
+    obs.country <-Apply(data=list(pointdata=obs),
+#                                  target_dims=c('latitude','longitude'),
+				  target_dims=lalo,
+                                  output_dims=c('country'),
+                                  mask.path=mask.path, 
+                                  ncores=2,
+                                  split_factor=1,
+                                  fun = Country_mean)[[1]]
+
+  vars <- yaml.load_file(VARS_DICT)$vars
+  units <- vars[[variable]]$units
+  var.longname <- vars[[variable]]$longname
+
+  metadata <- list(obs.country = 
+      list(
+          name = paste0(variable, "_country"),
+          standard_name = paste0(var.longname, " (Country-Aggregated)"),
+          units = units
+      )
+  )
+    
+  attr(obs.country, 'variables') <- metadata
+  names(dim(obs.country)) <- c('Country', 'time')
+  
+  times <- get_times(fcst.type, monthnames, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  country <- 1:length(europe.countries.iso)
+  dim(country) <- length(country)
+  metadata <- list( country = list(
+                    standard_name = paste(europe.countries.iso, collapse=" "),
+		            units = 'Country ISO 3166-1 alpha 3 Code')) #if region, these units are incorrect.
+  attr(country, 'variables') <- metadata
+  names(dim(country)) <- 'Country'
+
+
+  ArrayToNc(list(country,time,time_step,obs.country),
+            outfile)
+ 
+ }
+
+save_obs <- function(variable,
+                     data,
+                     fcst.sdate,
+                     outfile,
+                     leadtimes,
+                     grid,
+                     agg,
+                     fcst.type) {
+
+  lalo <- c('longitude','latitude') #decathlon subseasonal
+
+  if (tolower(agg) == "global"){
+    data <- Reorder(data, c(lalo,'time'))
+  } else {
+    data <- Reorder(data, c('country', 'time'))
+  }
+
+  data.conv <- convert_data(list(fcst=data,test=data),variable,leadtimes,fcst.type,"forecast")
+  obs <- data.conv$data$fcst; units <- data.conv$units;
+  var.longname <- data.conv$var.longname
+  remove(data.conv)
+
+  if (tolower(agg) == "country"){
+    dims <- c('Country', 'time')
+    var.expname <- paste0(variable, '_country') 
+    var.sdname <- paste0("Country-Aggregated ", var.longname)
+  } else {
+    dims <- c(lalo,'time')
+    var.expname <- get_outname(variable,VARS_DICT)
+    var.sdname <- var.longname
+  }
+
+  metadata <- list(obs = list(name = var.expname,
+                              standard_name = var.sdname,
+                              units = units))
+  attr(obs, 'variables') <- metadata
+  names(dim(obs)) <- dims 
+  
+  times <- get_times(fcst.type, leadtimes, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(list(country, time, time_step, obs), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    ArrayToNc(list(latlon$lat, latlon$lon, time, obs, time_step), outfile)
+
+  }
+}
+
+
+save_forecast <- function(variable,
+                          fcst,
+                          fcst.sdate,
+                          outfile,
+                          leadtimes,
+                          grid,
+                          agg,
+                          fcst.type) {
+
+  lalo <- c('longitude','latitude') #decathlon subseasonal
+
+  if (tolower(agg) == "global"){
+    fcst <- Reorder(fcst, c(lalo,'member',
+                          'time'))
+  } else {
+    fcst <- Reorder(fcst, c('country','member', 'time'))
+  }
+
+  fcst.conv <- convert_data(list(fcst=fcst,test=fcst),variable,leadtimes,fcst.type,"forecast")
+  fcst <- fcst.conv$data$fcst; units <- fcst.conv$units;
+  var.longname <- fcst.conv$var.longname
+  remove(fcst.conv)
+
+  if (tolower(agg) == "country"){
+    dims <- c('Country', 'ensemble', 'time')
+    var.expname <- paste0(variable, '_country') 
+    var.sdname <- paste0("Country-Aggregated ", var.longname)
+  } else {
+    dims <- c(lalo,'ensemble', 'time')
+    var.expname <- get_outname(variable,VARS_DICT)
+    var.sdname <- var.longname
+  }
+
+  metadata <- list(fcst = list(name = var.expname,
+                               standard_name = var.sdname,
+                               units = units))
+  attr(fcst, 'variables') <- metadata
+  names(dim(fcst)) <- dims 
+  
+  times <- get_times(fcst.type, leadtimes, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(list(country, time, time_step, fcst), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    ArrayToNc(list(latlon$lat, latlon$lon, time, fcst, time_step), outfile)
+
+  }
+}
+
+save_probs <- function(variable,
+                       probs,
+                       fcst.sdate,
+                       outfile,
+                       monthnames,
+                       grid,
+                       agg,
+                       fcst.type) {
+  
+  lalo <- c('longitude','latitude') #decathlon subseasonal
+
+  if (tolower(agg) == "global"){
+    probs <-  lapply(probs, function(x){
+                          Reorder(x, c('bin',lalo, 'time'))})
+  }
+
+  pbn <- Subset(probs$tercile, 'bin', list(1), drop='selected')
+  pn <-  Subset(probs$tercile, 'bin', list(2), drop='selected')
+  pan <- Subset(probs$tercile, 'bin', list(3), drop='selected')
+  p10 <- Subset(probs$extreme, 'bin', list(1), drop='selected')
+  p90 <- Subset(probs$extreme, 'bin', list(3), drop='selected')
+   
+  pn.sdname <- paste('Probability below normal category ', sep='');
+  pan.sdname <- paste('Probability above normal category ', sep='');
+  pbn.sdname <- paste('Probability normal category ', sep='');
+  p10.sdname <- paste('Probability below extreme category ', sep='');
+  p90.sdname <- paste('Probability above extreme category  ', sep='');
+  
+  if (tolower(agg) == "country"){
+    dims <- c('Country', 'time')
+    pn.sdanme <- paste0('Country-Aggregated ', pn.sdname) 
+    pbn.sdanme <- paste0('Country-Aggregated ', pbn.sdname) 
+    pan.sdanme <- paste0('Country-Aggregated ', pan.sdname) 
+    p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) 
+    p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) 
+  } else {
+    dims <- c(lalo, 'time')
+    pn.sdanme <-  paste0('Global ', pn.sdname) 
+    pbn.sdanme <- paste0('Global ', pbn.sdname) 
+    pan.sdanme <- paste0('Global ', pan.sdname) 
+    p10.sdanme <- paste0('Global ', p10.sdname) 
+    p90.sdanme <- paste0('Global ', p90.sdname) 
+  }
+
+  metadata <- list(pbn = list(name = 'prob_bn',
+                                        standard_name = pbn.sdname ),
+                   pn = list(name = 'prob_n',
+                                        standard_name = pn.sdname),
+                   pan = list(name = 'prob_an',
+                                        standard_name = pan.sdname),
+                   p10 = list(name = 'prob_bp10',
+                                        standard_name = p10.sdname),
+                   p90 = list(name = 'prob_ap90',
+                                        standard_name = p90.sdname))
+
+  attr(pbn, 'variables') <- metadata[1]
+  attr(pn, 'variables') <- metadata[2]
+  attr(pan, 'variables') <- metadata[3]
+  attr(p10, 'variables') <- metadata[4]
+  attr(p90, 'variables') <- metadata[5]
+
+  names(dim(pbn)) <- dims 
+  names(dim(pn))  <- dims 
+  names(dim(pan)) <- dims 
+  names(dim(p10)) <- dims 
+  names(dim(p90)) <- dims 
+        
+  times <- get_times(fcst.type, monthnames, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(list(country, time, pbn, pn, pan, p10, p90, time_step), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    latitude <- latlon$lat; longitude <- latlon$lon
+    ArrayToNc(list(latitude, longitude, time, pbn, pn, pan, p10, p90,
+    time_step), outfile)
+  }
+ 
+ }
+
+get_times <- function (fcst.type, leadtimes, sdate){
+
+  switch(tolower(fcst.type),
+         "seasonal"     = {len <- length(leadtimes); ref <- 'months since '; 
+                           stdname <- paste(strtoi(leadtimes), collapse=", ")},
+         "sub_obs"     = {len <- 52; ref <- 'week of the year '; 
+                           stdname <- paste(strtoi(leadtimes), collapse=", ")},
+         "subseasonal"     = {len <- 4; ref <- 'weeks since ';
+                           stdname <- ''}
+  ) 
+
+  time <- 1:len
+  dim(time) <- length(time)
+  #metadata <- list(time = list(standard_name = stdname,
+  metadata <- list(time = list(
+                               units = paste0(ref, sdate, ' 00:00:00')))
+  attr(time, 'variables') <- metadata
+  names(dim(time)) <- 'time'
+  
+  time_step <- 1
+  dim(time_step) <- length(time_step)
+  metadata <- list(time_step = list(units = paste0(
+                                    ref, sdate, ' 00:00:00')))
+  attr(time_step, 'variables') <- metadata
+  names(dim(time_step)) <- 'time_step'
+
+  sdate <- 1:length(sdate)
+  dim(sdate) <- length(sdate)
+  metadata <- list(sdate = list(standard_name = paste(strtoi(sdate), collapse=", "),
+                               units = paste0('Init date')))
+  attr(sdate, 'variables') <- metadata
+  names(dim(sdate)) <- 'sdate'
+
+  return(list(time_step=time_step, time=time, sdate=sdate))
+
+}
+
+get_countries <- function(europe.countries.iso){
+
+    country <- 1:length(europe.countries.iso)
+    dim(country) <- length(country)
+    metadata <- list( country = list(
+                                standard_name = paste(europe.countries.iso, collapse=" "),
+    	                        units = 'Country ISO 3166-1 alpha 3 Code'))
+    attr(country, 'variables') <- metadata
+    names(dim(country)) <- 'Country'
+    return(country)
+
+}
+
+get_latlon <- function(lat, lon){
+
+    longitude <- lon
+    dim(longitude) <- length(longitude)
+    metadata <- list(longitude = list(units = 'degrees_east'))
+    attr(longitude, 'variables') <- metadata
+    names(dim(longitude)) <- 'longitude'
+    
+    latitude <- lat
+    dim(latitude) <- length(latitude)
+    metadata <- list(latitude = list(units = 'degrees_north'))
+    attr(latitude, 'variables') <- metadata
+    names(dim(latitude)) <- 'latitude'
+
+    return(list(lat=latitude,lon=longitude))
+
+}
+
+save_metrics <- function(variable,
+                         skill,
+                         fcst.sdate,
+                         grid,
+                         outfile,
+                         monthnames,
+                         fcst.type,
+                         agg) 
+{
+  
+  lalo <- c('longitude', 'latitude')
+
+  ## TODO: Sort out aggregation
+  if (tolower(agg) == "global") {
+    skill <-  lapply(skill, function(x){
+                     Reorder(x[[1]], c(lalo, 'time'))})
+  }
+
+  for (i in 1:length(skill)) {
+
+    metric <- names(skill[i])
+    if (tolower(agg) == "country"){
+      sdname <- paste0(names(metric), " region-aggregated metric")
+      dims <- c('Country', 'time')
+    } else {
+      sdname <- paste0(names(metric), " grid point metric")
+      dims <- c(lalo, 'time')
+    }
+    metadata <- list(name = metric, standard_name = sdname)
+
+    attr(skill[i], 'variables') <- metadata        
+    names(dim(skill[[i]])) <- dims 
+  }
+
+  times <- get_times(fcst.type, monthnames, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(append(country, time, skill, time_step), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    vars <- list(latlon$lat, latlon$lon, time)
+    vars <- c(vars, skill, list(time_step))
+    ArrayToNc(vars, outfile)
+  }
+}
+
+convert_seasosnal_prlr <- function(data2convert, leadtimes,filetype){
+
+    ind <- 1:length(leadtimes)
+    #dates <- paste0(leadtimes,"01")
+    # computes the last day of the month
+    lastday <- sapply(ind, function(x) 
+                      {as.integer(substr((seq(as.Date(leadtimes[x],"%Y%m%d"), 
+                                               length=2,by="months")-1)[2], 
+                                        9, 10))})
+
+    if (filetype == "terciles"){
+      ter <- sapply(ind, function(x) {
+                Subset(data2convert$tercile, along='time',
+                indices=x,drop='selected')*1000*3600*24*lastday[x]},
+                simplify='array')
+      ext <- sapply(ind, function(x) {
+                Subset(data2convert$extreme, along='time',
+                indices=x,drop='selected')*1000*3600*24*lastday[x]},
+                simplify='array') 
+
+      data2convert <- list(tercile=ter, extreme=ext)
+    } else {
+    
+      ens <- sapply(ind, function(x) {
+                Subset(data2convert$fcst, along='time',
+                indices=x,drop='selected')*1000*3600*24*lastday[x]},
+                simplify='array')
+
+      data2convert <- list(fcst=ens)
+    }
+
+  return(data2convert)
+}
+
+convert_data <- function(data,variable, leadtimes, fcst.type,filetype){
+
+    vars <- yaml.load_file(VARS_DICT)$vars
+    units <- vars[[variable]]$units
+    var.longname <- vars[[variable]]$longname
+
+    if (variable %in% c("tas","tasmin","tasmax")){
+      data <- lapply(data, function(x){ x - 273.15})
+    } else if (variable %in% c("psl")){
+      data <- lapply(data, function(x){ x/100})
+    } else {
+      print("WARNING: NO DATA CONVERSION APPLIED")
+    }
+
+
+    return(list(data=data, units=units, var.longname=var.longname))
+
+} 
+
+
+## TODO: implement lists as in save_metrics
+save_terciles <- function(variable,
+                          terciles,
+                          fcst.sdate,
+                          grid,
+                          outfile,
+                          leadtimes,
+                          fcst.type,
+                          agg) {
+
+  lalo <- c('longitude','latitude') #decathlon subseasonal
+
+  if (tolower(agg) == "global"){
+    terciles <-  lapply(terciles, function(x){
+                          Reorder(x, c('bin',lalo, 'time'))})
+  }
+
+  terciles.conv <- convert_data(terciles,variable,leadtimes,fcst.type,"terciles")
+  terciles <- terciles.conv$data; units <- terciles.conv$units;
+  var.longname <- terciles.conv$var.longname
+  remove(terciles.conv)
+  
+  p33 <- Subset(terciles$tercile, 'bin', list(1), drop='selected')
+
+  p66 <- Subset(terciles$tercile, 'bin', list(2), drop='selected')
+  p10 <- Subset(terciles$extreme, 'bin', list(1), drop='selected')
+  p90 <- Subset(terciles$extreme, 'bin', list(2), drop='selected')
+
+  p33.sdname <- paste('Lower Tercile ', sep='');
+  p66.sdname <- paste('Upper Tercile ', sep='');
+  p10.sdname <- paste('Lower extreme', sep='');
+  p90.sdname <- paste('Upper extreme', sep='');
+  
+  if (tolower(agg) == "country"){
+    dims <- c('Country', 'time')
+    p33.sdanme <- paste0('Country-Aggregated ', p33.sdname) 
+    p66.sdanme <- paste0('Country-Aggregated ', p66.sdname) 
+    p10.sdanme <- paste0('Country-Aggregated ', p10.sdname) 
+    p90.sdanme <- paste0('Country-Aggregated ', p90.sdname) 
+  } else {
+    dims <- c(lalo, 'time')
+    p33.sdanme <- paste0('Global ', p33.sdname) 
+    p66.sdanme <- paste0('Global ', p66.sdname) 
+    p10.sdanme <- paste0('Gloabl ', p10.sdname) 
+    p90.sdanme <- paste0('Global ', p90.sdname) 
+  }
+
+  metadata <- list(pbn = list(name = 'p33',
+                                        standard_name = p33.sdname ),
+                   pn = list(name = 'p66',
+                                        standard_name = p66.sdname),
+                   pan = list(name = 'p10',
+                                        standard_name = p10.sdname),
+                   p10 = list(name = 'p90',
+                                        standard_name = p90.sdname))
+
+  attr(p33, 'variables') <- metadata[1]
+  attr(p66, 'variables') <- metadata[2]
+  attr(p10, 'variables') <- metadata[3]
+  attr(p90, 'variables') <- metadata[4]
+
+  names(dim(p33)) <- dims
+  names(dim(p66)) <- dims
+  names(dim(p10)) <- dims
+  names(dim(p90)) <- dims
+
+  times <- get_times(fcst.type, leadtimes, fcst.sdate)
+  time <- times$time
+  time_step <- times$time_step
+
+  if (tolower(agg) == "country") { 
+    
+    country <- get_countries(grid)
+    ArrayToNc(list(country, time, p33, p66, p10, p90, time_step), outfile)
+
+  } else  {
+
+    latlon <- get_latlon(grid$lat, grid$lon)
+    ArrayToNc(list(latlon$lat, latlon$lon, time, p33, p66, p10, p90, time_step), outfile)
+  }
+}
diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml
index f9a6090d..aca4a897 100644
--- a/modules/data_load/testing_recipes/recipe_1.yml
+++ b/modules/data_load/testing_recipes/recipe_1.yml
@@ -1,5 +1,5 @@
-Description: System5 seasonal forecast at daily frequency TEST
-  Author: V. Agudetse
+Description: System5test
+Author: V. Agudetse
 
 Analysis:
   Horizon: Seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
@@ -17,9 +17,9 @@ Analysis:
       fcst_syear: '2020' # Optional, int: Forecast year 'YYYY'
       fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD'
     hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
-    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
-    leadtimemin: 1 # Mandatory, int: First leadtime time step in months 
-    leadtimemax: 4 # Mandatory, int: Last leadtime time step in months
+    hcst_end: '1995' # Mandatory, int: Hindcast end year 'YYYY'
+    leadtimemin: 0 # Mandatory, int: First leadtime time step in months 
+    leadtimemax: 0 # Mandatory, int: Last leadtime time step in months
   Region:
     latmin: -10 # Mandatory, int: minimum latitude
     latmax: 10 # Mandatory, int: maximum latitude
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index aeed6518..b4c4f310 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,6 +1,6 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-- 
GitLab


From ebfade273179b2572407373c5188c020ae971ba7 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 31 May 2022 15:06:36 +0200
Subject: [PATCH 09/12] testing

---
 modules/data_load/testing_recipes/recipe_1.yml | 2 +-
 modules/test_victoria.R                        | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml
index aca4a897..742a77b8 100644
--- a/modules/data_load/testing_recipes/recipe_1.yml
+++ b/modules/data_load/testing_recipes/recipe_1.yml
@@ -17,7 +17,7 @@ Analysis:
       fcst_syear: '2020' # Optional, int: Forecast year 'YYYY'
       fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD'
     hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
-    hcst_end: '1995' # Mandatory, int: Hindcast end year 'YYYY'
+    hcst_end: '1994' # Mandatory, int: Hindcast end year 'YYYY'
     leadtimemin: 0 # Mandatory, int: First leadtime time step in months 
     leadtimemax: 0 # Mandatory, int: Last leadtime time step in months
   Region:
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index b4c4f310..3f394e28 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,17 +1,10 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
-source("modules/Skill/Skill.R")
-
 data <- load_datasets(recipe_file)
 ## TODO: Instead of reading the recipe at each module, do it at the beginning?
 recipe <- read_yaml(recipe_file)
-# Calibrate datasets
 calibrated_data <- calibrate_datasets(data, recipe)
-# Compute skill metric
-skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs,
-				       recipe, na.rm = T, ncores = 4)
-
-- 
GitLab


From 5aaf78916d30f14375d6b33db70a9226df658555 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 31 May 2022 15:54:29 +0200
Subject: [PATCH 10/12] Bugfix: correct dimensions of dates object for loading
 of daily obs

---
 modules/data_load/load.R | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 3941f5dc..d33dc0ca 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -145,6 +145,13 @@ load_datasets <- function(recipe_file){
     default_dims[names(dim(hcst))] <- dim(hcst)
     dim(hcst) <- default_dims
   }
+
+  #dates <- attr(hcst, 'Variables')$common$time
+#  dim(dates) <- dim(Subset(hcst, 
+#			   along=c('dat', 'var',
+#			   'latitude', 'longitude', 'ensemble'),
+#			   list(1,1,1,1,1), drop="selected"))
+
   hcst <- as.s2dv_cube(hcst)
   
   # Load forecast
@@ -195,16 +202,17 @@ load_datasets <- function(recipe_file){
   
   # Load reference
   #-------------------------------------------------------------------
-  dates <- hcst$Dates$start 
- 
+  dates <- hcst$Dates$start
+  dim(dates) <- dim(Subset(hcst$data, 
+			   along=c('dat', 'var',
+			   'latitude', 'longitude', 'ensemble'),
+			   list(1,1,1,1,1), drop="selected"))
+
   # Separate Start() call for monthly vs daily data
   if (store.freq == "monthly_mean") {
   
     dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
-    dim(dates_file) <- dim(Subset(hcst$data, 
-                                  along=c('dat','var',
-                                  'latitude', 'longitude', 'ensemble'), 
-                                  list(1,1,1,1,1), drop="selected"))
+    dim(dates_file) <- dim(dates)
 
     obs <- Start(dat = obs.path,
                  var = variable,
-- 
GitLab


From 90a55286916817a54fc4c907267c7dbb24b2ac39 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Tue, 31 May 2022 16:19:32 +0200
Subject: [PATCH 11/12] Add basic summary of output to the end of loading
 module

---
 modules/data_load/load.R | 48 +++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index d33dc0ca..7d6b390d 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -11,9 +11,8 @@ source("tools/libs.R")
 # recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml"
 # recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml"
 
-load_datasets <- function(recipe_file){
+load_datasets <- function(recipe_file) {
 
-  ## TODO: Get only the last part of the path as the recipe$filename? 
   recipe <- read_yaml(recipe_file)
   recipe$filename <- recipe_file 
 
@@ -146,12 +145,7 @@ load_datasets <- function(recipe_file){
     dim(hcst) <- default_dims
   }
 
-  #dates <- attr(hcst, 'Variables')$common$time
-#  dim(dates) <- dim(Subset(hcst, 
-#			   along=c('dat', 'var',
-#			   'latitude', 'longitude', 'ensemble'),
-#			   list(1,1,1,1,1), drop="selected"))
-
+  # Convert hcst to s2dv_cube object
   hcst <- as.s2dv_cube(hcst)
   
   # Load forecast
@@ -194,6 +188,7 @@ load_datasets <- function(recipe_file){
       dim(fcst) <- default_dims
     }
 
+    # Convert fcst to s2dv_cube
     fcst <- as.s2dv_cube(fcst)
 
   } else {
@@ -202,6 +197,9 @@ load_datasets <- function(recipe_file){
   
   # Load reference
   #-------------------------------------------------------------------
+
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
   dates <- hcst$Dates$start
   dim(dates) <- dim(Subset(hcst$data, 
 			   along=c('dat', 'var',
@@ -269,15 +267,45 @@ load_datasets <- function(recipe_file){
   	             retrieve = TRUE)
   }
   
-  # TODO: Reorder obs dims to match hcst dims?
   # Adds ensemble dim to obs (for consistency with hcst/fcst)
   default_dims <- c(dat = 1, var = 1, sday = 1, 
                     sweek = 1, syear = 1, time = 1, 
                     latitude = 1, longitude = 1, ensemble = 1)
   default_dims[names(dim(obs))] <- dim(obs)
   dim(obs) <- default_dims
-  
+ 
+  # Convert obs to s2dv_cube
   obs <- as.s2dv_cube(obs)
+
+  # Print a summary of the loaded data for the user, for each object
+  # (hcst, obs and fcst).
+  ## TODO: Incorporate into logger
+  ## TODO: Adapt to daily/subseasonal cases
+  ## TODO: Add check for missing files/NAs
+  data_summary <- function(object) {
+    # Get name and start dates
+    object_name <- deparse(substitute(object))
+    sdate_min <- format(min(as.Date(object$Dates[[1]])), format = '%b %Y')
+    sdate_max <- format(max(as.Date(object$Dates[[1]])), format = '%b %Y')
+    
+    print("DATA SUMMARY:")
+    print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max))
+    print(paste0(object_name, " dimensions: "))
+    print(dim(object$data))
+    print(paste0("Statistical summary of the data in ", object_name, ":"))
+    print(summary(object$data))
+    print("---------------------------------------------")
+
+  }
+
+
+  data_summary(hcst)
+  data_summary(obs)
+  if (!is.null(fcst)) {
+    data_summary(fcst)
+  }
+
+  print("##### DATA LOADING COMPLETED SUCCESSFULLY #####")
   
   ############################################################################
   #
-- 
GitLab


From 092e1bb15fa4d7f0d695556f65b6bd652ee7ab43 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures <victoria.agudetse@bsc.es>
Date: Wed, 1 Jun 2022 10:53:08 +0200
Subject: [PATCH 12/12] Add comments and fix formatting

---
 modules/data_load/load.R                      |  6 +--
 .../data_load/testing_recipes/recipe_1.yml    |  6 +--
 modules/test.R                                | 37 -------------------
 modules/test_victoria.R                       |  9 ++++-
 4 files changed, 14 insertions(+), 44 deletions(-)
 delete mode 100644 modules/test.R

diff --git a/modules/data_load/load.R b/modules/data_load/load.R
index 7d6b390d..570788ee 100755
--- a/modules/data_load/load.R
+++ b/modules/data_load/load.R
@@ -86,11 +86,11 @@ load_datasets <- function(recipe_file) {
   
   hcst.path <- paste0(archive$src,
                hcst.dir, store.freq, "/$var$",
-               exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc")
+               exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc")
   
   fcst.path <- paste0(archive$src,
                hcst.dir, store.freq, "/$var$",
-  	           exp_descrip[[store.freq]][[variable]],"$var$_$file_date$.nc")
+  	       exp_descrip[[store.freq]][[variable]], "$var$_$file_date$.nc")
   
   # Define regrid parameters:
   #-------------------------------------------------------------------
@@ -126,7 +126,7 @@ load_datasets <- function(recipe_file) {
                 transform_vars = c('latitude', 'longitude'),
                 synonims = list(latitude = c('lat', 'latitude'),
                                 longitude = c('lon', 'longitude'),
-  			                    ensemble = c('member', 'ensemble')),
+  			        ensemble = c('member', 'ensemble')),
                 ensemble = indices(1:hcst.nmember),
                 return_vars = list(latitude = 'dat',
                                    longitude = 'dat',
diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/data_load/testing_recipes/recipe_1.yml
index 742a77b8..ebf0299d 100644
--- a/modules/data_load/testing_recipes/recipe_1.yml
+++ b/modules/data_load/testing_recipes/recipe_1.yml
@@ -17,7 +17,7 @@ Analysis:
       fcst_syear: '2020' # Optional, int: Forecast year 'YYYY'
       fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD'
     hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
-    hcst_end: '1994' # Mandatory, int: Hindcast end year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
     leadtimemin: 0 # Mandatory, int: First leadtime time step in months 
     leadtimemax: 0 # Mandatory, int: Last leadtime time step in months
   Region:
@@ -39,5 +39,5 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/lpalma/git/auto-s2s/out-logs/
-  code_dir: /esarchive/scratch/lpalma/git/auto-s2s/
+  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/modules/test.R b/modules/test.R
deleted file mode 100644
index 744f6ecd..00000000
--- a/modules/test.R
+++ /dev/null
@@ -1,37 +0,0 @@
-
-
-recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
-recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml"
-recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml"
-
-source("modules/data_load/load.R")
-
-data <- load_datasets(recipe_file)
-
-method <- "bias"
-mm=F
-ncores=4
-na.rm=T
-
-# Hcst calibration 
-#hcst <- CSTools::CST_Calibration(data$hcst, data$obs,
-#                                 cal.method = method,
-#                                 eval.method = "leave-one-out",
-#                                 multi.model = mm,
-#                                 na.fill = TRUE,
-#                                 na.rm = na.rm,
-#                                 apply_to = NULL,
-#                                 alpha = NULL,
-#                                 memb_dim = "ensemble",
-#                                 sdate_dim = "syear",
-#                                 ncores = ncores)
-
-# For daily
-hcst <- CSTools::CST_QuantileMapping(data$hcst,
-                                     data$obs,
-                                     exp_cor = NULL,
-                                     sample_dims = c("syear", "time", "ensemble"),
-                                     sample_length = NULL,
-                                     method = "QUANT",
-                                     ncores = ncores,
-                                     n.rm = na.rm)
diff --git a/modules/test_victoria.R b/modules/test_victoria.R
index 3f394e28..58d9be6a 100644
--- a/modules/test_victoria.R
+++ b/modules/test_victoria.R
@@ -1,10 +1,17 @@
 
 
-recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml"
+recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml"
 
 source("modules/data_load/load.R")
 source("modules/Calibration/test_calib.R")
+source("modules/Skill/Skill.R")
+
+# Load datasets
 data <- load_datasets(recipe_file)
 ## TODO: Instead of reading the recipe at each module, do it at the beginning?
 recipe <- read_yaml(recipe_file)
+# Calibrate datasets
 calibrated_data <- calibrate_datasets(data, recipe)
+# Compute skill metrics
+skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs,
+				       recipe, na.rm = T, ncores = 4)
-- 
GitLab