From 81552c14e896dc2da74ef24be46028f52dde0678 Mon Sep 17 00:00:00 2001
From: Carlos Delgado <carlos.delgado@bsc.es>
Date: Mon, 20 Mar 2023 16:49:05 +0100
Subject: [PATCH 01/10] first version of multimodel

---
 conf/archive_decadal.yml                     |   4 +-
 modules/Calibration/Calibration.R            |  11 +-
 modules/Loading/Loading.R                    |  33 ++
 modules/Loading/Loading_decadal.R            | 487 ++++++++++---------
 modules/Loading/join_datasets.R              |  59 +++
 recipes/tests/recipe_model_decadal.yml       |  51 ++
 recipes/tests/recipe_model_seasonal.yml      |  55 +++
 recipes/tests/recipe_multimodel_decadal.yml  |  51 ++
 recipes/tests/recipe_multimodel_seasonal.yml |  55 +++
 9 files changed, 572 insertions(+), 234 deletions(-)
 create mode 100644 modules/Loading/join_datasets.R
 create mode 100644 recipes/tests/recipe_model_decadal.yml
 create mode 100644 recipes/tests/recipe_model_seasonal.yml
 create mode 100644 recipes/tests/recipe_multimodel_decadal.yml
 create mode 100644 recipes/tests/recipe_multimodel_seasonal.yml

diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml
index 2b74bff8..d6a7e5ea 100644
--- a/conf/archive_decadal.yml
+++ b/conf/archive_decadal.yml
@@ -228,8 +228,8 @@ archive:
 
 # ----
       MIROC6:
-        name:
-        institution:
+        name: "MIRC6"
+        institution: "MIROC"
         src: 
           hcst: "exp/CMIP6/dcppA-hindcast/MIROC6/DCPP/MIROC/MIROC6/dcppA-hindcast/"
           fcst: 
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 899b1291..aeae3924 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -8,7 +8,7 @@ calibrate_datasets <- function(recipe, data) {
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
   
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
-
+  
   if (method == "raw") {
     warn(recipe$Run$logger,
 	 paste("The Calibration module has been called, but the calibration",
@@ -26,7 +26,10 @@ calibrate_datasets <- function(recipe, data) {
   } else {
     ## TODO: Calibrate full fields when present
     # Calibration function params
-    mm <- recipe$Analysis$Datasets$Multimodel
+    if (!isFALSE(recipe$Analysis$Datasets$Multimodel)){
+      mm <- TRUE
+    } else {mm <- FALSE}
+    
     if (is.null(recipe$Analysis$ncores)) {
       ncores <- 1
     } else {
@@ -42,12 +45,12 @@ calibrate_datasets <- function(recipe, data) {
     # Replicate observation array for the multi-model case
     ## TODO: Implement for obs.full_val
     if (mm) {
-      obs.mm <- obs$data
+      obs.mm <- data$obs$data
       for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
         obs.mm <- abind(obs.mm, data$obs$data,
   		      along=which(names(dim(data$obs$data)) == 'dat'))
       }
-      names(dim(obs.mm)) <- names(dim(obs$data))
+      names(dim(obs.mm)) <- names(dim(data$obs$data))
       data$obs$data <- obs.mm
       remove(obs.mm)
     }
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index e5dafd0d..fd21d2cb 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -3,10 +3,43 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 # Load required libraries/funs
 source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
+source('modules/Loading/join_datasets.R')
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
 
 load_datasets <- function(recipe) {
+  
+  if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model
+    
+    output <- load_single_datasets(recipe = recipe)
+    
+  } else { ## Several models and/or Multimodel
+    
+    multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both"
+    
+    ## Loading each model individually
+    data <- list()
+    n_members <- list(hcst = c(), fcst = c())
+    for (model in 1:length(recipe$Analysis$Datasets$System$name)){
+      recipe_single <- recipe
+      recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model]
+      data[[model]] <- load_single_datasets(recipe = recipe_single)
+      n_members$hcst[model] <- dim(data[[model]]$hcst$data)['ensemble']
+      if (!is.null(data[[model]]$fcst)){
+        n_members$fcst[model] <- dim(data[[model]]$fcst$data)['ensemble']
+      }
+      
+    }
+    
+    ## Joining models and/or creating multimodel
+    output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
+    
+  }
+  
+  return(output) # list(hcst = hcst, fcst = fcst, obs = obs)
+}
+
+load_single_datasets <- function(recipe) {
 
   # -------------------------------------------
   # Set params -----------------------------------------
diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index e3677e1d..885c1062 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -10,6 +10,7 @@ source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 source("modules/Loading/helper_loading_decadal.R")
 source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
+source('modules/Loading/join_datasets.R')
 source("tools/libs.R")
 ## TODO: Remove once the fun is included in CSTools
 source("tools/tmp/as.s2dv_cube.R")
@@ -20,12 +21,42 @@ source("tools/tmp/as.s2dv_cube.R")
 # recipe_file <- "modules/Loading/testing_recipes/recipe_decadal_daily.yml"
 
 load_datasets <- function(recipe) {
+  
+  if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model
+    
+    output <- load_single_datasets(recipe = recipe)
+    
+  } else { ## Several models and/or Multimodel
+    
+    multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both"
+    
+    ## Loading each model individually
+    data <- list()
+    n_members <- list(hcst = c(), fcst = c())
+    for (model in 1:length(recipe$Analysis$Datasets$System$name)){
+      recipe_single <- recipe
+      recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model]
+      recipe_single$Analysis$Datasets$System$member <- recipe_single$Analysis$Datasets$System$member[model][[1]]
+      n_members$hcst[model] <- length(recipe_single$Analysis$Datasets$System$member)
+      n_members$fcst[model] <- length(recipe_single$Analysis$Datasets$System$member)
+      data[[model]] <- load_single_datasets(recipe = recipe_single)
+    }
+    
+    ## Joining models and/or creating multimodel
+    output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
+    
+  }
+  
+  return(output) # list(hcst = hcst, fcst = fcst, obs = obs)
+}
 
+load_single_datasets <- function(recipe) {
+  
   archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive_decadal.yml"))$archive
-
+  
   # Print Start() info or not
   DEBUG <- FALSE
-
+  
   ## TODO: this should come from the main script 
   # Create output folder and log:
   
@@ -41,14 +72,14 @@ load_datasets <- function(recipe) {
   lats.max <- as.numeric(recipe$Analysis$Region$latmax) #10
   lons.min <- as.numeric(recipe$Analysis$Region$lonmin) #0
   lons.max <- as.numeric(recipe$Analysis$Region$lonmax) #10
-
+  
   # change to: sdates <- dates2load(recipe, logger)
   sdates_hcst <- as.numeric(recipe$Analysis$Time$hcst_start):as.numeric(recipe$Analysis$Time$hcst_end) #1960:2015
   sdates_fcst <- recipe$Analysis$Time$fcst
- 
+  
   if (store.freq == "monthly_mean") {
     time_ind <- (as.numeric(recipe$Analysis$Time$ftime_min):as.numeric(recipe$Analysis$Time$ftime_max))
-
+    
   } else if (store.freq == "daily_mean") {
     time_ind <- get_daily_time_ind(ftimemin = as.numeric(recipe$Analysis$Time$ftime_min),
                                    ftimemax = as.numeric(recipe$Analysis$Time$ftime_max),
@@ -56,10 +87,10 @@ load_datasets <- function(recipe) {
                                    sdates = sdates_hcst,
                                    calendar = archive$System[[exp.name]]$calendar)
   }
-
-#NOTE: May be used in the future
-#  season <- recipe$Analysis$Time$season
-
+  
+  #NOTE: May be used in the future
+  #  season <- recipe$Analysis$Time$season
+  
   #-------------------------
   # Read from archive:
   #-------------------------
@@ -73,20 +104,20 @@ load_datasets <- function(recipe) {
   if (identical(member, 'all')) {
     member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]]
   }
-
+  
   #-------------------------
   # derived from above:
   #-------------------------
   # Check lat and lon and decide CircularSort
-   circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max)
-
+  circularsort <- check_latlon(latmin = lats.min, latmax = lats.max, lonmin = lons.min, lonmax = lons.max)
+  
   # generate transform params for system and ref
   regrid_params <- get_regrid_params(recipe, archive)
-
+  
   # Only if the time length in each chunk may differ that we need largest_dims_length to be TRUE. Otherwise, set FALSE to increase efficiency.
   need_largest_dims_length <- ifelse(exp.name == 'EC-Earth3-i2', TRUE, FALSE) 
-
-
+  
+  
   #-------------------------------------------
   # Step 1: Load the hcst
   #-------------------------------------------
@@ -95,11 +126,11 @@ load_datasets <- function(recipe) {
                        version = version, sdates = sdates_hcst)
   path_list <- tmp$path_list
   multi_path <- tmp$multi_path
-#  hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
-#                         '$ensemble$', table, '$var$', grid, version)
-#  hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
-
-
+  #  hcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$hcst,
+  #                         '$ensemble$', table, '$var$', grid, version)
+  #  hcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+  
+  
   Start_default_arg_list <- list(
     dat = path_list, #file.path(hcst.path, hcst.files),
     var = variable,
@@ -127,25 +158,25 @@ load_datasets <- function(recipe) {
                        time = c('syear', 'chunk')),
     silent = !DEBUG,
     retrieve = T)
-
+  
   if (!multi_path) {
     Start_hcst_arg_list <- Start_default_arg_list
     hcst <- do.call(Start, Start_hcst_arg_list)
-
+    
   } else {
     Start_hcst_arg_list <- Start_default_arg_list
     Start_hcst_arg_list[['syear']] <- NULL
     Start_hcst_arg_list[['chunk_depends']] <- NULL
     remove_ind <- which(Start_hcst_arg_list[['return_vars']][['time']] == 'syear')
     Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind]
-
+    
     hcst <- do.call(Start, Start_hcst_arg_list)
-
+    
     # Reshape and reorder dimensions
     ## dat should be 1, syear should be length of dat; reorder dimensions
     dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6])
     hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))
-
+    
     # Manipulate time attr because Start() cannot read it correctly
     wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only
     tmp <- array(dim = c(dim(hcst)[c('syear', 'time')]))
@@ -155,284 +186,284 @@ load_datasets <- function(recipe) {
       tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear])
     }
     attr(hcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC')
-
+    
   }
-
+  
   tmp_time_attr <- attr(hcst, 'Variables')$common$time
-
+  
   # change syear to c(sday, sweek, syear)
   # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble]
   dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7])
   if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) {
     error(recipe$Run$logger,
-	  "hcst has problem in matching data and time attr dimension.")
+          "hcst has problem in matching data and time attr dimension.")
     stop()
   }
   dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr))
-
+  
   #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1
   if (multi_path) {
     attributes(hcst)$Variables$common[[variable]] <- attributes(hcst)$Variables$dat1[[variable]]
   }
-
+  
   # Change class from startR_array to s2dv_cube
   suppressWarnings(
-  hcst <- as.s2dv_cube(hcst)
+    hcst <- as.s2dv_cube(hcst)
   )
-
-#-------------------------------------------
-# Step 2: Load the fcst
-#-------------------------------------------
+  
+  #-------------------------------------------
+  # Step 2: Load the fcst
+  #-------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst)) {
-
-  tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, 
-                       version = version, sdates = sdates_fcst)
-  path_list <- tmp$path_list
-  multi_path <- tmp$multi_path
-#  fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
-#                         '$ensemble$', table, '$var$', grid, version)
-#  fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
-
-
-  # monthly & daily
-  if (!multi_path) {
-    #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst
-    Start_fcst_arg_list <- Start_default_arg_list
-    Start_fcst_arg_list[['dat']] <- path_list
-    Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst)
-    fcst <- do.call(Start, Start_fcst_arg_list)
-
-
-  } else { # multi_path
-
-    #TODO: time attribute is not correct. Improve Start().
-    Start_fcst_arg_list <- Start_default_arg_list
-    Start_fcst_arg_list[['dat']] <- path_list
-    Start_fcst_arg_list[['syear']] <- NULL
-    Start_fcst_arg_list[['chunk_depends']] <- NULL
-    remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear')
-    Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind]
-    fcst <- do.call(Start, Start_fcst_arg_list)
-
-    # Reshape and reorder dimensions
-    ## dat should be 1, syear should be length of dat; reorder dimensions
-    ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble]
-    dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6])
-    fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))
-
-    # Manipulate time attr because Start() cannot read it correctly
-    wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only
-    tmp <- array(dim = c(dim(fcst)[c('syear', 'time')]))
-    tmp[1, ] <- wrong_time_attr
-    yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst)
-    for (i_syear in 1:length(yr_diff)) {
-      tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) 
+    
+    tmp <- get_dcpp_path(archive = archive, exp.name = exp.name, table = table, grid = grid, 
+                         version = version, sdates = sdates_fcst)
+    path_list <- tmp$path_list
+    multi_path <- tmp$multi_path
+    #  fcst.path <- file.path(archive$src, archive$System[[exp.name]]$src$fcst,
+    #                         '$ensemble$', table, '$var$', grid, version)
+    #  fcst.files <- paste0('$var$_', table, '_*_*_s$syear$-$ensemble$_', grid, '_$chunk$.nc')
+    
+    
+    # monthly & daily
+    if (!multi_path) {
+      #NOTE: the adjustment for two cases (multiple files per sdate or not) has been made in hcst
+      Start_fcst_arg_list <- Start_default_arg_list
+      Start_fcst_arg_list[['dat']] <- path_list
+      Start_fcst_arg_list[['syear']] <- paste0(sdates_fcst)
+      fcst <- do.call(Start, Start_fcst_arg_list)
+      
+      
+    } else { # multi_path
+      
+      #TODO: time attribute is not correct. Improve Start().
+      Start_fcst_arg_list <- Start_default_arg_list
+      Start_fcst_arg_list[['dat']] <- path_list
+      Start_fcst_arg_list[['syear']] <- NULL
+      Start_fcst_arg_list[['chunk_depends']] <- NULL
+      remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear')
+      Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind]
+      fcst <- do.call(Start, Start_fcst_arg_list)
+      
+      # Reshape and reorder dimensions
+      ## dat should be 1, syear should be length of dat; reorder dimensions
+      ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble]
+      dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6])
+      fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble'))
+      
+      # Manipulate time attr because Start() cannot read it correctly
+      wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only
+      tmp <- array(dim = c(dim(fcst)[c('syear', 'time')]))
+      tmp[1, ] <- wrong_time_attr
+      yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst)
+      for (i_syear in 1:length(yr_diff)) {
+        tmp[(i_syear + 1), ] <- wrong_time_attr + lubridate::years(yr_diff[i_syear]) 
+      }
+      attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC')
+      
     }
-    attr(fcst, 'Variables')$common$time <- as.POSIXct(tmp, origin = '1970-01-01', tz = 'UTC')
-
-  }
-
-  tmp_time_attr <- attr(fcst, 'Variables')$common$time
-
-  # change syear to c(sday, sweek, syear)
-  # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble]
-  dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7])
-  if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) {
-    error(recipe$Run$logger,
-	  "fcst has problem in matching data and time attr dimension.")
-    stop()
-  }
-  dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr))
-
-  #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1
-  if (multi_path) {
-    attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]]
-  }
-
-  # Change class from startR_array to s2dv_cube
-  suppressWarnings(
-  fcst <- as.s2dv_cube(fcst)
-  )
-
-  # Only syear could be different
-  if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) {
-    error(recipe$Run$logger,
-          "hcst and fcst do not share the same dimension structure.")
-    stop()
-  }
-
+    
+    tmp_time_attr <- attr(fcst, 'Variables')$common$time
+    
+    # change syear to c(sday, sweek, syear)
+    # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble]
+    dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7])
+    if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) {
+      error(recipe$Run$logger,
+            "fcst has problem in matching data and time attr dimension.")
+      stop()
+    }
+    dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr))
+    
+    #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1
+    if (multi_path) {
+      attributes(fcst)$Variables$common[[variable]] <- attributes(fcst)$Variables$dat1[[variable]]
+    }
+    
+    # Change class from startR_array to s2dv_cube
+    suppressWarnings(
+      fcst <- as.s2dv_cube(fcst)
+    )
+    
+    # Only syear could be different
+    if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) {
+      error(recipe$Run$logger,
+            "hcst and fcst do not share the same dimension structure.")
+      stop()
+    }
+    
   } else {
     fcst <- NULL
   }
-
-#-------------------------------------------
-# Step 3. Load the reference
-#-------------------------------------------
+  
+  #-------------------------------------------
+  # Step 3. Load the reference
+  #-------------------------------------------
   obs.path <- file.path(archive$src, archive$Reference[[ref.name]]$src, store.freq,
                         paste0(variable, archive$Reference[[ref.name]][[store.freq]][[variable]]))
   obs.files <- paste0('$var$_$file_date$.nc')
-
+  
   # Get from startR_cube
-#  dates <- attr(hcst, 'Variables')$common$time
+  #  dates <- attr(hcst, 'Variables')$common$time
   # Get from s2dv_cube
   dates <- hcst$Dates$start 
   dates_file <- sapply(dates, format, '%Y%m')
   dim(dates_file) <- dim(dates)
-
+  
   if (store.freq == "daily_mean") {
-#////////////////
-# Method 1: use hcst time attr as obs time selector
-#////////////////
-
-  # Set hour to 12:00 to ensure correct date retrieval for daily data
-  lubridate::hour(dates) <- 12 
-  lubridate::minute(dates) <- 00
-  # Restore correct dimensions
-  dim(dates) <- dim(dates_file)
-
-  obs <- Start(dat = file.path(obs.path, obs.files),
-               var = variable,
-               file_date = unique(format(dates, '%Y%m')),
-               time = dates, # [sday, sweek, syear, time]
-               time_across = 'file_date',
-               merge_across_dims = TRUE,
-               split_multiselected_dims = TRUE,
-               latitude = values(list(lats.min, lats.max)),
-               latitude_reorder = Sort(decreasing = TRUE), 
-               longitude = values(list(lons.min, lons.max)),
-               longitude_reorder = circularsort,
-               transform = regrid_params$obs.transform,
-               transform_extra_cells = 2,
-               transform_params = list(grid = regrid_params$obs.gridtype, #nc file
-                                       method = regrid_params$obs.gridmethod),
-               transform_vars = c('latitude', 'longitude'),
-               synonims = list(latitude = c('lat','latitude'), 
-                               longitude = c('lon','longitude')),
-               return_vars = list(latitude = NULL, longitude = NULL,
-                                  time = 'file_date'),
-               silent = !DEBUG,
-               retrieve = TRUE)
-
+    #////////////////
+    # Method 1: use hcst time attr as obs time selector
+    #////////////////
+    
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12 
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+    
+    obs <- Start(dat = file.path(obs.path, obs.files),
+                 var = variable,
+                 file_date = unique(format(dates, '%Y%m')),
+                 time = dates, # [sday, sweek, syear, time]
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 split_multiselected_dims = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(decreasing = TRUE), 
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_extra_cells = 2,
+                 transform_params = list(grid = regrid_params$obs.gridtype, #nc file
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'), 
+                                 longitude = c('lon','longitude')),
+                 return_vars = list(latitude = NULL, longitude = NULL,
+                                    time = 'file_date'),
+                 silent = !DEBUG,
+                 retrieve = TRUE)
+    
   } else if (store.freq == "monthly_mean") {
-#////////////////
-# Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector
-#////////////////
-
-  obs <- Start(dat = file.path(obs.path, obs.files),
-               var = variable,
-               file_date = dates_file, #dates_arr, # [sday, sweek, syear, time]
-               split_multiselected_dims = TRUE,
-               latitude = values(list(lats.min, lats.max)),
-               latitude_reorder = Sort(decreasing = TRUE),
-               longitude = values(list(lons.min, lons.max)),
-               longitude_reorder = circularsort,
-               transform = regrid_params$obs.transform,
-               transform_extra_cells = 2,
-               transform_params = list(grid = regrid_params$obs.gridtype, #nc file
-                                       method = regrid_params$obs.gridmethod),
-               transform_vars = c('latitude', 'longitude'),
-               synonims = list(latitude = c('lat','latitude'),
-                               longitude = c('lon','longitude')),
-               return_vars = list(latitude = NULL, longitude = NULL,
-                                  time = 'file_date'),
-               silent = !DEBUG,
-               retrieve = TRUE)
+    #////////////////
+    # Method 2: reshape hcst time attr's date into an array with time dim then as obs date selector
+    #////////////////
+    
+    obs <- Start(dat = file.path(obs.path, obs.files),
+                 var = variable,
+                 file_date = dates_file, #dates_arr, # [sday, sweek, syear, time]
+                 split_multiselected_dims = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(decreasing = TRUE),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_extra_cells = 2,
+                 transform_params = list(grid = regrid_params$obs.gridtype, #nc file
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 return_vars = list(latitude = NULL, longitude = NULL,
+                                    time = 'file_date'),
+                 silent = !DEBUG,
+                 retrieve = TRUE)
   }
-
-
-#dim(attr(obs, 'Variables')$common$time)
-# sday sweek syear  time 
-#    1     1     2    14 
-
+  
+  
+  #dim(attr(obs, 'Variables')$common$time)
+  # sday sweek syear  time 
+  #    1     1     2    14 
+  
   # Only ensemble dim could be different
   if (!identical(dim(obs), dim(hcst$data)[-9])) {
     error(recipe$Run$logger,
-	  "obs and hcst dimensions do not match.")
+          "obs and hcst dimensions do not match.")
     stop()
   }
   # Add ensemble dim to obs
   dim(obs) <- c(dim(obs), ensemble = 1)
-
+  
   # Change class from startR_array to s2dv_cube
   suppressWarnings(
-  obs <- as.s2dv_cube(obs)
+    obs <- as.s2dv_cube(obs)
   )
-
-
-#-------------------------------------------
-# Step 4. Verify the consistance between data
-#-------------------------------------------
+  
+  
+  #-------------------------------------------
+  # Step 4. Verify the consistance between data
+  #-------------------------------------------
   # dimension
   if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) {
     error(recipe$Run$logger,
-	  "hcst and obs don't share the same dimension names.")
+          "hcst and obs don't share the same dimension names.")
     stop()
   } else {
     ens_ind <- which(names(dim(obs$data)) == 'ensemble')
     match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data)))
     if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) {
       error(recipe$Run$logger,
-	    "hcst and obs don't share the same dimension length.")
+            "hcst and obs don't share the same dimension length.")
       stop()
     }
   }
-
+  
   # time attribute
   if (!identical(format(hcst$Dates$start, '%Y%m'),
                  format(obs$Dates$start, '%Y%m'))) {
     error(recipe$Run$logger,
-	  "hcst and obs don't share the same time.")
+          "hcst and obs don't share the same time.")
     stop()
   }
-
+  
   # lat and lon attributes
   if (!(recipe$Analysis$Regrid$type == 'none')) {
     if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
       lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-			     "Please check the original grids and the",
-			     "regrid parameters in your recipe.")
+                             "Please check the original grids and the",
+                             "regrid parameters in your recipe.")
       error(recipe$Run$logger, lat_error_msg)
       hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-			     "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+                             "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
       info(recipe$Run$logger, hcst_lat_msg)
       obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-			     "; Last obs lat: ", obs$lat[length(obs$lat)])
+                            "; Last obs lat: ", obs$lat[length(obs$lat)])
       info(recipe$Run$logger, obs_lat_msg)
       stop("hcst and obs don't share the same latitudes.")
     }
-
+    
     if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
       lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-			     "Please check the original grids and the",
-			     "regrid parameters in your recipe.")
+                             "Please check the original grids and the",
+                             "regrid parameters in your recipe.")
       error(recipe$Run$logger, lon_error_msg)
       hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-			     "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+                             "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
       info(recipe$Run$logger, hcst_lon_msg)
       obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-			     "; Last obs lon: ", obs$lon[length(obs$lon)])
+                            "; Last obs lon: ", obs$lon[length(obs$lon)])
       info(recipe$Run$logger, obs_lon_msg)
       stop("hcst and obs don't share the same longitudes.")
     }
- }
-
+  }
+  
   # Check fcst
   if (!is.null(fcst)) {
     # dimension
     if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) {
       error(recipe$Run$logger,
-	    "hcst and fcst don't share the same dimension names.")
+            "hcst and fcst don't share the same dimension names.")
       stop()
     } else {
       ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear'))
       match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data)))
       if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) {
         error(recipe$Run$logger,
-	      "hcst and fcst don't share the same dimension length.")
+              "hcst and fcst don't share the same dimension length.")
         stop()
       }
     }
-
+    
     # lat and lon attributes
     if (!(recipe$Analysis$Regrid$type == 'none')) {
       if (!identical(as.vector(hcst$lat), as.vector(fcst$lat))) {
@@ -448,7 +479,7 @@ load_datasets <- function(recipe) {
         info(recipe$Run$logger, fcst_lat_msg)
         stop("hcst and fcst don't share the same latitudes.")
       }
-
+      
       if (!identical(as.vector(hcst$lon), as.vector(fcst$lon))) {
         lon_error_msg <- paste("Longitude mismatch between hcst and fcst.",
                                "Please check the original grids and the",
@@ -463,18 +494,18 @@ load_datasets <- function(recipe) {
         stop("hcst and fcst don't share the same longitudes.")
       }
     }
-
+    
   }
-
-
-#-------------------------------------------
-# Step 5. Tune data
-#-------------------------------------------
+  
+  
+  #-------------------------------------------
+  # Step 5. Tune data
+  #-------------------------------------------
   # Remove negative values in accumulative variables
   dictionary <- read_yaml("conf/variable-dictionary.yml")
   if (dictionary$vars[[variable]]$accum) {
     info(recipe$Run$logger, 
-	 " Accumulated variable: setting negative values to zero.")
+         " Accumulated variable: setting negative values to zero.")
     obs$data[obs$data < 0] <- 0
     hcst$data[hcst$data < 0] <- 0
     if (!is.null(fcst)) {
@@ -486,12 +517,12 @@ load_datasets <- function(recipe) {
   if (variable == "prlr") {
     # Verify that the units are m/s and the same in obs and hcst
     if (((attr(obs$Variable, "variable")$units == "m s-1") ||
-	 (attr(obs$Variable, "variable")$units == "m s**-1")) &&
-	((attr(hcst$Variable, "variable")$units == "m s-1") ||
-	 (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
+         (attr(obs$Variable, "variable")$units == "m s**-1")) &&
+        ((attr(hcst$Variable, "variable")$units == "m s-1") ||
+         (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
       
       info(recipe$Run$logger,
-	   "Converting precipitation from m/s to mm/day.")
+           "Converting precipitation from m/s to mm/day.")
       obs$data <- obs$data*86400*1000
       attr(obs$Variable, "variable")$units <- "mm/day"
       hcst$data <- hcst$data*86400*1000
@@ -502,11 +533,11 @@ load_datasets <- function(recipe) {
       }
     }
   }
-
-#-------------------------------------------
-# Step 6. Print summary
-#-------------------------------------------
-
+  
+  #-------------------------------------------
+  # Step 6. Print summary
+  #-------------------------------------------
+  
   # Print a summary of the loaded data for the user, for each object
   if (recipe$Run$logger$threshold <= 2) {
     data_summary(hcst, recipe)
@@ -515,10 +546,10 @@ load_datasets <- function(recipe) {
       data_summary(fcst, recipe)
     }
   }
-
+  
   info(recipe$Run$logger,
        "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
-
-
+  
+  
   return(list(hcst = hcst, fcst = fcst, obs = obs))
 }
diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R
new file mode 100644
index 00000000..78922d15
--- /dev/null
+++ b/modules/Loading/join_datasets.R
@@ -0,0 +1,59 @@
+
+join_datasets <- function(data, multimodel, n_members) {
+  
+  ## Taking the first model as "template"
+  output <- data[[1]]
+  
+  if (is.null(output$fcst$data)){
+    exps <- 'hcst'
+  } else {exps <- c('hcst','fcst')}
+  
+  for (exp in exps){
+    
+    ## New dimensions of the arrays
+    if (isFALSE(multimodel)){ ## Individual models
+      n_models <- length(data)
+      max_members <- max(n_members[[exp]])
+    } else if (isTRUE(multimodel)){ ## Multimodel
+      n_models <- length(data)
+      max_members <- sum(n_members[[exp]])
+    } else if (identical(multimodel,'both')){ ## Individual models and multimodel
+      n_models <- length(data)+1
+      max_members <- sum(n_members[[exp]])
+    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')}
+    dims <- dim(data[[1]][[exp]]$data)
+    dims['dat'] <- n_models
+    dims['ensemble'] <- max_members
+    stopifnot(identical(names(dims), c('dat','var','sday','sweek','syear','time','latitude','longitude','ensemble')))
+    
+    ## Creating empty models and multimodel
+    output[[exp]]$data <- array(data = NA, dim = dims)
+    ## Filling the models
+    for (model in 1:length(data)){
+      output[[exp]]$data[model,,,,,,,,1:n_members[[exp]][model]] <- data[[model]][[exp]]$data
+    }
+    
+    ## Filling the multimodel
+    if (isFALSE(multimodel)){ ## Individual models
+      # output[[exp]]$data <- output[[exp]]$data
+    } else if (isTRUE(multimodel)){ ## Multimodel
+      output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, 
+                                               merge_dims = c('ensemble','dat'),
+                                               rename_dim = 'ensemble', 
+                                               na.rm = TRUE)
+      output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data,
+                                            posdim = 1,
+                                            lendim = 1,
+                                            name = 'dat')
+    } else if (identical(multimodel,'both')){ ## Individual models and multimodel
+      multimodel_data <- CSTools::MergeDims(data = output[[exp]]$data, 
+                                            merge_dims = c('ensemble','dat'),
+                                            rename_dim = 'ensemble', 
+                                            na.rm = TRUE)
+      output[[exp]]$data[n_models,,,,,,,,1:max_members] <- multimodel_data
+    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')}
+    
+  }
+  
+  return(output)
+}
diff --git a/recipes/tests/recipe_model_decadal.yml b/recipes/tests/recipe_model_decadal.yml
new file mode 100644
index 00000000..d7d5339d
--- /dev/null
+++ b/recipes/tests/recipe_model_decadal.yml
@@ -0,0 +1,51 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for one decadal forecast system (only hindcast)
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System: 
+      name: EC-Earth3-i4
+      member: r1i4p1f1,r2i4p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: JRA-55
+  Time:
+    fcst_year: 
+    hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB
+    hcst_end: 2018
+#    season: 'Annual'
+    ftime_min: 6 # Apr
+    ftime_max: 8
+  Region:
+    latmin: 40
+    latmax: 55 
+    lonmin: 0
+    lonmax: 10 
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies: 
+      compute: no
+      cross_validation: 
+    Calibration:
+      method: 'bias'
+    Skill:
+      metric: EnsCorr RPSS
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+    Indicators:
+      index: FALSE
+  ncores: 8 # Optional, int: number of cores, defaults to 1
+  remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
+
diff --git a/recipes/tests/recipe_model_seasonal.yml b/recipes/tests/recipe_model_seasonal.yml
new file mode 100644
index 00000000..a45ff2c8
--- /dev/null
+++ b/recipes/tests/recipe_model_seasonal.yml
@@ -0,0 +1,55 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for one seasonal forecast system (only hindcast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: DWD-GCFS2.1 # ECMWF-SEAS5 # Mandatory, str: System name.
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name. 
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
+    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”.
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+    Anomalies:
+      compute: no # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
+      # for quantiles and probability categories. # Each set of thresholds should be
+      # enclosed within brackets.
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs. 
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
diff --git a/recipes/tests/recipe_multimodel_decadal.yml b/recipes/tests/recipe_multimodel_decadal.yml
new file mode 100644
index 00000000..2ec31fb3
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_decadal.yml
@@ -0,0 +1,51 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for multi-model decadal forecast (only hindcast)
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System: 
+      name: [EC-Earth3-i4, FGOALS-f3-L]
+      member: [[r1i4p1f1,r2i4p1f1], [r1i1p1f1,r2i1p1f1,r3i1p1f1]]
+    Multimodel: both ## yes, no, both
+    Reference:
+      name: JRA-55
+  Time:
+    fcst_year: 
+    hcst_start: 2015 # 2015-2016 in dcppA, 2017-2018 in dcppB
+    hcst_end: 2018
+#    season: 'Annual'
+    ftime_min: 6 # Apr
+    ftime_max: 8
+  Region:
+    latmin: 40
+    latmax: 55 
+    lonmin: 0
+    lonmax: 10 
+  Regrid:
+    method: bilinear
+    type: to_reference
+  Workflow:
+    Anomalies: 
+      compute: no
+      cross_validation: 
+    Calibration:
+      method: 'bias'
+    Skill:
+      metric: EnsCorr RPSS
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+    Indicators:
+      index: FALSE
+  ncores: 8 # Optional, int: number of cores, defaults to 1
+  remove_NAs: FALSE # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs/
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
+
diff --git a/recipes/tests/recipe_multimodel_seasonal.yml b/recipes/tests/recipe_multimodel_seasonal.yml
new file mode 100644
index 00000000..87a5e31e
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal.yml
@@ -0,0 +1,55 @@
+Description:
+  Author: Carlos Delgado Torres
+  Info: Test for one seasonal forecast system (only hindcast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System name.
+    Multimodel: both # Mandatory, bool: Either yes/true or no/false or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name. 
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
+    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”.
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+    Anomalies:
+      compute: no # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
+      # for quantiles and probability categories. # Each set of thresholds should be
+      # enclosed within brackets.
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs. 
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/cdelgado/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/cdelgado/gitlab/auto-s2s/
-- 
GitLab


From 266f0b2b2db0d15ce97abc482beb61828bea3ee8 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 8 Sep 2023 17:09:14 +0200
Subject: [PATCH 02/10] Code adjustment; Add NOTE

---
 modules/Loading/Loading.R                     | 16 +++--
 modules/Loading/join_datasets.R               | 39 ++++++------
 .../tests/recipe_multimodel_seasonal_aho.yml  | 59 +++++++++++++++++++
 3 files changed, 87 insertions(+), 27 deletions(-)
 create mode 100644 recipes/tests/recipe_multimodel_seasonal_aho.yml

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index b864b6ff..414f893e 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -10,7 +10,7 @@ source("tools/libs.R")
 
 load_datasets <- function(recipe) {
   
-  if (length(recipe$Analysis$Datasets$System$name) == 1){ ## Only one model
+  if (length(recipe$Analysis$Datasets$System$name) == 1) { ## Only one model
     
     output <- load_single_datasets(recipe = recipe)
     
@@ -18,16 +18,20 @@ load_datasets <- function(recipe) {
     
     multimodel <- recipe$Analysis$Datasets$Multimodel ## "TRUE", "FALSE" or "both"
     
-    ## Loading each model individually
+    # Loading each model individually
+
+    ##NOTE:
+    ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F).
+    ## if retrieve = F --> must be a list be startR_cube  --> unlist(data, recursive = F). 'dat', 'member' cannot be chunking dims.
     data <- list()
     n_members <- list(hcst = c(), fcst = c())
-    for (model in 1:length(recipe$Analysis$Datasets$System$name)){
+    for (model in 1:length(recipe$Analysis$Datasets$System$name)) {
       recipe_single <- recipe
       recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model]
       data[[model]] <- load_single_datasets(recipe = recipe_single)
-      n_members$hcst[model] <- dim(data[[model]]$hcst$data)['ensemble']
-      if (!is.null(data[[model]]$fcst)){
-        n_members$fcst[model] <- dim(data[[model]]$fcst$data)['ensemble']
+      n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble']
+      if (!is.null(data[[model]]$fcst)) {
+        n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble']
       }
       
     }
diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R
index 78922d15..623cf0f7 100644
--- a/modules/Loading/join_datasets.R
+++ b/modules/Loading/join_datasets.R
@@ -1,42 +1,40 @@
-
 join_datasets <- function(data, multimodel, n_members) {
   
   ## Taking the first model as "template"
+  ##TODO: the s2dv_cube has only the first model's info
   output <- data[[1]]
   
-  if (is.null(output$fcst$data)){
+  if (is.null(output$fcst$data)) {
     exps <- 'hcst'
   } else {exps <- c('hcst','fcst')}
   
-  for (exp in exps){
+  for (exp in exps) {
     
     ## New dimensions of the arrays
-    if (isFALSE(multimodel)){ ## Individual models
+    if (isFALSE(multimodel)) { ## Individual models
       n_models <- length(data)
       max_members <- max(n_members[[exp]])
-    } else if (isTRUE(multimodel)){ ## Multimodel
+    } else if (isTRUE(multimodel)) { ## Multimodel
       n_models <- length(data)
       max_members <- sum(n_members[[exp]])
-    } else if (identical(multimodel,'both')){ ## Individual models and multimodel
+    } else if (identical(multimodel,'both')) { ## Individual models and multimodel
       n_models <- length(data)+1
       max_members <- sum(n_members[[exp]])
-    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')}
+    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".')}
     dims <- dim(data[[1]][[exp]]$data)
     dims['dat'] <- n_models
     dims['ensemble'] <- max_members
-    stopifnot(identical(names(dims), c('dat','var','sday','sweek','syear','time','latitude','longitude','ensemble')))
+    stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble')))
     
     ## Creating empty models and multimodel
     output[[exp]]$data <- array(data = NA, dim = dims)
     ## Filling the models
-    for (model in 1:length(data)){
-      output[[exp]]$data[model,,,,,,,,1:n_members[[exp]][model]] <- data[[model]][[exp]]$data
+    for (model in 1:length(data)) {
+      output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data
     }
     
     ## Filling the multimodel
-    if (isFALSE(multimodel)){ ## Individual models
-      # output[[exp]]$data <- output[[exp]]$data
-    } else if (isTRUE(multimodel)){ ## Multimodel
+    if (isTRUE(multimodel)) { ## Multimodel
       output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, 
                                                merge_dims = c('ensemble','dat'),
                                                rename_dim = 'ensemble', 
@@ -45,14 +43,13 @@ join_datasets <- function(data, multimodel, n_members) {
                                             posdim = 1,
                                             lendim = 1,
                                             name = 'dat')
-    } else if (identical(multimodel,'both')){ ## Individual models and multimodel
-      multimodel_data <- CSTools::MergeDims(data = output[[exp]]$data, 
-                                            merge_dims = c('ensemble','dat'),
-                                            rename_dim = 'ensemble', 
-                                            na.rm = TRUE)
-      output[[exp]]$data[n_models,,,,,,,,1:max_members] <- multimodel_data
-    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both"')}
-    
+    } else if (identical(multimodel,'both')) { ## Individual models and multimodel
+      output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- 
+        CSTools::MergeDims(data = output[[exp]]$data, 
+                           merge_dims = c('ensemble','dat'),
+                           rename_dim = 'ensemble', 
+                           na.rm = TRUE)
+    }
   }
   
   return(output)
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml
new file mode 100644
index 00000000..2696b2b6
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml
@@ -0,0 +1,59 @@
+Description:
+  Author: An-Chi Ho
+  Info: Test for one seasonal forecast system (only hindcast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: [ECMWF-SEAS5, DWD-GCFS2.1] # Mandatory, str: System name.
+    Multimodel: both # Mandatory, bool: Either yes/true or no/false or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name. 
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
+    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”.
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+      save: none
+    Anomalies:
+      compute: no # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
+      # for quantiles and probability categories. # Each set of thresholds should be
+      # enclosed within brackets.
+      save: none
+    Visualization:
+      plots: forecast_ensemble_mean
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs. 
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
-- 
GitLab


From 6b3630371384c00339d9964cad03e8526ed5428d Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 12 Sep 2023 14:39:48 +0200
Subject: [PATCH 03/10] add dat_dim

---
 modules/Skill/Skill.R                            | 15 +++++++++++++++
 recipes/tests/recipe_multimodel_seasonal_aho.yml |  4 +++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index a6ae6d48..9cf3a1d0 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -84,6 +84,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
 #        correct.")
 #   }
   time_dim <- 'syear'
+  dat_dim <- 'dat'
   memb_dim <- 'ensemble'
   metrics <- tolower(recipe$Analysis$Workflow$Skill$metric)
   if (is.null(recipe$Analysis$ncores)) {
@@ -122,6 +123,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     if (metric %in% c('rps', 'frps')) {
       skill <- RPS(data$hcst$data, data$obs$data,
                    time_dim = time_dim,
+                   dat_dim = dat_dim,
                    memb_dim = memb_dim,
                    Fair = Fair,
                    cross.val = cross.val,
@@ -137,6 +139,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('rpss', 'frpss')) {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
                     cross.val = cross.val,
@@ -149,6 +152,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric == 'bss10') {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim, 
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     prob_thresholds = 0.1,
                     cross.val = cross.val,
@@ -162,6 +166,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric == 'bss90') {
       skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     prob_thresholds = 0.9,
                     cross.val = cross.val,
@@ -175,6 +180,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('crps', 'fcrps')) {
       skill <- CRPS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
                     ncores = ncores)
@@ -188,6 +194,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric %in% c('crpss', 'fcrpss')) {
       skill <- CRPSS(data$hcst$data, data$obs$data,
                      time_dim = time_dim,
+                     dat_dim = dat_dim,
                      memb_dim = memb_dim,
                      Fair = Fair,
                      ncores = ncores)
@@ -215,11 +222,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
           (recipe$Analysis$Workflow$Anomalies$compute)) {
         skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data,
                       time_dim = time_dim,
+                      dat_dim = dat_dim,
                       memb_dim = memb_dim,
                       ncores = ncores)
       } else {
         skill <- Bias(data$hcst$data, data$obs$data,
                       time_dim = time_dim,
+                      dat_dim = dat_dim,
                       memb_dim = memb_dim,
                       ncores = ncores)
       }
@@ -231,11 +240,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
           (recipe$Analysis$Workflow$Anomalies$compute)) {
         skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data,
                            time_dim = time_dim,
+                           dat_dim = dat_dim,
                            memb_dim = memb_dim,
                            ncores = ncores)
       } else {
         skill <- AbsBiasSS(data$hcst$data, data$obs$data,
                            time_dim = time_dim,
+                           dat_dim = dat_dim,
                            memb_dim = memb_dim,
                            ncores = ncores)
       }
@@ -251,6 +262,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
                           dat_dim = 'dat', 
                           time_dim = time_dim, 
                           method = 'pearson', 
+                          dat_dim = dat_dim,
                           memb_dim = memb_dim,
                           memb = memb,
                           conf = F,
@@ -267,6 +279,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
       skill <- RMSSS(data$hcst$data, data$obs$data,
                      dat_dim = 'dat',
                      time_dim = time_dim,
+                     dat_dim = dat_dim,
                      memb_dim = memb_dim,
                      pval = FALSE,
                      sign = TRUE,
@@ -280,6 +293,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric == 'mse') {
       skill <- MSE(data$hcst$data, data$obs$data,
                    time_dim = time_dim,
+                   dat_dim = dat_dim,
                    memb_dim = memb_dim,
                    dat_dim = 'dat',
                    comp_dim = NULL,
@@ -292,6 +306,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     } else if (metric == 'msss') {
       skill <- MSSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
+                    dat_dim = dat_dim,
                     memb_dim = memb_dim,
                     dat_dim = 'dat',
                     sign = TRUE,
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml
index 2696b2b6..d87394dd 100644
--- a/recipes/tests/recipe_multimodel_seasonal_aho.yml
+++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml
@@ -31,13 +31,15 @@ Analysis:
                     # or CDO-accepted grid.
   Workflow:
     Calibration:
-      method: mse_min # Mandatory, str: Calibration method.
+      method: none #mse_min # Mandatory, str: Calibration method.
       save: none
     Anomalies:
       compute: no # Mandatory, bool: Either yes/true or no/false
       cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+      save: none
     Skill:
       metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+      save: none
     Probabilities:
       percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
       # for quantiles and probability categories. # Each set of thresholds should be
-- 
GitLab


From 30ce1e2681e26ba80a0188ecd29225f738710ec4 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 13 Sep 2023 11:17:13 +0200
Subject: [PATCH 04/10] Some fixes and TODO for multimodel

---
 modules/Anomalies/Anomalies.R                 |  1 +
 modules/Loading/Loading.R                     | 10 ++-
 modules/Loading/join_datasets.R               | 10 +--
 modules/Visualization/R/plot_ensemble_mean.R  |  1 +
 modules/Visualization/R/plot_skill_metrics.R  |  1 +
 .../tests/recipe_multimodel_seasonal_aho.yml  | 16 ++---
 .../recipe_multimodel_seasonal_aho_single.yml | 61 +++++++++++++++++++
 7 files changed, 87 insertions(+), 13 deletions(-)
 create mode 100644 recipes/tests/recipe_multimodel_seasonal_aho_single.yml

diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R
index 23014cbd..4d9cfdc1 100644
--- a/modules/Anomalies/Anomalies.R
+++ b/modules/Anomalies/Anomalies.R
@@ -21,6 +21,7 @@ compute_anomalies <- function(recipe, data) {
     original_dims <- data$hcst$dim
 
     # Compute anomalies
+    ##TODO: all NAs returned
     anom <- CST_Anomaly(data$hcst, data$obs,
                         cross = cross,
                         memb = TRUE,
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 6c8a6192..38e84c71 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -38,7 +38,15 @@ load_datasets <- function(recipe) {
     
     ## Joining models and/or creating multimodel
     output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
-    
+
+    ##TODO: Adjust s2dv_cube
+    ### $dims
+    output$hcst$dims <- dim(output$hcst$data)
+    output$obs$dims <- dim(output$obs$data)
+    if (!is.null(output$fcst)) {
+      output$fcst$dims <- dim(output$fcst$data)
+    }
+
   }
   
   return(output) # list(hcst = hcst, fcst = fcst, obs = obs)
diff --git a/modules/Loading/join_datasets.R b/modules/Loading/join_datasets.R
index 623cf0f7..1bacb619 100644
--- a/modules/Loading/join_datasets.R
+++ b/modules/Loading/join_datasets.R
@@ -18,9 +18,11 @@ join_datasets <- function(data, multimodel, n_members) {
       n_models <- length(data)
       max_members <- sum(n_members[[exp]])
     } else if (identical(multimodel,'both')) { ## Individual models and multimodel
-      n_models <- length(data)+1
+      n_models <- length(data) + 1
       max_members <- sum(n_members[[exp]])
-    } else {stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".')}
+    } else {
+      stop('Incorrect "Multimodel" in the recipe. It must be "yes", "no" or "both".')
+    }
     dims <- dim(data[[1]][[exp]]$data)
     dims['dat'] <- n_models
     dims['ensemble'] <- max_members
@@ -36,7 +38,7 @@ join_datasets <- function(data, multimodel, n_members) {
     ## Filling the multimodel
     if (isTRUE(multimodel)) { ## Multimodel
       output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, 
-                                               merge_dims = c('ensemble','dat'),
+                                               merge_dims = c('ensemble', 'dat'),
                                                rename_dim = 'ensemble', 
                                                na.rm = TRUE)
       output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data,
@@ -46,7 +48,7 @@ join_datasets <- function(data, multimodel, n_members) {
     } else if (identical(multimodel,'both')) { ## Individual models and multimodel
       output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- 
         CSTools::MergeDims(data = output[[exp]]$data, 
-                           merge_dims = c('ensemble','dat'),
+                           merge_dims = c('ensemble', 'dat'),
                            rename_dim = 'ensemble', 
                            na.rm = TRUE)
     }
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 07e35706..37f97ffd 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -60,6 +60,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) {
     for (i_syear in start_date) {
       if (length(start_date) == 1) {
         i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ]
+        ## TODO: Consider multimodel here
         outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", start_date)
       } else {
         i_var_ens_mean <- var_ens_mean[which(start_date == i_syear), , , ]
diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R
index 76dae774..750e1087 100644
--- a/modules/Visualization/R/plot_skill_metrics.R
+++ b/modules/Visualization/R/plot_skill_metrics.R
@@ -112,6 +112,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
           col_sup <- colorbar[length(colorbar)]
         }
         # Reorder dimensions
+browser(skipCalls=1)
         skill <- Reorder(skill, c("time", "longitude", "latitude"))
         # If the significance has been requested and the variable has it,
         # retrieve it and reorder its dimensions. 
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml
index d87394dd..6c2d2965 100644
--- a/recipes/tests/recipe_multimodel_seasonal_aho.yml
+++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml
@@ -16,10 +16,10 @@ Analysis:
   Time:
     sdate: '1101' # Mandatory, int: Start date, 'mmdd'
     fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
-    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY'
     hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
     ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
-    ftime_max: 2 # Mandatory, int: Last forecast time step in months. Starts at “1”.
+    ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”.
   Region:
     latmin: 0 # Mandatory, int: minimum latitude
     latmax: 3 # Mandatory, int: maximum latitude
@@ -31,22 +31,22 @@ Analysis:
                     # or CDO-accepted grid.
   Workflow:
     Calibration:
-      method: none #mse_min # Mandatory, str: Calibration method.
-      save: none
+      method: mse_min # Mandatory, str: Calibration method.
+      save: none #all
     Anomalies:
-      compute: no # Mandatory, bool: Either yes/true or no/false
+      compute: yes # Mandatory, bool: Either yes/true or no/false
       cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
       save: none
     Skill:
       metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
-      save: none
+      save: none #all
     Probabilities:
       percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
       # for quantiles and probability categories. # Each set of thresholds should be
       # enclosed within brackets.
-      save: none
+      save: none #all
     Visualization:
-      plots: forecast_ensemble_mean
+      plots: forecast_ensemble_mean most_likely_terciles #skill_metrics
     Indicators:
       index: no # This feature is not implemented yet
   ncores: 10 # Optional, int: number of cores to be used in parallel computation.
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
new file mode 100644
index 00000000..715da1e0
--- /dev/null
+++ b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
@@ -0,0 +1,61 @@
+Description:
+  Author: An-Chi Ho
+  Info: Test for one seasonal forecast system (only hindcast)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'
+  Variables:
+    name: tas # Mandatory, str: variable name in /esarchive/
+    freq: monthly_mean # Mandatory, str: 'monthly_mean' or 'daily_mean'
+  Datasets:
+    System:
+      name: ECMWF-SEAS5 # Mandatory, str: System name.
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false or both
+    Reference:
+      name: ERA5 # Mandatory, str: Reference name. 
+  Time:
+    sdate: '1101' # Mandatory, int: Start date, 'mmdd'
+    fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
+    hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
+    ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”.
+  Region:
+    latmin: 0 # Mandatory, int: minimum latitude
+    latmax: 3 # Mandatory, int: maximum latitude
+    lonmin: 0 # Mandatory, int: minimum longitude
+    lonmax: 2 # Mandatory, int: maximum longitude
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method.
+    type: 'r180x90' # Mandatory, str: 'to_system', 'to_reference', 'none',
+                    # or CDO-accepted grid.
+  Workflow:
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method.
+      save: none
+    Anomalies:
+      compute: yes # Mandatory, bool: Either yes/true or no/false
+      cross_validation: no # Mandatory if 'compute: yes', bool: Either yes/true or no/false
+      save: none
+    Skill:
+      metric: RPSS CRPSS # Mandatory, str: List of skill metrics.
+      save: none
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Optional: Thresholds 
+      # for quantiles and probability categories. # Each set of thresholds should be
+      # enclosed within brackets.
+      save: none
+    Visualization:
+      plots: forecast_ensemble_mean
+    Indicators:
+      index: no # This feature is not implemented yet
+  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+             # If left empty, defaults to 1.
+  remove_NAs: TRUE # Optional, bool: Whether to remove NAs. 
+                   # If left empty, defaults to FALSE.
+  Output_format: S2S4E # This feature is not implemented yet
+Run:
+  Loglevel: INFO # Optional, str: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. Default 'INFO'.
+  Terminal: TRUE # Optional, bool: Whether to display log messages in the terminal. Default 'TRUE'.
+  output_dir: /esarchive/scratch/aho/tmp/auto-s2s_outputs # Mandatory, str: output directory
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
-- 
GitLab


From 45532d5d547ec84b5b5a01c8f9cb32917b0115e9 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 13 Sep 2023 11:21:41 +0200
Subject: [PATCH 05/10] Move helper functions to R/

---
 modules/Loading/Loading.R                        | 2 +-
 modules/Loading/Loading_decadal.R                | 4 ++--
 modules/Loading/{ => R}/helper_loading_decadal.R | 0
 modules/Loading/{ => R}/join_datasets.R          | 0
 modules/Visualization/R/plot_skill_metrics.R     | 1 -
 5 files changed, 3 insertions(+), 4 deletions(-)
 rename modules/Loading/{ => R}/helper_loading_decadal.R (100%)
 rename modules/Loading/{ => R}/join_datasets.R (100%)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 38e84c71..9c259722 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -1,7 +1,7 @@
 ## TODO: remove paths to personal scratchs 
 source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 # Load required libraries/funs
-source('modules/Loading/join_datasets.R')
+source('modules/Loading/R/join_datasets.R')
 source("modules/Loading/R/dates2load.R")
 source("modules/Loading/R/get_timeidx.R")
 source("modules/Loading/R/check_latlon.R")
diff --git a/modules/Loading/Loading_decadal.R b/modules/Loading/Loading_decadal.R
index c688545a..8a07cb8f 100644
--- a/modules/Loading/Loading_decadal.R
+++ b/modules/Loading/Loading_decadal.R
@@ -7,8 +7,8 @@
 ## TODO: remove paths to personal scratchs 
 source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
 # Load required libraries/funs
-source("modules/Loading/helper_loading_decadal.R")
-source('modules/Loading/join_datasets.R')
+source("modules/Loading/R/helper_loading_decadal.R")
+source('modules/Loading/R/join_datasets.R')
 source("modules/Loading/R/dates2load.R")
 source("modules/Loading/R/check_latlon.R")
 source("modules/Loading/R/get_timeidx.R")
diff --git a/modules/Loading/helper_loading_decadal.R b/modules/Loading/R/helper_loading_decadal.R
similarity index 100%
rename from modules/Loading/helper_loading_decadal.R
rename to modules/Loading/R/helper_loading_decadal.R
diff --git a/modules/Loading/join_datasets.R b/modules/Loading/R/join_datasets.R
similarity index 100%
rename from modules/Loading/join_datasets.R
rename to modules/Loading/R/join_datasets.R
diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R
index 750e1087..76dae774 100644
--- a/modules/Visualization/R/plot_skill_metrics.R
+++ b/modules/Visualization/R/plot_skill_metrics.R
@@ -112,7 +112,6 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
           col_sup <- colorbar[length(colorbar)]
         }
         # Reorder dimensions
-browser(skipCalls=1)
         skill <- Reorder(skill, c("time", "longitude", "latitude"))
         # If the significance has been requested and the variable has it,
         # retrieve it and reorder its dimensions. 
-- 
GitLab


From ebbd1499f489c75de8415f0bfff3f28e269811fe Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Thu, 14 Sep 2023 11:39:40 +0200
Subject: [PATCH 06/10] Consider Compute() case; forecast ensemble mean na.rm =
 T for multi-model

---
 modules/Loading/Loading.R                     | 57 ++++++++++++-------
 modules/Loading/R/join_datasets.R             |  4 ++
 modules/Visualization/R/plot_ensemble_mean.R  |  7 ++-
 .../recipe_multimodel_seasonal_aho_single.yml |  2 +-
 4 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 9c259722..04a30f2d 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -8,7 +8,7 @@ source("modules/Loading/R/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
 
-load_datasets <- function(recipe) {
+load_datasets <- function(recipe, retrieve = T) {
   
   if (length(recipe$Analysis$Datasets$System$name) == 1) { ## Only one model
     
@@ -22,29 +22,44 @@ load_datasets <- function(recipe) {
 
     ##NOTE:
     ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F).
-    ## if retrieve = F --> must be a list be startR_cube  --> unlist(data, recursive = F). 'dat', 'member' cannot be chunking dims.
+    ## if retrieve = F --> must be a list be startR_cube  --> unlist(data, recursive = F). 'dat', 'ensemble' need to be target_dims.
     data <- list()
     n_members <- list(hcst = c(), fcst = c())
     for (model in 1:length(recipe$Analysis$Datasets$System$name)) {
       recipe_single <- recipe
       recipe_single$Analysis$Datasets$System$name <- recipe_single$Analysis$Datasets$System$name[model]
-      data[[model]] <- load_single_datasets(recipe = recipe_single)
-      n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble']
-      if (!is.null(data[[model]]$fcst)) {
-        n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble']
+      data[[model]] <- load_single_datasets(recipe = recipe_single, retrieve = retrieve)
+
+      if (retrieve) {
+        n_members$hcst[model] <- data[[model]]$hcst$dims['ensemble']
+        if (!is.null(data[[model]]$fcst)) {
+          n_members$fcst[model] <- data[[model]]$fcst$dims['ensemble']
+        }
+      } else {
+        n_members$hcst[model] <- attr(data[[model]]$hcst, 'Dimensions')['ensemble']
+        if (!is.null(data[[model]]$fcst)) {
+          n_members$fcst[model] <- attr(data[[model]]$fcst, 'Dimensions')['ensemble']
+        }
       }
-      
     }
-    
-    ## Joining models and/or creating multimodel
-    output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
+   
+    if (retrieve) {
+      ## Joining models and/or creating multimodel
+      output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
+
+      ##TODO: Adjust s2dv_cube
+      ### $dims
+      output$hcst$dims <- dim(output$hcst$data)
+      output$obs$dims <- dim(output$obs$data)
+      if (!is.null(output$fcst)) {
+        output$fcst$dims <- dim(output$fcst$data)
+      }
 
-    ##TODO: Adjust s2dv_cube
-    ### $dims
-    output$hcst$dims <- dim(output$hcst$data)
-    output$obs$dims <- dim(output$obs$data)
-    if (!is.null(output$fcst)) {
-      output$fcst$dims <- dim(output$fcst$data)
+    } else {
+      # Unlist data to be a list of s2dv_cube objects
+      names(data) <- recipe$Analysis$Datasets$System$name
+      output <- unlist(data, recursive = F)
+      output$n_members <- n_members
     }
 
   }
@@ -52,7 +67,7 @@ load_datasets <- function(recipe) {
   return(output) # list(hcst = hcst, fcst = fcst, obs = obs)
 }
 
-load_single_datasets <- function(recipe) {
+load_single_datasets <- function(recipe, retrieve = T) {
 
   # -------------------------------------------
   # Set params -----------------------------------------
@@ -169,7 +184,7 @@ load_single_datasets <- function(recipe) {
                                    longitude = 'dat',
                                    time = 'file_date'),
                 split_multiselected_dims = split_multiselected_dims,
-                retrieve = TRUE)
+                retrieve = retrieve)
   
   # Remove var_dir dimension
   if ("var_dir" %in% names(dim(hcst))) {
@@ -233,7 +248,7 @@ load_single_datasets <- function(recipe) {
                                      longitude = 'dat',
                                      time = 'file_date'),
                   split_multiselected_dims = split_multiselected_dims,
-                  retrieve = TRUE)
+                  retrieve = retrieve)
     
     if ("var_dir" %in% names(dim(fcst))) {
       fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected")
@@ -303,7 +318,7 @@ load_single_datasets <- function(recipe) {
                                     longitude = 'dat',
                                     time = 'file_date'),
                  split_multiselected_dims = TRUE,
-                 retrieve = TRUE)
+                 retrieve = retrieve)
     
   } else if (store.freq %in% c("daily_mean", "daily")) {
     
@@ -341,7 +356,7 @@ load_single_datasets <- function(recipe) {
                                     longitude = 'dat',
                                     time = 'file_date'),
                  split_multiselected_dims = TRUE,
-                 retrieve = TRUE)
+                 retrieve = retrieve)
   }
   
   
diff --git a/modules/Loading/R/join_datasets.R b/modules/Loading/R/join_datasets.R
index 1bacb619..3690179e 100644
--- a/modules/Loading/R/join_datasets.R
+++ b/modules/Loading/R/join_datasets.R
@@ -26,11 +26,14 @@ join_datasets <- function(data, multimodel, n_members) {
     dims <- dim(data[[1]][[exp]]$data)
     dims['dat'] <- n_models
     dims['ensemble'] <- max_members
+
+    #TODO: Consider Compute() case
     stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble')))
     
     ## Creating empty models and multimodel
     output[[exp]]$data <- array(data = NA, dim = dims)
     ## Filling the models
+    #TODO: Consider Compute() case
     for (model in 1:length(data)) {
       output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data
     }
@@ -46,6 +49,7 @@ join_datasets <- function(data, multimodel, n_members) {
                                             lendim = 1,
                                             name = 'dat')
     } else if (identical(multimodel,'both')) { ## Individual models and multimodel
+      #TODO: Consider Compute() case
       output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- 
         CSTools::MergeDims(data = output[[exp]]$data, 
                            merge_dims = c('ensemble', 'dat'),
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 37f97ffd..81686c06 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -21,7 +21,12 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) {
   }
 
   # Compute ensemble mean
-  ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  if (length(recipe$Analysis$Datasets$System$name) == 1) {
+    ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  } else {
+    # multi-model
+    ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble', na.rm = TRUE)
+  }
   # Loop over variable dimension
   for (var in 1:fcst$dims[['var']]) {
     variable <- fcst$attrs$Variable$varName[[var]]
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
index 715da1e0..5a8d2d3f 100644
--- a/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
+++ b/recipes/tests/recipe_multimodel_seasonal_aho_single.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     sdate: '1101' # Mandatory, int: Start date, 'mmdd'
     fcst_year: '2020' # Optional, int: Forecast initialization year 'YYYY'
-    hcst_start: '2013' # Mandatory, int: Hindcast initialization start year 'YYYY'
+    hcst_start: '2014' # Mandatory, int: Hindcast initialization start year 'YYYY'
     hcst_end: '2016' # Mandatory, int: Hindcast initialization end year 'YYYY'
     ftime_min: 1 # Mandatory, int: First forecast time step in months. Starts at “1”.
     ftime_max: 5 # Mandatory, int: Last forecast time step in months. Starts at “1”.
-- 
GitLab


From a8b7e20d8de47c2d73bfe35f07df743608e37b71 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 15 Sep 2023 09:49:48 +0200
Subject: [PATCH 07/10] TODO: Add dat_dim for loop

---
 modules/Visualization/R/plot_ensemble_mean.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 81686c06..78bd3d05 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -20,6 +20,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) {
     projection <- "cylindrical_equidistant"
   }
 
+#TODO: Add a for loop for dat
   # Compute ensemble mean
   if (length(recipe$Analysis$Datasets$System$name) == 1) {
     ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
-- 
GitLab


From 3622448bcbc54a96ebb968a074bf13b2a52ac518 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Fri, 15 Sep 2023 13:33:42 +0200
Subject: [PATCH 08/10] Correct hcst and hcst s2dv_cube

---
 modules/Loading/Loading.R | 53 +++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 04a30f2d..db5a41f1 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -44,18 +44,51 @@ load_datasets <- function(recipe, retrieve = T) {
     }
    
     if (retrieve) {
-      ## Joining models and/or creating multimodel
+      # Joining models and/or creating multimodel
       output <- join_datasets(data = data, multimodel = multimodel, n_members = n_members)
 
-      ##TODO: Adjust s2dv_cube
-      ### $dims
+      # Adjust s2dv_cube
+      # hcst:
+      ## $dims
       output$hcst$dims <- dim(output$hcst$data)
-      output$obs$dims <- dim(output$obs$data)
+      ## $coords
+      if (isFALSE(multimodel)) {
+        output$hcst$coords$dat <- recipe$Analysis$Datasets$System$name
+      } else if (isTRUE(multimodel)) {
+        output$hcst$coords$dat <- 'multimodel'
+      } else if (multimodel == 'both') {
+        output$hcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel')
+      }
+      output$hcst$coords$ensemble <- 1:dim(output$hcst$data)['ensemble']
+      ## $attrs
+      output$hcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name
+      tmp <- array(c(sapply(data, "[[", c('hcst', 'attrs', 'source_files'))),
+                   dim = c(dim(output$hcst$attrs$source_files)[-1], 
+                           dat = length(recipe$Analysis$Datasets$System$name)))
+      output$hcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1)))
+
+      # fcst:
       if (!is.null(output$fcst)) {
+        ## $dims
         output$fcst$dims <- dim(output$fcst$data)
+        ## $coords
+        if (isFALSE(multimodel)) {
+          output$fcst$coords$dat <- recipe$Analysis$Datasets$System$name
+        } else if (isTRUE(multimodel)) {
+          output$fcst$coords$dat <- 'multimodel'
+        } else if (multimodel == 'both') {
+          output$fcst$coords$dat <- c(recipe$Analysis$Datasets$System$name, 'multimodel')
+        }
+        output$fcst$coords$ensemble <- 1:dim(output$fcst$data)['ensemble']
+        ## $attrs
+        output$fcst$attrs$Datasets <- recipe$Analysis$Datasets$System$name
+        tmp <- array(c(sapply(data, "[[", c('fcst', 'attrs', 'source_files'))),
+                     dim = c(dim(output$fcst$attrs$source_files)[-1],
+                             dat = length(recipe$Analysis$Datasets$System$name)))
+        output$fcst$attrs$source_files <- aperm(tmp, c(length(dim(tmp)), 1:(length(dim(tmp)) - 1)))
       }
-
-    } else {
+      
+    } else {  # retrieve = F
       # Unlist data to be a list of s2dv_cube objects
       names(data) <- recipe$Analysis$Datasets$System$name
       output <- unlist(data, recursive = F)
@@ -161,7 +194,7 @@ load_single_datasets <- function(recipe, retrieve = T) {
   
   # Load hindcast 
   #-------------------------------------------------------------------
-  hcst <- Start(dat = hcst.path,
+  hcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = hcst.path)),
                 var = variable,
                 var_dir = var_dir_exp,
                 file_date = sdates$hcst,
@@ -225,7 +258,7 @@ load_single_datasets <- function(recipe, retrieve = T) {
     # with the daily case and the current version of startR not allowing
     # multiple dims split
     
-    fcst <- Start(dat = fcst.path,
+    fcst <- Start(dat = list(list(name = recipe$Analysis$Datasets$System$name, path = fcst.path)),
                   var = variable,
                   var_dir = var_dir_exp,
                   var_dir_depends = 'var',
@@ -298,7 +331,7 @@ load_single_datasets <- function(recipe, retrieve = T) {
     dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
     dim(dates_file) <- dim(dates)
     
-    obs <- Start(dat = obs.path,
+    obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)),
                  var = variable,
                  var_dir = var_dir_obs,
                  var_dir_depends = 'var',
@@ -331,7 +364,7 @@ load_single_datasets <- function(recipe, retrieve = T) {
     # Restore correct dimensions
     dim(dates) <- dim(dates_file)
     
-    obs <- Start(dat = obs.path,
+    obs <- Start(dat = list(list(name = recipe$Analysis$Datasets$Reference$name, path = obs.path)),
                  var = variable,
                  var_dir = var_dir_obs,
                  var_dir_depends = 'var',
-- 
GitLab


From ce19ae65ab41adea995f3465213a453b732c7336 Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Tue, 19 Sep 2023 15:34:45 +0200
Subject: [PATCH 09/10] Make Loading.R and join_datasets() work with Compute
 cases

---
 modules/Loading/Loading.R                     |  2 +-
 modules/Loading/R/join_datasets.R             | 83 +++++++++++++------
 .../tests/recipe_multimodel_seasonal_aho.yml  |  2 +-
 3 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index db5a41f1..1394eb1e 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -21,7 +21,7 @@ load_datasets <- function(recipe, retrieve = T) {
     # Loading each model individually
 
     ##NOTE:
-    ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level=3, give.attr=F).
+    ## if retrieve = T --> data is a list of models, each model contains a list of s2dv_cube. Try str(data, max.level = 3, give.attr = F).
     ## if retrieve = F --> must be a list be startR_cube  --> unlist(data, recursive = F). 'dat', 'ensemble' need to be target_dims.
     data <- list()
     n_members <- list(hcst = c(), fcst = c())
diff --git a/modules/Loading/R/join_datasets.R b/modules/Loading/R/join_datasets.R
index 3690179e..b7a5d17c 100644
--- a/modules/Loading/R/join_datasets.R
+++ b/modules/Loading/R/join_datasets.R
@@ -1,23 +1,22 @@
 join_datasets <- function(data, multimodel, n_members) {
   
   ## Taking the first model as "template"
-  ##TODO: the s2dv_cube has only the first model's info
   output <- data[[1]]
   
   if (is.null(output$fcst$data)) {
     exps <- 'hcst'
-  } else {exps <- c('hcst','fcst')}
+  } else {exps <- c('hcst', 'fcst')}
   
   for (exp in exps) {
     
-    ## New dimensions of the arrays
+    # New dimensions of the arrays
     if (isFALSE(multimodel)) { ## Individual models
       n_models <- length(data)
       max_members <- max(n_members[[exp]])
     } else if (isTRUE(multimodel)) { ## Multimodel
       n_models <- length(data)
       max_members <- sum(n_members[[exp]])
-    } else if (identical(multimodel,'both')) { ## Individual models and multimodel
+    } else if (identical(multimodel, 'both')) { ## Individual models and multimodel
       n_models <- length(data) + 1
       max_members <- sum(n_members[[exp]])
     } else {
@@ -27,35 +26,65 @@ join_datasets <- function(data, multimodel, n_members) {
     dims['dat'] <- n_models
     dims['ensemble'] <- max_members
 
-    #TODO: Consider Compute() case
-    stopifnot(identical(names(dims), c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble')))
-    
-    ## Creating empty models and multimodel
+    # Creating array for models and/or multimodel
     output[[exp]]$data <- array(data = NA, dim = dims)
+
     ## Filling the models
-    #TODO: Consider Compute() case
-    for (model in 1:length(data)) {
-      output[[exp]]$data[model, , , , , , , , 1:n_members[[exp]][model]] <- data[[model]][[exp]]$data
+    fill_data <- function(data, n_members, dims) {
+      # data: a list of hcst/fcst$data from each model 
+      # n_members: a vector of members of each model
+      # dims: c(dat = n_models, ensemble = max_members)
+
+      .fill_data <- function(data, n_members, dims) {
+        output <- rep(NA, length = dims['ensemble'])
+        output[1:n_members] <- data
+        return(output)
+      }
+
+      data_list <- vector('list', length = length(data))
+      for (i_model in 1:length(data)) {
+      data_list[[i_model]] <- multiApply::Apply(data = data[[i_model]], 
+                                  target_dims = 'ensemble', output_dims = 'ensemble',
+                                  fun = .fill_data, n_members = n_members[i_model],
+                                  dims = dims)$output1
+      }
+      if (dims['dat'] > length(data)) { # Multimodel = 'both'
+        data_list[[as.numeric(dims['dat'])]] <- array(as.numeric(NA), dim = dim(data_list[[1]]))
+      }
+      output <- array(unlist(data_list), dim = c(dims['ensemble'], dim(data_list[[1]])[-c(1, 2)], dims['dat']))
+      return(output)
     }
+
+    data_exp_data <- lapply(data, '[[', c(exp, 'data'))
+    output[[exp]]$data <- fill_data(data = (data_exp_data), n_members = n_members[[exp]], 
+                                    dims = dims[c('dat', 'ensemble')])
     
     ## Filling the multimodel
-    if (isTRUE(multimodel)) { ## Multimodel
-      output[[exp]]$data <- CSTools::MergeDims(data = output[[exp]]$data, 
-                                               merge_dims = c('ensemble', 'dat'),
-                                               rename_dim = 'ensemble', 
-                                               na.rm = TRUE)
-      output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data,
-                                            posdim = 1,
-                                            lendim = 1,
-                                            name = 'dat')
-    } else if (identical(multimodel,'both')) { ## Individual models and multimodel
-      #TODO: Consider Compute() case
-      output[[exp]]$data[n_models, , , , , , , , 1:max_members] <- 
-        CSTools::MergeDims(data = output[[exp]]$data, 
-                           merge_dims = c('ensemble', 'dat'),
-                           rename_dim = 'ensemble', 
-                           na.rm = TRUE)
+    if (!isFALSE(multimodel)) {
+        data_multi <- CSTools::MergeDims(data = output[[exp]]$data,
+                                         merge_dims = c('ensemble', 'dat'),
+                                         rename_dim = 'ensemble',
+                                         na.rm = TRUE)
+
+      if (isTRUE(multimodel)) { ## Multimodel
+        output[[exp]]$data <- data_multi
+        output[[exp]]$data <- s2dv::InsertDim(data = output[[exp]]$data,
+                                              posdim = 1,
+                                              lendim = 1,
+                                              name = 'dat')
+
+      } else if (identical(multimodel, 'both')) { ## Individual models and multimodel
+        tmp <- asplit(output[[exp]]$data, which(names(dim(output[[exp]]$data)) == 'dat'))
+        tmp[[n_models]] <- data_multi
+        output[[exp]]$data <- array(unlist(tmp), dim = c(dim(tmp[[1]]), dat = n_models))
+      }
     }
+
+    # Reorder data back to standard
+    tmp <- match(c('dat', 'var', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble'),
+                 names(dim(output[[exp]]$data)))
+    output[[exp]]$data <- aperm(output[[exp]]$data, tmp[!is.na(tmp)])
+
   }
   
   return(output)
diff --git a/recipes/tests/recipe_multimodel_seasonal_aho.yml b/recipes/tests/recipe_multimodel_seasonal_aho.yml
index 6c2d2965..f56119fc 100644
--- a/recipes/tests/recipe_multimodel_seasonal_aho.yml
+++ b/recipes/tests/recipe_multimodel_seasonal_aho.yml
@@ -49,7 +49,7 @@ Analysis:
       plots: forecast_ensemble_mean most_likely_terciles #skill_metrics
     Indicators:
       index: no # This feature is not implemented yet
-  ncores: 10 # Optional, int: number of cores to be used in parallel computation.
+  ncores: 1 # Optional, int: number of cores to be used in parallel computation.
              # If left empty, defaults to 1.
   remove_NAs: TRUE # Optional, bool: Whether to remove NAs. 
                    # If left empty, defaults to FALSE.
-- 
GitLab


From 4c0cb05d20b7a13747b9e5f52e102684d9d5e03f Mon Sep 17 00:00:00 2001
From: aho <an.ho@bsc.es>
Date: Wed, 27 Sep 2023 16:22:34 +0200
Subject: [PATCH 10/10] The loading script for calibrated data. Supposed to be
 used before join_datasets()

---
 modules/Loading/R/load_seasonal_calibrated.R | 265 +++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 modules/Loading/R/load_seasonal_calibrated.R

diff --git a/modules/Loading/R/load_seasonal_calibrated.R b/modules/Loading/R/load_seasonal_calibrated.R
new file mode 100644
index 00000000..f8e3ea63
--- /dev/null
+++ b/modules/Loading/R/load_seasonal_calibrated.R
@@ -0,0 +1,265 @@
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+
+#TODO: Get 'output_module' from recipe (e.g., multimodel$readFrom)
+load_seasonal_calibrated <- function(recipe, output_module) {
+
+  archive <- read_yaml("conf/archive.yml")$esarchive
+  ref.name <- recipe$Analysis$Datasets$Reference$name
+  exp.name <- recipe$Analysis$Datasets$System$name
+  store.freq <- recipe$Analysis$Variables$freq
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  exp_descrip <- archive$System[[exp.name]]
+  reference_descrip <- archive$Reference[[ref.name]]
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  lats.min <- recipe$Analysis$Region$latmin
+  lats.max <- recipe$Analysis$Region$latmax
+  lons.min <- recipe$Analysis$Region$lonmin
+  lons.max <- recipe$Analysis$Region$lonmax
+  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    split_multiselected_dims = TRUE
+  } else {
+    split_multiselected_dims = FALSE
+  }
+
+  # Find the saved data directory
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", output_module)
+  hcst.path <- file.path(get_dir(recipe = recipe, variable = variable[1]),
+                       "$var$_$file_date$.nc")
+  hcst.path <- gsub(variable[1], "$var$", hcst.path)
+  fcst.path <- obs.path <- hcst.path
+  obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T)
+
+  # Load hindcast 
+  #-------------------------------------------------------------------
+  hcst <- Start(dat = hcst.path,
+                var = variable,
+                file_date = sdates$hcst,
+                time = 'all',
+                latitude = 'all',
+                latitude_reorder = Sort(),
+                longitude = 'all',
+                longitude_reorder = circularsort,
+                synonims = list(latitude = c('lat', 'latitude'),
+                                longitude = c('lon', 'longitude'),
+                                ensemble = c('member', 'ensemble')),
+                ensemble = 'all',
+                metadata_dims = 'var',
+                return_vars = list(latitude = 'dat',
+                                   longitude = 'dat',
+                                   time = 'file_date'),
+                split_multiselected_dims = split_multiselected_dims,
+                retrieve = TRUE)
+
+  #############################
+  #NOTE: NOT TESTED YET
+  if (store.freq %in% c("daily_mean", "daily")) {
+    # Adjusts dims for daily case, could be removed if startR allows
+    # multidim split
+    names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
+    default_dims <- c(dat = 1, var = 1, sday = 1, 
+                      sweek = 1, syear = 1, time = 1, 
+                      latitude = 1, longitude = 1, ensemble = 1)
+    default_dims[names(dim(hcst))] <- dim(hcst)
+    dim(hcst) <- default_dims
+    # Change time attribute dimensions
+    default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+    names(dim(attr(hcst, "Variables")$common$time))[which(names(
+      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+    default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- 
+      dim(attr(hcst, "Variables")$common$time)
+    dim(attr(hcst, "Variables")$common$time) <- default_time_dims
+  }
+  ###############################
+
+  # Convert hcst to s2dv_cube object
+  hcst <- as.s2dv_cube(hcst)
+  # Adjust dates for models where the time stamp goes into the next month
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+  }
+
+
+  # Load forecast
+  #-------------------------------------------------------------------
+  if (!is.null(recipe$Analysis$Time$fcst_year)) {
+    fcst <- Start(dat = fcst.path,
+                  var = variable,
+                  file_date = sdates$fcst,
+                  time = 'all',
+                  latitude = 'all',
+                  latitude_reorder = Sort(),
+                  longitude = 'all',
+                  longitude_reorder = circularsort,
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = 'all',
+                  metadata_dims = 'var',
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = TRUE)
+
+    #############################
+    #NOTE: NOT TESTED YET
+    if (store.freq %in% c("daily_mean", "daily")) {
+      # Adjusts dims for daily case, could be removed if startR allows
+      # multidim split
+      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
+      default_dims <- c(dat = 1, var = 1, sday = 1,
+                        sweek = 1, syear = 1, time = 1,
+                        latitude = 1, longitude = 1, ensemble = 1)
+      default_dims[names(dim(fcst))] <- dim(fcst)
+      dim(fcst) <- default_dims
+      # Change time attribute dimensions
+      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+      names(dim(attr(fcst, "Variables")$common$time))[which(names(
+        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- 
+        dim(attr(fcst, "Variables")$common$time)
+      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
+    }
+    #############################
+    
+    # Convert fcst to s2dv_cube
+    fcst <- as.s2dv_cube(fcst)
+    # Adjust dates for models where the time stamp goes into the next month
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      fcst$attrs$Dates[] <- 
+        fcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+    }
+
+  } else {
+    fcst <- NULL
+  }
+
+
+
+  # Load reference
+  #-------------------------------------------------------------------
+  
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
+  dates <- hcst$attrs$Dates
+  dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]
+
+  if (store.freq == "monthly_mean") {
+
+    dates_file <- paste0(format(as.Date(dates, '%Y%m%d'), "%Y%m"), "01")
+    dim(dates_file) <- dim(dates)
+    
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 file_date = dates_file,
+                 latitude = 'all',
+                 latitude_reorder = Sort(),
+                 longitude = 'all',
+                 longitude_reorder = circularsort,
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+
+  } else if (store.freq %in% c("daily_mean", "daily")) {
+    
+    #############################
+    #NOTE: NOT TESTED YET
+
+    # Get year and month for file_date
+    dates_file <- sapply(dates, format, '%Y%m')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12 
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+    
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = 'all',
+                 latitude_reorder = Sort(),
+                 longitude = 'all',
+                 longitude_reorder = circularsort,
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+    #############################
+
+  }
+
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  default_dims <- c(dat = 1, var = 1, sday = 1, 
+                    sweek = 1, syear = 1, time = 1, 
+                    latitude = 1, longitude = 1, ensemble = 1)
+  default_dims[names(dim(obs))] <- dim(obs)
+  dim(obs) <- default_dims
+  
+  # Convert obs to s2dv_cube
+  obs <- as.s2dv_cube(obs)
+
+
+  # Check for consistency between hcst and obs grid
+  if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+    lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                           "Please check the original grids and the",
+                           "regrid parameters in your recipe.")
+    error(recipe$Run$logger, lat_error_msg)
+    hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                           "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+    info(recipe$Run$logger, hcst_lat_msg)
+    obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                          "; Last obs lat: ", obs$lat[length(obs$lat)])
+    info(recipe$Run$logger, obs_lat_msg)
+    stop("hcst and obs don't share the same latitudes.")
+  }
+  if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+    lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                           "Please check the original grids and the",
+                           "regrid parameters in your recipe.")
+    error(recipe$Run$logger, lon_error_msg)
+    hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                           "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+    info(recipe$Run$logger, hcst_lon_msg)
+    obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                          "; Last obs lon: ", obs$lon[length(obs$lon)])
+    info(recipe$Run$logger, obs_lon_msg)
+    stop("hcst and obs don't share the same longitudes.")  
+  }
+
+  # Print a summary of the loaded data for the user, for each object
+  if (recipe$Run$logger$threshold <= 2) {
+    data_summary(hcst, recipe)
+    data_summary(obs, recipe)
+    if (!is.null(fcst)) {
+      data_summary(fcst, recipe)
+    }
+  }
+
+ info(recipe$Run$logger,
+       "##### CALIBRATED DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
-- 
GitLab