From ca0675183dbec6563e4c3037e6fe306d68ed50d8 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 20 Mar 2023 12:08:25 +0100
Subject: [PATCH 01/96] Add retrieve = False (WIP)

---
 modules/Loading/Loading.R                     | 230 +++++++++---------
 .../testing_recipes/recipe_seasonal-tests.yml |   4 +-
 2 files changed, 121 insertions(+), 113 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index e5dafd0d..a45dcfdb 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -6,8 +6,9 @@ source("modules/Loading/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
 
-load_datasets <- function(recipe) {
+load_datasets <- function(recipe, retrieve = T) {
 
+  # retrieve <- F
   # -------------------------------------------
   # Set params -----------------------------------------
@@ -98,6 +99,8 @@
   # Load hindcast
   #-------------------------------------------------------------------
 
+  ## PROBLEM 1: Bug in Start() that causes problems if regrid params are NULL
+  ## PROBLEM 2: "Error: Incompatible classes: + " unknown origin
   hcst <- Start(dat = hcst.path,
                 var = variable,
                 file_date = sdates$hcst,
@@ -118,7 +121,7 @@
                               longitude = 'dat',
                               time = 'file_date'),
            split_multiselected_dims = split_multiselected_dims,
-           retrieve = TRUE)
+           retrieve = retrieve)
 
   if (recipe$Analysis$Variables$freq == "daily_mean") {
     # Adjusts dims for daily case, could be removed if startR allows
@@ -138,15 +141,16 @@
     dim(attr(hcst, "Variables")$common$time) <- default_time_dims
   }
 
-  # Convert hcst to s2dv_cube object
-  ## TODO: Give correct dimensions to $Dates$start
-  ## (sday, sweek, syear instead of file_date)
-  hcst <- as.s2dv_cube(hcst)
   # Adjust dates for models where the time stamp goes into the next month
   if (recipe$Analysis$Variables$freq == "monthly_mean") {
-    hcst$Dates$start[] <- hcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+    attr(hcst, "Variables")$common$time[] <-
+      attr(hcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+  }
+  dates <- attr(hcst, "Variables")$common$time
+  # Convert hcst to s2dv_cube object
+  if (retrieve) {
+    hcst <- as.s2dv_cube(hcst)
   }
-
   # Load forecast
   #-------------------------------------------------------------------
   if (!is.null(recipe$Analysis$Time$fcst_year)) {
@@ -155,26 +159,26 @@
     # multiple dims split
 
     fcst <- Start(dat = fcst.path,
-                var = variable,
-                file_date = sdates$fcst,
-                time = idxs$fcst,
-                latitude = values(list(lats.min, lats.max)),
-                latitude_reorder = Sort(),
-                longitude = values(list(lons.min, lons.max)),
-                longitude_reorder = circularsort,
-                transform = regrid_params$fcst.transform,
-                transform_params = list(grid = regrid_params$fcst.gridtype,
-                                        method = regrid_params$fcst.gridmethod),
-                transform_vars = c('latitude', 'longitude'),
-                synonims = list(latitude = c('lat', 'latitude'),
-                                longitude = c('lon', 'longitude'),
-                                ensemble = c('member', 'ensemble')),
-                ensemble = indices(1:fcst.nmember),
-                return_vars = list(latitude = 'dat',
-                                   longitude = 'dat',
-                                   time = 'file_date'),
-                split_multiselected_dims = split_multiselected_dims,
-                retrieve = TRUE)
+                  var = variable,
+                  file_date = sdates$fcst,
+                  time = idxs$fcst,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_params = list(grid = regrid_params$fcst.gridtype,
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = indices(1:fcst.nmember),
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = retrieve)
 
     if (recipe$Analysis$Variables$freq == "daily_mean") {
       # Adjusts dims for daily case, could be removed if startR allows
@@ -186,6 +190,7 @@
       default_dims[names(dim(fcst))] <- dim(fcst)
       dim(fcst) <- default_dims
       # Change time attribute dimensions
+      ## TODO: Is this still necessary?
      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
      names(dim(attr(fcst, "Variables")$common$time))[which(names(
        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
@@ -194,14 +199,15 @@
      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
     }
 
-    # Convert fcst to s2dv_cube
-    fcst <- as.s2dv_cube(fcst)
-    # Adjust dates for models where the time stamp goes into the next month
+    # Change dates in models with wrong timestamp
     if (recipe$Analysis$Variables$freq == "monthly_mean") {
-      fcst$Dates$start[] <-
-        fcst$Dates$start - seconds(exp_descrip$time_stamp_lag)
+      attr(fcst, "Variables")$common$time <-
+        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+    }
+    # Convert fcst to s2dv_cube
+    if (retrieve) {
+      fcst <- as.s2dv_cube(fcst)
     }
-
   } else {
     fcst <- NULL
   }
@@ -211,11 +217,7 @@
 
   # Obtain dates and date dimensions from the loaded hcst data to make sure
   # the corresponding observations are loaded correctly.
-  dates <- hcst$Dates$start
-  dim(dates) <- dim(Subset(hcst$data,
-                           along=c('dat', 'var',
-                                   'latitude', 'longitude', 'ensemble'),
-                           list(1,1,1,1,1), drop="selected"))
+  # dates <- hcst$Dates$start
 
   # Separate Start() call for monthly vs daily data
   if (store.freq == "monthly_mean") {
@@ -240,7 +242,7 @@
                            longitude = 'dat',
                            time = 'file_date'),
        split_multiselected_dims = TRUE,
-       retrieve = TRUE)
+       retrieve = retrieve)
 
  } else if (store.freq == "daily_mean") {
 
@@ -275,90 +277,96 @@
                            longitude = 'dat',
                            time = 'file_date'),
        split_multiselected_dims = TRUE,
-       retrieve = TRUE)
+       retrieve = retrieve)
  }
-
+  ## TODO: How to handle this with retrieve = F?
  # Adds ensemble dim to obs (for consistency with hcst/fcst)
  default_dims <- c(dat = 1, var = 1, sday = 1,
                    sweek = 1, syear = 1, time = 1,
                    latitude = 1, longitude = 1, ensemble = 1)
-  default_dims[names(dim(obs))] <- dim(obs)
-  dim(obs) <- default_dims
-
-  # Convert obs to s2dv_cube
-  obs <- as.s2dv_cube(obs)
-
-  # Check for consistency between hcst and obs grid
-  if (!(recipe$Analysis$Regrid$type == 'none')) {
-    if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
-      lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
-      error(recipe$Run$logger, lat_error_msg)
-      hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-                             "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
-      info(recipe$Run$logger, hcst_lat_msg)
-      obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                            "; Last obs lat: ", obs$lat[length(obs$lat)])
-      info(recipe$Run$logger, obs_lat_msg)
-      stop("hcst and obs don't share the same latitudes.")
-    }
-    if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
-      lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-                             "Please check the original grids and the",
-                             "regrid parameters in your recipe.")
-      error(recipe$Run$logger, lon_error_msg)
-      hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-                             "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
-      info(recipe$Run$logger, hcst_lon_msg)
-      obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                            "; Last obs lon: ", obs$lon[length(obs$lon)])
-      info(recipe$Run$logger, obs_lon_msg)
-      stop("hcst and obs don't share the same longitudes.")
-
-    }
+  if (retrieve) {
+    default_dims[names(dim(obs))] <- dim(obs)
+    dim(obs) <- default_dims
+    # Convert obs to s2dv_cube
+    obs <- as.s2dv_cube(obs)
+  } else {
+    default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions")
+    attr(obs, "Dimensions") <- default_dims
  }
+  # Checks and data summary (only if retrieve = TRUE)
+  if (retrieve) {
+    # Check for consistency between hcst and obs grid
+    if (!(recipe$Analysis$Regrid$type == 'none')) {
+      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lat_error_msg)
+        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+        info(recipe$Run$logger, hcst_lat_msg)
+        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
+        info(recipe$Run$logger, obs_lat_msg)
+        stop("hcst and obs don't share the same latitudes.")
+      }
+      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lon_error_msg)
+        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+        info(recipe$Run$logger, hcst_lon_msg)
+        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
+        info(recipe$Run$logger, obs_lon_msg)
+        stop("hcst and obs don't share the same longitudes.")
 
+      }
+    }
 
-  # Remove negative values in accumulative variables
-  dictionary <- read_yaml("conf/variable-dictionary.yml")
-  if (dictionary$vars[[variable]]$accum) {
-    info(recipe$Run$logger,
-         "Accumulated variable: setting negative values to zero.")
-    obs$data[obs$data < 0] <- 0
-    hcst$data[hcst$data < 0] <- 0
-    if (!is.null(fcst)) {
-      fcst$data[fcst$data < 0] <- 0
+    # Remove negative values in accumulative variables
+    dictionary <- read_yaml("conf/variable-dictionary.yml")
+    if (dictionary$vars[[variable]]$accum) {
+      info(recipe$Run$logger,
+           "Accumulated variable: setting negative values to zero.")
+      obs$data[obs$data < 0] <- 0
+      hcst$data[hcst$data < 0] <- 0
+      if (!is.null(fcst)) {
+        fcst$data[fcst$data < 0] <- 0
+      }
    }
-  }
 
-  # Convert prlr from m/s to mm/day
-  ## TODO: Make a unit conversion function?
-  if (variable == "prlr") {
-    # Verify that the units are m/s and the same in obs and hcst
-    if (((attr(obs$Variable, "variable")$units == "m s-1") ||
-        (attr(obs$Variable, "variable")$units == "m s**-1")) &&
-        ((attr(hcst$Variable, "variable")$units == "m s-1") ||
-        (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
-
-      info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
-      obs$data <- obs$data*86400*1000
-      attr(obs$Variable, "variable")$units <- "mm/day"
-      hcst$data <- hcst$data*86400*1000
-      attr(hcst$Variable, "variable")$units <- "mm/day"
+    # Convert prlr from m/s to mm/day
+    ## TODO: Make a unit conversion function?
+    if (variable == "prlr") {
+      # Verify that the units are m/s and the same in obs and hcst
+      if (((attr(obs$Variable, "variable")$units == "m s-1") ||
+          (attr(obs$Variable, "variable")$units == "m s**-1")) &&
+          ((attr(hcst$Variable, "variable")$units == "m s-1") ||
+          (attr(hcst$Variable, "variable")$units == "m s**-1"))) {
+
+        info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+        obs$data <- obs$data*86400*1000
+        attr(obs$Variable, "variable")$units <- "mm/day"
+        hcst$data <- hcst$data*86400*1000
+        attr(hcst$Variable, "variable")$units <- "mm/day"
        if (!is.null(fcst)) {
-        fcst$data <- fcst$data*86400*1000
-        attr(fcst$Variable, "variable")$units <- "mm/day"
+          fcst$data <- fcst$data*86400*1000
+          attr(fcst$Variable, "variable")$units <- "mm/day"
+        }
      }
    }
-  }
 
-  # Compute anomalies if requested
-  # Print a summary of the loaded data for the user, for each object
-  if (recipe$Run$logger$threshold <= 2) {
-    data_summary(hcst, recipe)
-    data_summary(obs, recipe)
-    if (!is.null(fcst)) {
-      data_summary(fcst, recipe)
+    # Compute anomalies if requested
+    # Print a summary of the loaded data for the user, for each object
+    if (recipe$Run$logger$threshold <= 2) {
+      data_summary(hcst, recipe)
+      data_summary(obs, recipe)
+      if (!is.null(fcst)) {
+        data_summary(fcst, recipe)
+      }
    }
  }

diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index b4e05fc4..5144e98e 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -25,8 +25,8 @@ Analysis:
     lonmin: -10
     lonmax: 30
   Regrid:
-    method:
-    type: none
+    method: conservative
+    type: 'r360x180'
   Workflow:
     Calibration:
       method: raw

--
GitLab

From f8090af5cecced04e347f71b1bfe16c65f2a7cbb Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 20 Mar 2023 15:38:35 +0100
Subject: [PATCH 02/96] Correct date metadata dimensions

---
 modules/Loading/Loading.R | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index a45dcfdb..a3260f69 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -150,6 +150,7 @@
   # Convert hcst to s2dv_cube object
   if (retrieve) {
     hcst <- as.s2dv_cube(hcst)
+    dim(hcst$Dates$start) <- dim(dates)
   }
   # Load forecast
   #-------------------------------------------------------------------
@@ -197,11 +198,8 @@
      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
        dim(attr(fcst, "Variables")$common$time)
      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
-    }
-
-    # Change dates in models with wrong timestamp
-    if (recipe$Analysis$Variables$freq == "monthly_mean") {
-      attr(fcst, "Variables")$common$time <-
+    } else if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      attr(fcst, "Variables")$common$time[] <-
        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
    }
    # Convert fcst to s2dv_cube
@@ -221,7 +219,6 @@
  # Separate Start() call for monthly vs daily data
  if (store.freq == "monthly_mean") {
-
    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
    dim(dates_file) <- dim(dates)
@@ -245,7 +242,6 @@
  } else if (store.freq == "daily_mean") {
-
    # Get year and month for file_date
    dates_file <- sapply(dates, format, '%Y%m')
    dim(dates_file) <- dim(dates)
@@ -285,11 +281,11 @@
                    sweek = 1, syear = 1, time = 1,
                    latitude = 1, longitude = 1, ensemble = 1)
  if (retrieve) {
-    default_dims[names(dim(obs))] <- dim(obs)
-    dim(obs) <- default_dims
    # Convert obs to s2dv_cube
+    dim(obs) <- c(dim(obs), "ensemble" = 1)
    obs <- as.s2dv_cube(obs)
  } else {
+    ## TODO: How to handle this with retrieve = F?
    default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions")
    attr(obs, "Dimensions") <- default_dims
  }

--
GitLab
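[Editorial note between patches: the `retrieve` switch introduced above leans on a startR behaviour worth spelling out. With `retrieve = TRUE`, `Start()` reads the values into memory; with `retrieve = FALSE`, it only scans the files and returns a lightweight declaration whose metadata lives in attributes, ready to be fed to `Compute()` later. A minimal sketch of the declarative mode follows; the data path pattern, variable name and date values are hypothetical placeholders, not taken from these recipes.]

  library(startR)
  # Hypothetical file pattern; $var$ and $file_date$ are startR wildcards.
  exp_path <- "/path/to/exp/$var$_$file_date$.nc"
  hcst <- Start(dat = exp_path,
                var = "tas",
                file_date = c("19931101", "19941101"),
                time = "all",
                latitude = "all",
                longitude = "all",
                return_vars = list(time = "file_date"),
                retrieve = FALSE)
  # Metadata is available without retrieving the values, which is what
  # patches 01 and 02 rely on to build the date and dimension information:
  dates <- attr(hcst, "Variables")$common$time
  dims  <- attr(hcst, "Dimensions")

[This is why the loading module can keep producing `dates` and padded dimension vectors in both modes, while only the `retrieve = TRUE` branch can call `as.s2dv_cube()` or touch `$data`.]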
From 429cbc60cfa1317dfbf59a81a478c136a91df0ac Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 21 Mar 2023 08:55:22 +0100
Subject: [PATCH 03/96] Testing, code cleanup

---
 modules/Loading/Loading.R                     | 36 +++++++++----------
 .../testing_recipes/recipe_seasonal-tests.yml |  2 +-
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index a3260f69..37c1db41 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -91,7 +91,7 @@
   #-------------------------------------------------------------------
 
   circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
 
-  if (recipe$Analysis$Variables$freq == "monthly_mean"){
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
     split_multiselected_dims = TRUE
   } else {
     split_multiselected_dims = FALSE
@@ -99,8 +99,6 @@
   # Load hindcast
   #-------------------------------------------------------------------
 
-  ## PROBLEM 1: Bug in Start() that causes problems if regrid params are NULL
-  ## PROBLEM 2: "Error: Incompatible classes: + " unknown origin
   hcst <- Start(dat = hcst.path,
                 var = variable,
                 file_date = sdates$hcst,
@@ -221,7 +219,7 @@
  if (store.freq == "monthly_mean") {
    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
    dim(dates_file) <- dim(dates)
-
+    
    obs <- Start(dat = obs.path,
                 var = variable,
                 file_date = dates_file,
@@ -265,13 +263,13 @@
                 longitude_reorder = circularsort,
                 transform = regrid_params$obs.transform,
                 transform_params = list(grid = regrid_params$obs.gridtype,
-                                        method = regrid_params$obs.gridmethod),
+                                         method = regrid_params$obs.gridmethod),
                 transform_vars = c('latitude', 'longitude'),
                 synonims = list(latitude = c('lat','latitude'),
-                                longitude = c('lon','longitude')),
+                                 longitude = c('lon','longitude')),
                 return_vars = list(latitude = 'dat',
-                                   longitude = 'dat',
-                                   time = 'file_date'),
+                                    longitude = 'dat',
+                                    time = 'file_date'),
                 split_multiselected_dims = TRUE,
                 retrieve = retrieve)
  }
@@ -288,39 +286,39 @@
  }
  # Checks and data summary (only if retrieve = TRUE)
+  ## TODO: See if these checks can be performed on the startR_cube metadata
  if (retrieve) {
    # Check for consistency between hcst and obs grid
    if (!(recipe$Analysis$Regrid$type == 'none')) {
      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
-                              "Please check the original grids and the",
-                              "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
        error(recipe$Run$logger, lat_error_msg)
        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
-                              "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
        info(recipe$Run$logger, hcst_lat_msg)
        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
-                             "; Last obs lat: ", obs$lat[length(obs$lat)])
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
        info(recipe$Run$logger, obs_lat_msg)
        stop("hcst and obs don't share the same latitudes.")
      }
      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
-                              "Please check the original grids and the",
-                              "regrid parameters in your recipe.")
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
        error(recipe$Run$logger, lon_error_msg)
        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
-                              "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
        info(recipe$Run$logger, hcst_lon_msg)
        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
-                             "; Last obs lon: ", obs$lon[length(obs$lon)])
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
        info(recipe$Run$logger, obs_lon_msg)
        stop("hcst and obs don't share the same longitudes.")

      }
    }
-
+    # Unit manipulation and conversion:
    # Remove negative values in accumulative variables
    dictionary <- read_yaml("conf/variable-dictionary.yml")
    if (dictionary$vars[[variable]]$accum) {
      info(recipe$Run$logger,
-          "Accumulated variable: setting negative values to zero.")
+           "Accumulated variable: setting negative values to zero.")
      obs$data[obs$data < 0] <- 0
      hcst$data[hcst$data < 0] <- 0
      if (!is.null(fcst)) {
        fcst$data[fcst$data < 0] <- 0
      }
    }
-
    # Convert prlr from m/s to mm/day
    ## TODO: Make a unit conversion function?
    if (variable == "prlr") {
@@ -354,8 +352,6 @@
        }
      }
    }
-
-    # Compute anomalies if requested
    # Print a summary of the loaded data for the user, for each object
    if (recipe$Run$logger$threshold <= 2) {
      data_summary(hcst, recipe)
      data_summary(obs, recipe)

diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index 5144e98e..ff6ede09 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -26,7 +26,7 @@ Analysis:
     lonmax: 30
   Regrid:
     method: conservative
-    type: 'r360x180'
+    type: to_system
   Workflow:
     Calibration:
       method: raw

--
GitLab

From 1d74faaf07b82d02b881aa48e4b9b4d858ce6c4b Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 24 Mar 2023 15:23:42 +0100
Subject: [PATCH 04/96] Source startR functions

---
 modules/Calibration/calibrate_with_compute.R | 176 +++++++++++++++++++
 modules/Loading/Loading.R                    |   4 +
 2 files changed, 180 insertions(+)
 create mode 100644 modules/Calibration/calibrate_with_compute.R

diff --git a/modules/Calibration/calibrate_with_compute.R b/modules/Calibration/calibrate_with_compute.R
new file mode 100644
index 00000000..f9b3cb4c
--- /dev/null
+++ b/modules/Calibration/calibrate_with_compute.R
@@ -0,0 +1,176 @@
+
+calibrate_with_compute <- function(recipe, hcst, obs, fcst = NULL) {
+  # Function that calibrates the hindcast using the method stated in the
+  # recipe. If the forecast is not null, it calibrates it as well.
+  #
+  # data: list of s2dv_cube objects containing the hcst, obs and fcst.
+  # Optionally, it may also have hcst.full_val and obs.full_val.
+  # recipe: object obtained when passing the .yml recipe file to read_yaml()
+
+  method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+
+  if (method == "raw") {
+    warn(recipe$Run$logger,
+         paste("The Calibration module has been called, but the calibration",
+               "method in the recipe is 'raw'. The hcst and fcst will not be",
+               "calibrated."))
+    fcst_calibrated <- fcst
+    hcst_calibrated <- hcst
+    if (!is.null(hcst.full_val)) {
+      hcst_full_calibrated <- hcst.full_val
+    } else {
+      hcst_full_calibrated <- NULL
+    }
+    CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
+
+  } else {
+    ## TODO: Calibrate full fields when present
+    # Calibration function params
+    mm <- recipe$Analysis$Datasets$Multimodel
+    if (is.null(recipe$Analysis$ncores)) {
+      ncores <- 1
+    } else {
+      ncores <- recipe$Analysis$ncores
+    }
+    if (is.null(recipe$Analysis$remove_NAs)) {
+      na.rm = F
+    } else {
+      na.rm = recipe$Analysis$remove_NAs
+    }
+
+    CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
+    # Replicate observation array for the multi-model case
+    ## TODO: Implement for obs.full_val
+    if (mm) {
+      obs.mm <- obs
+      for(dat in 1:(dim(hcst)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, obs,
+                        along=which(names(dim(obs)) == 'dat'))
+      }
+      names(dim(obs.mm)) <- names(dim(obs))
+      obs$data <- obs.mm
+      remove(obs.mm)
+    }
+
+    if (recipe$Analysis$Variables$freq == "monthly_mean") {
+
+      CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
+      ## TODO: implement other calibration methods
+      if (!(method %in% CST_CALIB_METHODS)) {
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "monthly data."))
+        stop()
+      } else {
+        # Calibrate the hindcast
+        hcst_calibrated <- CST_Calibration(hcst, obs,
+                                           cal.method = method,
+                                           eval.method = "leave-one-out",
+                                           multi.model = mm,
+                                           na.fill = TRUE,
+                                           na.rm = na.rm,
+                                           apply_to = NULL,
+                                           alpha = NULL,
+                                           memb_dim = "ensemble",
+                                           sdate_dim = "syear",
+                                           ncores = ncores)
+        # In the case where anomalies have been computed, calibrate full values
+        if (!is.null(hcst.full_val)) {
+          hcst_full_calibrated <- CST_Calibration(hcst.full_val,
+                                                  obs.full_val,
+                                                  cal.method = method,
+                                                  eval.method = "leave-one-out",
+                                                  multi.model = mm,
+                                                  na.fill = TRUE,
+                                                  na.rm = na.rm,
+                                                  apply_to = NULL,
+                                                  memb_dim = "ensemble",
+                                                  sdate_dim = "syear",
+                                                  ncores = ncores)
+        } else {
+          hcst_full_calibrated <- NULL
+        }
+
+        # Calibrate the forecast
+        if (!is.null(fcst)) {
+          fcst_calibrated <- CST_Calibration(hcst, obs, fcst,
+                                             cal.method = method,
+                                             eval.method = "leave-one-out",
+                                             multi.model = mm,
+                                             na.fill = TRUE,
+                                             na.rm = na.rm,
+                                             apply_to = NULL,
+                                             alpha = NULL,
+                                             memb_dim = "ensemble",
+                                             sdate_dim = "syear",
+                                             ncores = ncores)
+        } else {
+          fcst_calibrated <- NULL
+        }
+      }
+    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+      # Daily data calibration using Quantile Mapping
+      if (!(method %in% c("qmap"))) {
+        error(recipe$Run$logger,
+              paste("Calibration method in the recipe is not available for",
+                    "daily data. Only quantile mapping 'qmap' is implemented."))
+        stop()
+      }
+      # Calibrate the hindcast
+      dim_order <- names(dim(hcst$data))
+      hcst_calibrated <- CST_QuantileMapping(hcst, obs,
+                                             exp_cor = NULL,
+                                             sdate_dim = "syear",
+                                             memb_dim = "ensemble",
+                                             # window_dim = "time",
+                                             method = "QUANT",
+                                             ncores = ncores,
+                                             na.rm = na.rm,
+                                             wet.day = F)
+      # Restore dimension order
+      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(hcst.full_val)) {
+        hcst_full_calibrated <- CST_QuantileMapping(hcst.full_val,
+                                                    obs.full_val,
+                                                    exp_cor = NULL,
+                                                    sdate_dim = "syear",
+                                                    memb_dim = "ensemble",
+                                                    method = "QUANT",
+                                                    ncores = ncores,
+                                                    na.rm = na.rm,
+                                                    wet.day = F)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
+
+      if (!is.null(fcst)) {
+        # Calibrate the forecast
+        fcst_calibrated <- CST_QuantileMapping(hcst, obs,
+                                               exp_cor = fcst,
+                                               sdate_dim = "syear",
+                                               memb_dim = "ensemble",
+                                               # window_dim = "time",
+                                               method = "QUANT",
+                                               ncores = ncores,
+                                               na.rm = na.rm,
+                                               wet.day = F)
+        # Restore dimension order
+        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+      } else {
+        fcst_calibrated <- NULL
+      }
+    }
+  }
+  info(recipe$Run$logger, CALIB_MSG)
+  ## TODO: Sort out returns
+  return_list <- list(hcst = hcst_calibrated,
+                      obs = obs,
+                      fcst = fcst_calibrated)
+  if (!is.null(hcst_full_calibrated)) {
+    return_list <- append(return_list,
+                          list(hcst.full_val = hcst_full_calibrated,
+                               obs.full_val = obs.full_val))
+  }
+  return(return_list)
+}
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 94e16493..14e99b32 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -8,6 +8,10 @@ source("tools/libs.R")
 #TODO: remove these two lines when new as.s2dv_cube() is in CSTools
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
+## TODO: remove these lines when new version of startR is released
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
+source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
--
GitLab

From c478ba09cce25ec2703e867bb9b2811551e72042 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 27 Mar 2023 17:02:44 +0200
Subject: [PATCH 05/96] Remove startR sourced functions

---
 modules/Loading/Loading.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 14e99b32..94e16493 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -8,10 +8,6 @@ source("tools/libs.R")
 #TODO: remove these two lines when new as.s2dv_cube() is in CSTools
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
 source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
-## TODO: remove these lines when new version of startR is released
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
--
GitLab
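[Editorial note between patches: the next two commits converge on one idea that recurs through the rest of the series. Because CSTools offers each method both as an s2dv_cube function (`CST_Calibration`, `CST_QuantileMapping`) and as a plain-array function (`Calibration`, `QuantileMapping`), the module can build the argument list once and dispatch on the class of the input, so the same code path works for `retrieve = TRUE` data and for plain arrays inside `Compute()`. Reduced to its core (the method name here is illustrative):]

  method <- "bias"    # illustrative; taken from the recipe in the real code
  arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                    cal.method = method, eval.method = "leave-one-out",
                    memb_dim = "ensemble", sdate_dim = "syear")
  if (inherits(data$hcst, "s2dv_cube")) {
    fun <- CST_Calibration   # s2dv_cube path, used when retrieve = TRUE
  } else {
    fun <- Calibration       # plain-array path, usable inside Compute()
  }
  hcst_calibrated <- do.call(fun, arguments)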
From 11bcc01c015b687a642232b45a8992a9a3e02802 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 26 Apr 2023 14:51:38 +0200
Subject: [PATCH 06/96] Testing Calibration with Compute()

---
 modules/Calibration/calibrate_with_compute.R  | 98 +++++++++----------
 modules/Loading/Loading.R                     |  3 -
 .../testing_recipes/recipe_seasonal-tests.yml |  2 +-
 modules/test_seasonal.R                       |  2 +-
 4 files changed, 51 insertions(+), 54 deletions(-)

diff --git a/modules/Calibration/calibrate_with_compute.R b/modules/Calibration/calibrate_with_compute.R
index f9b3cb4c..c44d31a3 100644
--- a/modules/Calibration/calibrate_with_compute.R
+++ b/modules/Calibration/calibrate_with_compute.R
@@ -10,17 +10,17 @@ calibrate_with_compute <- function(recipe, hcst, obs, fcst = NULL) {
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    warn(recipe$Run$logger,
-         paste("The Calibration module has been called, but the calibration",
-               "method in the recipe is 'raw'. The hcst and fcst will not be",
-               "calibrated."))
+#    warn(recipe$Run$logger,
+#         paste("The Calibration module has been called, but the calibration",
+#               "method in the recipe is 'raw'. The hcst and fcst will not be",
+#               "calibrated."))
     fcst_calibrated <- fcst
     hcst_calibrated <- hcst
-    if (!is.null(hcst.full_val)) {
-      hcst_full_calibrated <- hcst.full_val
-    } else {
-      hcst_full_calibrated <- NULL
-    }
+    # if (!is.null(data$hcst.full_val)) {
+    #   hcst_full_calibrated <- data$hcst.full_val
+    # } else {
+    #   hcst_full_calibrated <- NULL
+    # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
@@ -47,7 +47,7 @@
        obs.mm <- abind(obs.mm, obs,
                        along=which(names(dim(obs)) == 'dat'))
      }
-      names(dim(obs.mm)) <- names(dim(obs))
+      names(dim(obs.mm)) <- names(dim(obs$data))
      obs$data <- obs.mm
      remove(obs.mm)
    }
@@ -74,22 +74,22 @@
                                           memb_dim = "ensemble",
                                           sdate_dim = "syear",
                                           ncores = ncores)
-        # In the case where anomalies have been computed, calibrate full values
-        if (!is.null(hcst.full_val)) {
-          hcst_full_calibrated <- CST_Calibration(hcst.full_val,
-                                                  obs.full_val,
-                                                  cal.method = method,
-                                                  eval.method = "leave-one-out",
-                                                  multi.model = mm,
-                                                  na.fill = TRUE,
-                                                  na.rm = na.rm,
-                                                  apply_to = NULL,
-                                                  memb_dim = "ensemble",
-                                                  sdate_dim = "syear",
-                                                  ncores = ncores)
-        } else {
-          hcst_full_calibrated <- NULL
-        }
+        # # In the case where anomalies have been computed, calibrate full values
+        # if (!is.null(data$hcst.full_val)) {
+        #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+        #                                           data$obs.full_val,
+        #                                           cal.method = method,
+        #                                           eval.method = "leave-one-out",
+        #                                           multi.model = mm,
+        #                                           na.fill = TRUE,
+        #                                           na.rm = na.rm,
+        #                                           apply_to = NULL,
+        #                                           memb_dim = "ensemble",
+        #                                           sdate_dim = "syear",
+        #                                           ncores = ncores)
+        # } else {
+        #   hcst_full_calibrated <- NULL
+        # }
 
        # Calibrate the forecast
        if (!is.null(fcst)) {
@@ -128,21 +128,21 @@
                                             na.rm = na.rm,
                                             wet.day = F)
      # Restore dimension order
-      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      hcst_calibrated <- Reorder(hcst_calibrated, dim_order)
      # In the case where anomalies have been computed, calibrate full values
-      if (!is.null(hcst.full_val)) {
-        hcst_full_calibrated <- CST_QuantileMapping(hcst.full_val,
-                                                    obs.full_val,
-                                                    exp_cor = NULL,
-                                                    sdate_dim = "syear",
-                                                    memb_dim = "ensemble",
-                                                    method = "QUANT",
-                                                    ncores = ncores,
-                                                    na.rm = na.rm,
-                                                    wet.day = F)
-      } else {
-        hcst_full_calibrated <- NULL
-      }
+      # if (!is.null(data$hcst.full_val)) {
+      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+      #                                               data$obs.full_val,
+      #                                               exp_cor = NULL,
+      #                                               sdate_dim = "syear",
+      #                                               memb_dim = "ensemble",
+      #                                               method = "QUANT",
+      #                                               ncores = ncores,
+      #                                               na.rm = na.rm,
+      #                                               wet.day = F)
+      # } else {
+      #   hcst_full_calibrated <- NULL
+      # }
 
      if (!is.null(fcst)) {
        # Calibrate the forecast
@@ -156,21 +156,21 @@
                                               na.rm = na.rm,
                                               wet.day = F)
        # Restore dimension order
-        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+        fcst_calibrated <- Reorder(fcst_calibrated, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
-  info(recipe$Run$logger, CALIB_MSG)
+  # info(recipe$Run$logger, CALIB_MSG)
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = obs,
-                      fcst = fcst_calibrated)
-  if (!is.null(hcst_full_calibrated)) {
-    return_list <- append(return_list,
-                          list(hcst.full_val = hcst_full_calibrated,
-                               obs.full_val = obs.full_val))
-  }
+                      obs = obs)#,
+                      # fcst = fcst_calibrated)
+  # if (!is.null(hcst_full_calibrated)) {
+  #   return_list <- append(return_list,
+  #                         list(hcst.full_val = hcst_full_calibrated,
+  #                              obs.full_val = data$obs.full_val))
+  # }
  return(return_list)
 }
diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 94e16493..81f8c4a4 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -5,9 +5,6 @@ source("modules/Loading/dates2load.R")
 source("modules/Loading/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
-#TODO: remove these two lines when new as.s2dv_cube() is in CSTools
-source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R')
-source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R')
 
 load_datasets <- function(recipe, retrieve = T) {
 
diff --git a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
index ff6ede09..d1279fb0 100644
--- a/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
+++ b/modules/Loading/testing_recipes/recipe_seasonal-tests.yml
@@ -45,5 +45,5 @@
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R
index b8541488..a6a15b62 100644
--- a/modules/test_seasonal.R
+++ b/modules/test_seasonal.R
@@ -9,7 +9,7 @@
 recipe_file <- "modules/Loading/testing_recipes/recipe_seasonal-tests.yml"
 recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
-data <- load_datasets(recipe)
+data <- load_datasets(recipe, retrieve = F)
 # Calibrate datasets
 calibrated_data <- calibrate_datasets(recipe, data)
 # Compute anomalies

--
GitLab

From 3933ff3fdd5ecfe8aaabbd89fc0c709345560601 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:48:59 +0200
Subject: [PATCH 07/96] Modify Calibration module to work with Compute() (WIP)

---
 modules/Calibration/Calibration.R | 226 +++++++++++++++---------------
 1 file changed, 113 insertions(+), 113 deletions(-)

diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 899b1291..b8c4c3d5 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,30 +1,32 @@
-calibrate_datasets <- function(recipe, data) {
+calibrate_datasets <- function(recipe, hcst, obs, fcst = NULL) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
   # data: list of s2dv_cube objects containing the hcst, obs and fcst.
   # Optionally, it may also have hcst.full_val and obs.full_val.
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
-
+  
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    warn(recipe$Run$logger,
-         paste("The Calibration module has been called, but the calibration",
-               "method in the recipe is 'raw'. The hcst and fcst will not be",
-               "calibrated."))
-    fcst_calibrated <- data$fcst
-    hcst_calibrated <- data$hcst
-    if (!is.null(data$hcst.full_val)) {
-      hcst_full_calibrated <- data$hcst.full_val
-    } else {
-      hcst_full_calibrated <- NULL
+    if (inherits(hcst, "s2dv_cube")) {
+      warn(recipe$Run$logger,
+           paste("The Calibration module has been called, but the calibration",
+                 "method in the recipe is 'raw'. The hcst and fcst will not be",
+                 "calibrated."))
     }
+    ## TODO: Improve efficiency
+    fcst_calibrated <- fcst
+    hcst_calibrated <- hcst
+    # if (!is.null(data$hcst.full_val)) {
+    #   hcst_full_calibrated <- data$hcst.full_val
+    # } else {
+    #   hcst_full_calibrated <- NULL
+    # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
-    ## TODO: Calibrate full fields when present
     # Calibration function params
     mm <- recipe$Analysis$Datasets$Multimodel
     if (is.null(recipe$Analysis$ncores)) {
@@ -40,137 +42,135 @@
 
     CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
     # Replicate observation array for the multi-model case
-    ## TODO: Implement for obs.full_val
     if (mm) {
       obs.mm <- obs$data
-      for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
-        obs.mm <- abind(obs.mm, data$obs$data,
-                        along=which(names(dim(data$obs$data)) == 'dat'))
+      for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, obs$data,
+                        along=which(names(dim(obs$data)) == 'dat'))
       }
       names(dim(obs.mm)) <- names(dim(obs$data))
-      data$obs$data <- obs.mm
+      obs$data <- obs.mm
       remove(obs.mm)
     }
 
     if (recipe$Analysis$Variables$freq == "monthly_mean") {
 
       CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based")
-      ## TODO: implement other calibration methods
+      ## TODO: this belongs in the recipe checker
       if (!(method %in% CST_CALIB_METHODS)) {
        error(recipe$Run$logger,
              paste("Calibration method in the recipe is not available for",
                    "monthly data."))
        stop()
      } else {
-        # Calibrate the hindcast
-        hcst_calibrated <- CST_Calibration(data$hcst, data$obs,
-                                           cal.method = method,
-                                           eval.method = "leave-one-out",
-                                           multi.model = mm,
-                                           na.fill = TRUE,
-                                           na.rm = na.rm,
-                                           apply_to = NULL,
-                                           alpha = NULL,
-                                           memb_dim = "ensemble",
-                                           sdate_dim = "syear",
-                                           ncores = ncores)
-        # In the case where anomalies have been computed, calibrate full values
-        if (!is.null(data$hcst.full_val)) {
-          hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
-                                                  data$obs.full_val,
-                                                  cal.method = method,
-                                                  eval.method = "leave-one-out",
-                                                  multi.model = mm,
-                                                  na.fill = TRUE,
-                                                  na.rm = na.rm,
-                                                  apply_to = NULL,
-                                                  memb_dim = "ensemble",
-                                                  sdate_dim = "syear",
-                                                  ncores = ncores)
-        } else {
-          hcst_full_calibrated <- NULL
-        }
+        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                          cal.method = method, eval.method = "leave-one-out",
+                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
+                          apply_to = NULL, alpha = NULL,
+                          memb_dim = "ensemble", sdate_dim = "syear",
+                          dat_dim = "dat", ncores = ncores)
+        if (inherits(hcst, "s2dv_cube")) {
+          fun <- CST_Calibration
+        } else {
+          fun <- Calibration
+        }
+        hcst_calibrated <- do.call(fun, arguments)
+        # # In the case where anomalies have been computed, calibrate full values
+        # if (!is.null(data$hcst.full_val)) {
+        #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+        #                                           data$obs.full_val,
+        #                                           cal.method = method,
+        #                                           eval.method = "leave-one-out",
+        #                                           multi.model = mm,
+        #                                           na.fill = TRUE,
+        #                                           na.rm = na.rm,
+        #                                           apply_to = NULL,
+        #                                           memb_dim = "ensemble",
+        #                                           sdate_dim = "syear",
+        #                                           ncores = ncores)
+        # } else {
+        #   hcst_full_calibrated <- NULL
+        # }
 
-        # Calibrate the forecast
-        if (!is.null(data$fcst)) {
-          fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst,
-                                             cal.method = method,
-                                             eval.method = "leave-one-out",
-                                             multi.model = mm,
-                                             na.fill = TRUE,
-                                             na.rm = na.rm,
-                                             apply_to = NULL,
-                                             alpha = NULL,
-                                             memb_dim = "ensemble",
-                                             sdate_dim = "syear",
-                                             ncores = ncores)
+        # Calibrate the forecast
+        if (!is.null(fcst)) {
+          arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+                            cal.method = method, eval.method = "leave-one-out",
+                            multi.model = mm, na.fill = TRUE, na.rm = na.rm,
+                            apply_to = NULL, alpha = NULL,
+                            memb_dim = "ensemble", sdate_dim = "syear",
+                            dat_dim = "dat", ncores = ncores)
+          fcst_calibrated <- do.call(fun, arguments)
        } else {
          fcst_calibrated <- NULL
        }
      }
    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
+      ## TODO: Move to recipe checker
      # Daily data calibration using Quantile Mapping
-      if (!(method %in% c("qmap"))) {
-        error(recipe$Run$logger,
-              paste("Calibration method in the recipe is not available for",
-                    "daily data. Only quantile mapping 'qmap' is implemented."))
-        stop()
-      }
+      # if (!(method %in% c("qmap"))) {
+      #   error(recipe$Run$logger,
+      #         paste("Calibration method in the recipe is not available for",
+      #               "daily data. Only quantile mapping 'qmap' is implemented."))
+      #   stop()
+      # }
      # Calibrate the hindcast
-      dim_order <- names(dim(data$hcst$data))
-      hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs,
-                                             exp_cor = NULL,
-                                             sdate_dim = "syear",
-                                             memb_dim = "ensemble",
-                                             # window_dim = "time",
-                                             method = "QUANT",
-                                             ncores = ncores,
-                                             na.rm = na.rm,
-                                             wet.day = F)
-      # Restore dimension order
-      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
-      # In the case where anomalies have been computed, calibrate full values
-      if (!is.null(data$hcst.full_val)) {
-        hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
-                                                    data$obs.full_val,
-                                                    exp_cor = NULL,
-                                                    sdate_dim = "syear",
-                                                    memb_dim = "ensemble",
-                                                    method = "QUANT",
-                                                    ncores = ncores,
-                                                    na.rm = na.rm,
-                                                    wet.day = F)
+      arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                        sdate_dim = "syear", memb_dim = "ensemble",
+                        method = "QUANT", ncores = ncores,
+                        na.rm = na.rm, wet.day = F)
+      if (inherits(hcst, "s2dv_cube")) {
+        ## TODO: Modify QuantileMapping to reorder dims?
+        dim_order <- names(hcst$dims)
+        fun <- CST_QuantileMapping
+        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+                          sdate_dim = "syear", memb_dim = "ensemble",
+                          method = "QUANT", ncores = ncores,
+                          na.rm = na.rm, wet.day = F)
      } else {
-        hcst_full_calibrated <- NULL
+        dim_order <- names(attr(data$hcst, "Dimensions"))
+        fun <- QuantileMapping
      }
-
-      if (!is.null(data$fcst)) {
+      hcst_calibrated <- do.call(fun, arguments)
+      # # Restore dimension order
+      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # # In the case where anomalies have been computed, calibrate full values
+      # if (!is.null(data$hcst.full_val)) {
+      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+      #                                               data$obs.full_val,
+      #                                               exp_cor = NULL,
+      #                                               sdate_dim = "syear",
+      #                                               memb_dim = "ensemble",
+      #                                               method = "QUANT",
+      #                                               ncores = ncores,
+      #                                               na.rm = na.rm,
+      #                                               wet.day = F)
+      # } else {
+      #   hcst_full_calibrated <- NULL
+      # }
+      if (!is.null(fcst)) {
+        arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+                          sdate_dim = "syear", memb_dim = "ensemble",
+                          method = "QUANT", ncores = ncores,
+                          na.rm = na.rm, wet.day = F)
        # Calibrate the forecast
-        fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs,
-                                               exp_cor = data$fcst,
-                                               sdate_dim = "syear",
-                                               memb_dim = "ensemble",
-                                               # window_dim = "time",
-                                               method = "QUANT",
-                                               ncores = ncores,
-                                               na.rm = na.rm,
-                                               wet.day = F)
+        fcst_calibrated <- do.call(fun, arguments)
        # Restore dimension order
-        fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
+        # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
-  info(recipe$Run$logger, CALIB_MSG)
+  # info(recipe$Run$logger, CALIB_MSG)
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = data$obs,
-                      fcst = fcst_calibrated)
-  if (!is.null(hcst_full_calibrated)) {
-    return_list <- append(return_list,
-                          list(hcst.full_val = hcst_full_calibrated,
-                               obs.full_val = data$obs.full_val))
-  }
+                      obs = obs) # ,
+                      # fcst = fcst_calibrated)
+  # if (!is.null(hcst_full_calibrated)) {
+  #   return_list <- append(return_list,
+  #                         list(hcst.full_val = hcst_full_calibrated,
+  #                              obs.full_val = data$obs.full_val))
+  # }
  return(return_list)
 }

--
GitLab

From f4ed1fd0f9803a17c90325d4c155e687b1b388a0 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:49:31 +0200
Subject: [PATCH 08/96] Add functions to build the compute function, run the
 compute workflow and convert output to s2dv_cube (WIP)

---
 build_compute_workflow.R | 56 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 build_compute_workflow.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
new file mode 100644
index 00000000..c495b76f
--- /dev/null
+++ b/build_compute_workflow.R
@@ -0,0 +1,56 @@
+# Function to build a function to put inside compute
+build_compute_workflow <- function(recipe) {
+  ## TODO: Sort out system and order
+  ## Alternatively, this function could just be defined by the user
+  # Define function
+  my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
+    add_calibration <- TRUE
+    if (add_calibration) {
+      data <- calibrate_datasets(recipe, hcst, obs, fcst)
+    }
+    return(data)
+  }
+  info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
+  return(my_compute_function)
+}
+
+convert_to_s2dv_cube <- function(new_cube, original_cube) {
+  ## TODO: Make sure all the metadata is created correctly
+  attr(new_cube, "Variables") <- attr(original_cube, "Variables")
+  attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")
+  class(new_cube) <- "startR_array"
+  new_cube <- as.s2dv_cube(new_cube)
+  return(new_cube)
+}
+
+run_compute_workflow <- function(recipe, data) {
+
+  # Step 1: Define the function
+  ## TODO: Sort out order
+  my_compute_function <- build_compute_workflow(recipe)
+  # Step 2: Define the call to compute()
+  ## TODO: Define the way the dimensions are split
+
+  step <- Step(fun = my_compute_function,
+               target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
+                                  c('sday', 'sweek', 'syear')),
+               output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
+                                  c('sday', 'sweek', 'syear')))
+  wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
+                step,
+                recipe = recipe)
+  res <- Compute(wf$output2,
+                 chunks = list(latitude = 2,
+                               longitude = 2))
+
+  info(recipe$Run$logger,
+       "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####")
+  # Step 3: Convert result to s2dv_cube and return it
+  hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
+  obs <- convert_to_s2dv_cube(res$output2, data$obs)
+  ## TODO: fcst
+  info(recipe$Run$logger,
+       "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
+  return(list(hcst = hcst, obs = obs))
+}
+
--
GitLab
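[Editorial note between patches: for orientation, the startR pattern that `run_compute_workflow()` wraps — as the later patches in this series refine it — is, schematically, the one below. The dimension names follow the patches; the chunk counts and the use of named target/output dimension lists are illustrative of where the series ends up, not a fixed API recommendation.]

  step <- Step(fun = my_compute_function,
               target_dims = list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
                                  obs = c('sday', 'sweek', 'syear')),
               output_dims = list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
                                  obs = c('sday', 'sweek', 'syear')))
  wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                step, recipe = recipe)
  # Chunk along dimensions the function does not operate on; two chunks per
  # spatial dimension here is purely illustrative.
  res <- Compute(wf$hcst, chunks = list(latitude = 2, longitude = 2))

[Each chunk is processed independently with only its slice of the data in memory, which is what makes the `retrieve = FALSE` loading mode introduced in patch 01 pay off.]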
From 54b40e17627c5276f247d7321619323b516daec8 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Fri, 26 May 2023 16:57:09 +0200
Subject: [PATCH 09/96] Add TODOs

---
 build_compute_workflow.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index c495b76f..00a37e23 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -8,6 +8,9 @@ build_compute_workflow <- function(recipe) {
     if (add_calibration) {
       data <- calibrate_datasets(recipe, hcst, obs, fcst)
     }
+    ## TODO: Define what to return depending on functions.
+    ## Also, maybe return an additional variable to determine if the output
+    ## needs to be converted back to s2dv_cube or what.
     return(data)
   }
   info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
   return(my_compute_function)
 }
 
 convert_to_s2dv_cube <- function(new_cube, original_cube) {
+  ## TODO: Make function more general?
   ## TODO: Make sure all the metadata is created correctly
   attr(new_cube, "Variables") <- attr(original_cube, "Variables")
   attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")

--
GitLab

From 01ff5c16c9910e850ee00602326ac94d0631a613 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 29 May 2023 12:52:20 +0200
Subject: [PATCH 10/96] Try to use 'data' as argument

---
 build_compute_workflow.R          | 18 ++++++++++---
 modules/Calibration/Calibration.R | 42 ++++++++++++++-----------------
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 00a37e23..2168f23b 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -3,10 +3,19 @@ build_compute_workflow <- function(recipe) {
   ## TODO: Sort out system and order
   ## Alternatively, this function could just be defined by the user
   # Define function
+  ## TODO: Define returns
   my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
-    add_calibration <- TRUE
-    if (add_calibration) {
-      data <- calibrate_datasets(recipe, hcst, obs, fcst)
+    modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
+                                ", | |,")[[1]])
+    data <- list(hcst = hcst, obs = obs, fcst = fcst)
+    for (module in modules) {
+      if (module == "calibration") {
+        data <- calibrate_datasets(recipe, data)
+      } else if (module == "anomalies") {
+        data <- compute_anomalies(recipe, data)
+      } else if (module == "skill") {
+        skill_metrics <- compute_skill_metrics(recipe, data)
+      }
     }
     ## TODO: Define what to return depending on functions.
     ## Also, maybe return an additional variable to determine if the output
     ## needs to be converted back to s2dv_cube or what.
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index b8c4c3d5..b8bf782a 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,5 +1,4 @@
-calibrate_datasets <- function(recipe, hcst, obs, fcst = NULL) {
+calibrate_datasets <- function(recipe, data) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
   # data: list of s2dv_cube objects containing the hcst, obs and fcst.
   # Optionally, it may also have hcst.full_val and obs.full_val.
   # recipe: object obtained when passing the .yml recipe file to read_yaml()
-
+  
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    if (inherits(hcst, "s2dv_cube")) {
+    if (inherits(data$hcst, "s2dv_cube")) {
       warn(recipe$Run$logger,
            paste("The Calibration module has been called, but the calibration",
                  "method in the recipe is 'raw'. The hcst and fcst will not be",
                  "calibrated."))
     }
     ## TODO: Improve efficiency
-    fcst_calibrated <- fcst
-    hcst_calibrated <- hcst
+    fcst_calibrated <- data$fcst
+    hcst_calibrated <- data$hcst
     # if (!is.null(data$hcst.full_val)) {
     #   hcst_full_calibrated <- data$hcst.full_val
     # } else {
     #   hcst_full_calibrated <- NULL
     # }
     CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####"
 
   } else {
@@ -42,13 +41,14 @@
 
     CALIB_MSG <- "##### CALIBRATION COMPLETE #####"
     # Replicate observation array for the multi-model case
+    ## TODO: Adapt for Compute()
    if (mm) {
-      obs.mm <- obs$data
-      for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) {
-        obs.mm <- abind(obs.mm, obs$data,
-                        along=which(names(dim(obs$data)) == 'dat'))
+      obs.mm <- data$obs$data
+      for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) {
+        obs.mm <- abind(obs.mm, data$obs$data,
+                        along=which(names(dim(data$obs$data)) == 'dat'))
      }
-      names(dim(obs.mm)) <- names(dim(obs$data))
+      names(dim(obs.mm)) <- names(dim(data$obs$data))
      obs$data <- obs.mm
      remove(obs.mm)
    }
@@ -63,13 +63,13 @@
            paste("Calibration method in the recipe is not available for",
                  "monthly data."))
        stop()
      } else {
-        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                          cal.method = method, eval.method = "leave-one-out",
                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                          apply_to = NULL, alpha = NULL,
                          memb_dim = "ensemble", sdate_dim = "syear",
                          dat_dim = "dat", ncores = ncores)
-        if (inherits(hcst, "s2dv_cube")) {
+        if (inherits(data$hcst, "s2dv_cube")) {
          fun <- CST_Calibration
        } else {
          fun <- Calibration
        }
        hcst_calibrated <- do.call(fun, arguments)
@@ -93,8 +93,8 @@
 
        # Calibrate the forecast
-        if (!is.null(fcst)) {
-          arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+        if (!is.null(data$fcst)) {
+          arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                            cal.method = method, eval.method = "leave-one-out",
                            multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                            apply_to = NULL, alpha = NULL,
                            memb_dim = "ensemble", sdate_dim = "syear",
                            dat_dim = "dat", ncores = ncores)
          fcst_calibrated <- do.call(fun, arguments)
        } else {
          fcst_calibrated <- NULL
        }
      }
    } else if (recipe$Analysis$Variables$freq == "daily_mean") {
      ## TODO: Move to recipe checker
      # Daily data calibration using Quantile Mapping
      # if (!(method %in% c("qmap"))) {
      #   error(recipe$Run$logger,
      #         paste("Calibration method in the recipe is not available for",
      #               "daily data. Only quantile mapping 'qmap' is implemented."))
      #   stop()
      # }
      # Calibrate the hindcast
-      arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
+      arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                        sdate_dim = "syear", memb_dim = "ensemble",
                        method = "QUANT", ncores = ncores,
                        na.rm = na.rm, wet.day = F)
-      if (inherits(hcst, "s2dv_cube")) {
+      if (inherits(data$hcst, "s2dv_cube")) {
        ## TODO: Modify QuantileMapping to reorder dims?
        dim_order <- names(hcst$dims)
        fun <- CST_QuantileMapping
-        arguments <- list(exp = hcst, obs = obs, exp_cor = NULL,
-                          sdate_dim = "syear", memb_dim = "ensemble",
-                          method = "QUANT", ncores = ncores,
-                          na.rm = na.rm, wet.day = F)
      } else {
        dim_order <- names(attr(data$hcst, "Dimensions"))
        fun <- QuantileMapping
      }
      hcst_calibrated <- do.call(fun, arguments)
      # # Restore dimension order
      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
      # # In the case where anomalies have been computed, calibrate full values
      # if (!is.null(data$hcst.full_val)) {
      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
      #                                               data$obs.full_val,
      #                                               exp_cor = NULL,
      #                                               sdate_dim = "syear",
      #                                               memb_dim = "ensemble",
      #                                               method = "QUANT",
      #                                               ncores = ncores,
      #                                               na.rm = na.rm,
      #                                               wet.day = F)
      # } else {
      #   hcst_full_calibrated <- NULL
      # }
-      if (!is.null(fcst)) {
-        arguments <- list(exp = hcst, obs = obs, exp_cor = fcst,
+      if (!is.null(data$fcst)) {
+        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          sdate_dim = "syear", memb_dim = "ensemble",
                          method = "QUANT", ncores = ncores,
                          na.rm = na.rm, wet.day = F)
        # Calibrate the forecast
        fcst_calibrated <- do.call(fun, arguments)
        # Restore dimension order
        # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order)
      } else {
        fcst_calibrated <- NULL
      }
    }
  }
  ## TODO: Sort out returns
  return_list <- list(hcst = hcst_calibrated,
-                      obs = obs) # ,
+                      obs = data$obs) # ,
                      # fcst = fcst_calibrated)
  # if (!is.null(hcst_full_calibrated)) {
  #   return_list <- append(return_list,
  #                         list(hcst.full_val = hcst_full_calibrated,
  #                              obs.full_val = data$obs.full_val))
  # }
  return(return_list)
 }

--
GitLab

From 278a994cb7ceb5b20f6d775ced0f7e14cb8cec20 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 30 May 2023 12:24:00 +0200
Subject: [PATCH 11/96] Test to retrieve metrics with Compute() (WIP)

---
 build_compute_workflow.R | 19 ++++++++++++++-----
 modules/Skill/Skill.R    | 23 ++++++++++++++++-------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 2168f23b..b3e6ed0d 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -6,8 +6,11 @@ build_compute_workflow <- function(recipe) {
   my_compute_function = function(recipe, hcst, obs, fcst = NULL) {
     modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                                 ", | |,")[[1]])
     data <- list(hcst = hcst, obs = obs, fcst = fcst)
+    skill_metrics <- NULL
+    ## TODO: Define returns?
+    return_list <- data
     for (module in modules) {
       if (module == "calibration") {
         data <- calibrate_datasets(recipe, data)
@@ -15,12 +18,13 @@
       } else if (module == "anomalies") {
         data <- compute_anomalies(recipe, data)
       } else if (module == "skill") {
         skill_metrics <- compute_skill_metrics(recipe, data)
+        # return_list <- c(return_list, skill_metrics = skill_metrics)
       }
     }
     ## TODO: Define what to return depending on functions.
     ## Also, maybe return an additional variable to determine if the output
     ## needs to be converted back to s2dv_cube or what.
-    return(data)
+    return(list(data$hcst, data$obs, skill_metrics))
   }
   info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####")
   return(my_compute_function)
 }
@@ -31,6 +35,7 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) {
   attr(new_cube, "Variables") <- attr(original_cube, "Variables")
   attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors")
+
   class(new_cube) <- "startR_array"
   new_cube <- as.s2dv_cube(new_cube)
   return(new_cube)
 }
@@ -44,15 +49,18 @@ run_compute_workflow <- function(recipe, data) {
   # Step 2: Define the call to compute()
   ## TODO: Define the way the dimensions are split
+  ## TODO: Add use attributes
+  ## TODO: Handle skill output
   step <- Step(fun = my_compute_function,
                target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
                                   c('sday', 'sweek', 'syear')),
                output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear')))
+                                  c('sday', 'sweek', 'syear'),
+                                  c('metric', 'syear'))) #,
+                                  # c('sday', 'sweek', 'syear')))
   wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                 step,
                 recipe = recipe)
-  res <- Compute(wf$output2,
+  res <- Compute(wf$output1, #wf$output1
                  chunks = list(latitude = 2,
                                longitude = 2))
 
@@ -61,10 +69,11 @@
   # Step 3: Convert result to s2dv_cube and return it
   hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
   obs <- convert_to_s2dv_cube(res$output2, data$obs)
+  skill_metrics <- res$output3
   ## TODO: fcst
+  ## TODO: skill
   info(recipe$Run$logger,
        "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
-  return(list(hcst = hcst, obs = obs))
+  return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics))
 }
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 9f97e688..767cf7f7 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -118,7 +118,8 @@ compute_skill_metrics <- function(recipe, data) {
       skill_metrics[[ metric ]] <- skill
     # Ranked Probability Skill Score and Fair version
     } else if (metric %in% c('rpss', 'frpss')) {
-      skill <- RPSS(data$hcst$data, data$obs$data,
+      # skill <- RPSS(data$hcst$data, data$obs$data,
+      skill <- RPSS(data$hcst, data$obs,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
@@ -262,11 +263,12 @@
       # Compute SpecsVerification version of the metrics
       ## Retain _specs in metric name for clarity
       metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name
-      if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr',
-                               'rpss'))) {
-        warn(recipe$Run$logger,
-             "Some of the requested SpecsVerification metrics are not available.")
-      }
+      ## TODO: Move this to the recipe checker
+      # if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr',
+      #                          'rpss'))) {
+      #   warn(recipe$Run$logger,
+      #        "Some of the requested SpecsVerification metrics are not available.")
+      # }
       capture.output(
         skill <- Compute_verif_metrics(data$hcst$data, data$obs$data,
                                        skill_metrics = metric_name,
@@ -282,7 +284,14 @@
       skill_metrics[[ metric ]] <- skill
     }
   }
-  info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####")
+  if (inherits(data$hcst, "s2dv_cube")) {
+    info(recipe$Run$logger,
+         "##### SKILL METRIC COMPUTATION COMPLETE #####")
+  } else {
+    dimensions <- names(dim(skill_metrics[[1]]))
+    skill_metrics <- abind(skill_metrics, along = 0)
+    names(dim(skill_metrics)) <- c("metric", dimensions)
+  }
   return(skill_metrics)
 }

--
GitLab

From 52e003c1998f489572b158a060ca60bc24947cde Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 1 Jun 2023 17:15:52 +0200
Subject: [PATCH 12/96] Update modules and create input and output dims
 dynamically

---
 build_compute_workflow.R                      | 53 +++++++++++++------
 modules/Calibration/Calibration.R             |  4 +-
 modules/Skill/Skill.R                         | 53 +++------------
 modules/test_compute.R                        | 30 +++++++++++
 modules/test_seasonal.R                       |  3 +-
 .../atomic_recipes/recipe_system7c3s-tas.yml  |  5 +-
 6 files changed, 82 insertions(+), 66 deletions(-)
 create mode 100644 modules/test_compute.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index b3e6ed0d..83b26c77 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -9,8 +9,6 @@ build_compute_workflow <- function(recipe) {
                                 ", | |,")[[1]])
     data <- list(hcst = hcst, obs = obs, fcst = fcst)
     skill_metrics <- NULL
-    ## TODO: Define returns?
-    return_list <- data
     for (module in modules) {
       if (module == "calibration") {
         data <- calibrate_datasets(recipe, data)
@@ -43,37 +41,58 @@ run_compute_workflow <- function(recipe, data) {
 
+  modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
+                              ", | |,")[[1]])
   # Step 1: Define the function
   ## TODO: Sort out order
   my_compute_function <- build_compute_workflow(recipe)
   # Step 2: Define the call to compute()
-  ## TODO: Define the way the dimensions are split
+  # Create target dimensions
+  ## TODO: Refine this part
+  target_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
+                      obs = c('sday', 'sweek', 'syear'))
+  if (!is.null(data$fcst)) {
+    target_dims <- c(target_dims,
+                     list(fcst = c('sday', 'sweek', 'syear', 'ensemble')))
+  }
+
+  # Create output dimensions
+  ## TODO: Add several conditions depending on user requests
+  output_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'),
+                      obs = c('sday', 'sweek', 'syear'))
+  if (!is.null(data$fcst)) {
+    output_dims <- c(output_dims,
+                     list(fcst = c('sday', 'sweek', 'syear', 'ensemble')))
+  }
+  if ("skill" %in% modules) {
+    output_dims <- c(output_dims,
+                     list(skill = c('metric', 'syear')))
+  }
+  ## TODO: Create output dimensions dynamically
   ## TODO: Add use attributes
   ## TODO: Handle skill output
   step <- Step(fun = my_compute_function,
-               target_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear')),
-               output_dims = list(c('sday', 'sweek', 'syear', 'ensemble'),
-                                  c('sday', 'sweek', 'syear'),
-                                  c('metric', 'syear'))) #,
-                                  # c('sday', 'sweek', 'syear')))
+               target_dims = target_dims,
+               output_dims = output_dims)
   wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs),
                 step,
                 recipe = recipe)
-  res <- Compute(wf$output1, #wf$output1
-                 chunks = list(latitude = 2,
-                               longitude = 2))
+  res <- Compute(wf$hcst,
+                 chunks = recipe$Run$startR_workflow$chunk_along)
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####")
   # Step 3: Convert result to s2dv_cube and return it
-  hcst <- convert_to_s2dv_cube(res$output1, data$hcst)
-  obs <- convert_to_s2dv_cube(res$output2, data$obs)
-  skill_metrics <- res$output3
   ## TODO: fcst
-  ## TODO: skill
+  hcst <- convert_to_s2dv_cube(res$hcst, data$hcst)
+  obs <- convert_to_s2dv_cube(res$obs, data$obs)
+  ## TODO: Transform skill metrics back into a list
+  metric_list <- c("rpss", "rpss_significance") ## to be removed
+  skill <- .drop_dims(res$skill)
+  # skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]})
+
   info(recipe$Run$logger,
-       "##### DATA RETURNED AS A LIST OF s2dv_cube OBJECTS #####")
+       "##### DATA RETURNED AS A NAMED LIST #####")
   return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics))
 }
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 2222c2b1..4b6e00ca 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -68,7 +68,8 @@
                           multi.model = mm, na.fill = TRUE, na.rm = na.rm,
                           apply_to = NULL, alpha = NULL,
                           memb_dim = "ensemble", sdate_dim = "syear",
-                          dat_dim = "dat", ncores = ncores)
+                          ncores = ncores)
+                          # dat_dim = "dat", ncores = ncores)
         if (inherits(data$hcst, "s2dv_cube")) {
           fun <- CST_Calibration
         } else {
@@ -158,7 +159,6 @@
     }
   }
-  # info(recipe$Run$logger, CALIB_MSG)
   ## TODO: Sort out returns
   return_list <- list(hcst = hcst_calibrated,
                       obs = data$obs) # ,
                       # fcst = fcst_calibrated)
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index bcf3689b..619dc91b 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -17,45 +17,7 @@
 source("modules/Skill/R/tmp/GetProbs.R")
 source("modules/Skill/R/tmp/RandomWalkTest.R")
 source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
-## TODO: Implement this in the future
-## Which parameter are required?
-# if (!("obs" %in% ls()) || is.null(obs)) {
-#   error(logger,
-#         "There is no object 'obs' in the global environment or it is NULL")
-# }
-# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) {
-#   error(logger,
-#         "There is no object 'fcst' in the global environment or it is NULL")
-# }
-# if (!("hcst" %in% ls()) || is.null(hcst)) {
-#   error(logger,
-#         "There is no object 'hcst' in the global environment or it is NULL")
-# }
-# if (!("metric" %in% ls()) || is.null(metric)) {
-#   warn(logger,
-#        "Verification metric not found and it is set as 'EnsCorr'.")
-#   metric <- 'EnsCorr'
-# }
-# if (metric %in% c('FRPSS', 'RPSS')) {
-#   metric_fun <- "veriApply"
-#   metric_method <- "FairRpss"
-# } else if (metric %in% c("FCRPSS", "CRPSS")) {
-#   metric_fun <- "veriApply"
-# } else if (metric %in% c("EnsCorr", "EnsCor")) {
-#   metric_fun <- "veriApply"
-#   metric_method <- "EnsCorr"
-#   #...
-# } else { -# error(logger, "Unknown verification metric defined in the recipe.") -# metric_fun <- 'NotFound' -# } -# info(logger, paste("#-------------------------- ", "\n", -# " running Skill module ", "\n", -# " it can call ", metric_fun )) -# compute_skill_metrics <- function(recipe, data$hcst, obs, -# clim_data$hcst = NULL, -# clim_obs = NULL) { compute_skill_metrics <- function(recipe, data) { # data$hcst: s2dv_cube containing the hindcast @@ -92,13 +54,14 @@ compute_skill_metrics <- function(recipe, data) { } else { na.rm = recipe$Analysis$remove_NAs } - if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { - warn(recipe$Run$logger, - "cross_validation parameter not defined, setting it to FALSE.") - cross.val <- FALSE - } else { - cross.val <- recipe$Analysis$Workflow$Skill$cross_validation - } + ## TODO: Move this to the recipe checker + # if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { + # warn(recipe$Run$logger, + # "cross_validation parameter not defined, setting it to FALSE.") + # cross.val <- FALSE + # } else { + # cross.val <- recipe$Analysis$Workflow$Skill$cross_validation + # } skill_metrics <- list() for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed diff --git a/modules/test_compute.R b/modules/test_compute.R new file mode 100644 index 00000000..16246069 --- /dev/null +++ b/modules/test_compute.R @@ -0,0 +1,30 @@ +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("build_compute_workflow.R") + +recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" +recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- load_datasets(recipe, retrieve = F) + +new_data <- run_compute_workflow(recipe, data) +# Calibrate datasets +data <- calibrate_datasets(recipe, data) +# Compute anomalies +data <- compute_anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- compute_skill_metrics(recipe, data) +# Compute percentiles and probability bins +probabilities <- compute_probabilities(recipe, data) +# Export all data to netCDF +## TODO: Fix plotting +# save_data(recipe, data, skill_metrics, probabilities) +# Plot data +# plot_data(recipe, calibrated_data, skill_metrics, probabilities, +# significance = T) diff --git a/modules/test_seasonal.R b/modules/test_seasonal.R index 75b27b53..394e2e1a 100644 --- a/modules/test_seasonal.R +++ b/modules/test_seasonal.R @@ -5,7 +5,8 @@ source("modules/Skill/Skill.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") -recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" +recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml" +# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml" recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index e5cd2aba..a57ca5af 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -36,7 +36,7 @@ Analysis: method: mse_min save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' Skill: - metric: RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS + metric: RPSS # RPS RPSS CRPS CRPSS FRPSS BSS10 
BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' Probabilities: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] @@ -53,3 +53,6 @@ Run: Terminal: yes output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: calibration skill # Modules to run inside Compute(), in order + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks...} -- GitLab From 3e36d723de9d19e011f002cae7d57531253892a4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 7 Jun 2023 17:16:17 +0200 Subject: [PATCH 13/96] Modify test, add pre-processing module and forecast, define all inputs and outputs dynamically --- build_compute_workflow.R | 69 ++++++++++++++++----------- modules/Calibration/Calibration.R | 7 +-- modules/Preprocessing/Preprocessing.R | 7 +++ modules/Skill/Skill.R | 1 + modules/test_compute.R | 16 ++----- 5 files changed, 57 insertions(+), 43 deletions(-) create mode 100644 modules/Preprocessing/Preprocessing.R diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 83b26c77..aa46c99b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -8,6 +8,7 @@ build_compute_workflow <- function(recipe) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) data <- list(hcst = hcst, obs = obs, fcst = fcst) + data <- preprocess_datasets(recipe, data) skill_metrics <- NULL for (module in modules) { if (module == "calibration") { @@ -16,20 +17,22 @@ build_compute_workflow <- function(recipe) { data <- compute_anomalies(recipe, data) } else if (module == "skill") { skill_metrics <- compute_skill_metrics(recipe, data) - # return_list <- c(return_list, skill_metrics = skill_metrics) } } - ## TODO: Define what to return depending on functions. - ## Also, maybe return an additional variable to determine if the output - ## needs to be converted back to s2dv_cube or what. - return(list(data$hcst, data$obs, skill_metrics)) + ## TODO: Define what to return depending the modules called + the recipe + # Eliminate NULL elements from the return list + return_list <- list(hcst = data$hcst, + obs = data$obs, + fcst = data$fcst, + skill = skill_metrics) + return_list <- return_list[!sapply(return_list, is.null)] + return(return_list) } info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####") return(my_compute_function) } convert_to_s2dv_cube <- function(new_cube, original_cube) { - ## TODO: Make function more general? 
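## Sketch of the round trip this helper performs (variable names follow
## run_compute_workflow() below; illustrative, not part of the patch):
## Compute() hands back plain arrays, so the startR metadata attributes are
## grafted back on and the class is restored, which is what as.s2dv_cube()
## dispatches on:
# out <- res$hcst                                  # plain array from Compute()
# attr(out, "Variables") <- attr(data$hcst, "Variables")
# attr(out, "FileSelectors") <- attr(data$hcst, "FileSelectors")
# class(out) <- "startR_array"
# cube <- as.s2dv_cube(out)                        # now a regular s2dv_cube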
## TODO: Make sure all the metadata is created correctly attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") @@ -44,22 +47,23 @@ run_compute_workflow <- function(recipe, data) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Step 1: Define the function - ## TODO: Sort out order my_compute_function <- build_compute_workflow(recipe) - # Step 2: Define the call to compute() + + # --------------------------------------------------------------------------- + # Step 2: Define the inputs and outputs for Compute() # Create target dimensions - ## TODO: Refine this part target_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'), obs = c('sday', 'sweek', 'syear')) + inputs <- list(hcst = data$hcst, obs = data$obs) if (!is.null(data$fcst)) { target_dims <- c(target_dims, list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + inputs <- c(inputs, list(fcst = data$fcst)) } - # Create output dimensions ## TODO: Add several conditions depending on user requests output_dims <- list(hcst = c('sday', 'sweek', 'syear', 'ensemble'), - obs = c('sday', 'sweek', 'syear')) + obs = c('sday', 'sweek', 'syear', 'ensemble')) if (!is.null(data$fcst)) { output_dims <- c(output_dims, list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) @@ -68,31 +72,42 @@ run_compute_workflow <- function(recipe, data) { output_dims <- c(output_dims, list(skill = c('metric', 'syear'))) } - ## TODO: Create output dimensions dynamically + + # --------------------------------------------------------------------------- + # Step 3: Generate the Step and call Compute() ## TODO: Add use attributes - ## TODO: Handle skill output step <- Step(fun = my_compute_function, target_dims = target_dims, output_dims = output_dims) - wf <- AddStep(inputs = list(hcst = data$hcst, obs = data$obs), - step, - recipe = recipe) + wf <- AddStep(inputs = inputs, + step = step, + recipe = recipe) res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) info(recipe$Run$logger, - "##### COMPUTE SECTION ENDED, CONVERTING TO s2dv_cube #####") - # Step 3: Convert result to s2dv_cube and return it - ## TODO: fcst - hcst <- convert_to_s2dv_cube(res$hcst, data$hcst) - obs <- convert_to_s2dv_cube(res$obs, data$obs) - ## TODO: Transform skill metrics back into a list - metric_list <- c("rpss", "rpss_significance") ## to be removed - skill <- .drop_dims(res$skill) - # skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) - + "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") + + # --------------------------------------------------------------------------- + # Step 4: Convert results to s2dv_cube/list format + result <- list() + for (cube in c("hcst", "obs", "fcst")) { + if (!is.null(res[[cube]])) { + result[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) + } else { + result[[cube]] <- NULL + } + } + ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply() + ## should return the attributes of the skill array so that we have the name + ## of each metric, or we should find an alternative way to retrieve them. 
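## One conceivable fix (sketch): if the 'metric' dimnames did survive the
## Compute() call -- which, as the TODO above notes, is not guaranteed -- the
## array could be split back into a named list with ClimProjDiags::Subset().
## A later patch in this series sidesteps the problem by writing the metric
## names to a temporary .Rds file instead.
# metric_names <- dimnames(res$skill)[['metric']]
# skill_list <- lapply(seq_along(metric_names), function(i) {
#   Subset(res$skill, along = 'metric', indices = i, drop = 'selected')
# })
# names(skill_list) <- metric_names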
+ if (!is.null(res$skill)) { + metric_list <- c("rpss", "rpss_significance") ## to be removed + result$skill <- .drop_dims(res$skill) + # result$skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) + } info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") - return(list(hcst = hcst, obs = obs, skill_metrics = skill_metrics)) + return(result) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 4b6e00ca..6c586f54 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -100,7 +100,8 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = "dat", ncores = ncores) + ncores = ncores) + # dat_dim = "dat", ncores = ncores) fcst_calibrated <- do.call(fun, arguments) } else { fcst_calibrated <- NULL @@ -161,8 +162,8 @@ calibrate_datasets <- function(recipe, data) { } ## TODO: Sort out returns return_list <- list(hcst = hcst_calibrated, - obs = data$obs) # , - # fcst = fcst_calibrated) + obs = data$obs, + fcst = fcst_calibrated) # if (!is.null(hcst_full_calibrated)) { # return_list <- append(return_list, # list(hcst.full_val = hcst_full_calibrated, diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R new file mode 100644 index 00000000..8e86a38d --- /dev/null +++ b/modules/Preprocessing/Preprocessing.R @@ -0,0 +1,7 @@ +preprocess_datasets <- function(recipe, data) { + # Remove 'var_dir' dimension + + # Add 'ensemble' dimension to obs + dim(data$obs) <- c(dim(data$obs), ensemble = 1) + return(data) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 619dc91b..b048b2c6 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -309,6 +309,7 @@ compute_skill_metrics <- function(recipe, data) { dimensions <- names(dim(skill_metrics[[1]])) skill_metrics <- abind(skill_metrics, along = 0) names(dim(skill_metrics)) <- c("metric", dimensions) + skill_metrics <- sticky(skill_metrics) } # Return results return(skill_metrics) diff --git a/modules/test_compute.R b/modules/test_compute.R index 16246069..4076c51d 100644 --- a/modules/test_compute.R +++ b/modules/test_compute.R @@ -1,4 +1,5 @@ source("modules/Loading/Loading.R") +source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") @@ -14,17 +15,6 @@ recipe <- prepare_outputs(recipe_file) data <- load_datasets(recipe, retrieve = F) new_data <- run_compute_workflow(recipe, data) -# Calibrate datasets -data <- calibrate_datasets(recipe, data) -# Compute anomalies -data <- compute_anomalies(recipe, data) -# Compute skill metrics -skill_metrics <- compute_skill_metrics(recipe, data) -# Compute percentiles and probability bins -probabilities <- compute_probabilities(recipe, data) -# Export all data to netCDF -## TODO: Fix plotting -# save_data(recipe, data, skill_metrics, probabilities) # Plot data -# plot_data(recipe, calibrated_data, skill_metrics, probabilities, -# significance = T) +plot_data(recipe, calibrated_data, skill_metrics, + significance = T) -- GitLab From 2dbba621b99acb1260f73561a2650494d36f0d58 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 12:25:59 +0200 Subject: [PATCH 14/96] Source Start() from master branch --- modules/Loading/Loading.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 295d3e87..4563a0a8 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -6,9 +6,10 @@ source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") ## TODO: Move to prepare_outputs.R source("tools/libs.R") -## TODO: remove these two lines when new as.s2dv_cube() is in CSTools -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/as.s2dv_cube.R') -source('https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-new_s2dv_cube/R/zzz.R') +## TODO: Remove when new startR is released: +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/Utils.R") load_datasets <- function(recipe, retrieve = T) { -- GitLab From f80a5708740ac7798326b02bded50c16734ae0d8 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:08 +0200 Subject: [PATCH 15/96] Make long name retrieval more flexible ('longname' or 'long_name') --- modules/Visualization/R/plot_ensemble_mean.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index e3d75138..a3f1fe3c 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -39,7 +39,7 @@ plot_ensemble_mean <- function(recipe, fcst, outdir) { # Define brks, centered on in the case of anomalies ## if (grepl("anomaly", - fcst$attrs$Variable$metadata[[variable]]$long_name)) { + fcst$attrs$Variable$metadata[[variable]]$long)) { variable <- paste(variable, "anomaly") max_value <- max(abs(var_ens_mean)) ugly_intervals <- seq(-max_value, max_value, max_value/20) -- GitLab From 12899a3c00b161f7a29979ba82f42cf8e2bf24bb Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:22 +0200 Subject: [PATCH 16/96] Remove 'var_dir' dimension --- modules/Preprocessing/Preprocessing.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index 8e86a38d..6f118670 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -1,6 +1,11 @@ preprocess_datasets <- function(recipe, data) { # Remove 'var_dir' dimension - + for (element in names(data)) { + data[[element]] <- Subset(x = data[[element]], + along = c('var_dir'), + indices = list(1), + drop = 'selected') + } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) return(data) -- GitLab From b8218ab9f9a3d60c3c5f74e87bf7616353348d22 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:39:54 +0200 Subject: [PATCH 17/96] Adapt Anomalies for Compute() (WIP) --- modules/Anomalies/Anomalies.R | 164 +++++++++++++++++++--------------- 1 file changed, 94 insertions(+), 70 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 28092625..8fc38a15 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -3,11 +3,15 @@ compute_anomalies <- function(recipe, data) { - if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) - stop() + ## TODO: Is this necessary? 
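## Note the pattern introduced here and used across the following patches
## (sketch, with an invented body): each module must work both on s2dv_cube
## objects (data loaded with retrieve = TRUE) and on the bare arrays passed
## in by Compute(), so metadata-dependent steps are gated on the class, and
## later on an explicit 'retrieve' argument:
# if (inherits(data$hcst, "s2dv_cube")) {
#   # full metadata available: recipe checks, logging, saving to netCDF
# } else {
#   # inside Compute(): numeric work on plain arrays only
# }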
+ # Check + if (inherits(data$hcst, "s2dv_cube")) { + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() + } } if (recipe$Analysis$Workflow$Anomalies$compute) { @@ -18,89 +22,109 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- data$hcst$dim + original_dims <- dim(data$hcst) # Compute anomalies - anom <- CST_Anomaly(data$hcst, data$obs, - cross = cross, - memb = TRUE, - memb_dim = 'ensemble', - dim_anom = 'syear', - dat_dim = c('dat', 'ensemble'), - ftime_dim = 'time', - ncores = recipe$Analysis$ncores) - # Reorder dims - anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) - anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) - + ## TODO: No cross-validation case + arguments <- list(exp = data$hcst, obs = data$obs, + memb = TRUE, memb_dim = 'ensemble', + dat_dim = c('dat', 'ensemble'), + ncores = recipe$Analysis$ncores) + if (inherits(data$hcst, "s2dv_cube")) { + anom <- do.call(what = CST_Anomaly, + args = c(arguments, list(cross = cross, + dim_anom = 'syear', + ftime_dim = 'time'))) + anom$exp$data <- Reorder(anom$exp$data, names(original_dims)) + anom$obs$data <- Reorder(anom$obs$data, names(original_dims)) + # Change variable metadata + for (var in data$hcst$attrs$Variable$varName) { + # Change hcst longname + data$hcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + # Change obs longname + data$obs$attrs$Variable$metadata[[var]]$long_name <- + paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } + } else { + anom <- do.call(what = Ano_CrossValid, + args = c(arguments, list(time_dim = 'syear'))) + ## TODO: Check if dim reorder is really necessary in all cases + anom$exp <- Reorder(anom$exp, names(original_dims)) + anom$obs <- Reorder(anom$obs, names(original_dims)) + } # Save full fields hcst_fullvalue <- data$hcst obs_fullvalue <- data$obs - - # Hindcast climatology - + # Save anomalies data$hcst <- anom$exp data$obs <- anom$obs remove(anom) - # Change variable metadata - for (var in data$hcst$attrs$Variable$varName) { - # Change hcst longname - data$hcst$attrs$Variable$metadata[[var]]$long_name <- - paste(data$hcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") - # Change obs longname - data$obs$attrs$Variable$metadata[[var]]$long_name <- - paste(data$obs$attrs$Variable$metadata[[var]]$long_name, "anomaly") - } + # Compute forecast anomaly field if (!is.null(data$fcst)) { - # Compute hindcast climatology ensemble mean - clim <- s2dv::Clim(hcst_fullvalue$data, obs_fullvalue$data, - time_dim = "syear", - dat_dim = c("dat", "ensemble"), - memb = FALSE, - memb_dim = "ensemble", - ftime_dim = "time", - ncores = recipe$Analysis$ncores) - clim_hcst <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, - name = "syear") + arguments <- list(time_dim = 'syear', dat_dim = c('dat', 'ensemble'), + memb = FALSE, memb_dim = 'ensemble', ftime_dim = 'time', + ncores = recipe$Analysis$ncores) + if (inherits(data$fcst, "s2dv_cube")) { + clim <- do.call(what = s2dv::Clim, + args = c(list(exp = hcst_fullvalue$data, + obs = obs_fullvalue$data), + arguments)) + } else { + clim <- do.call(what = s2dv::Clim, + args = c(list(exp = hcst_fullvalue, + obs = obs_fullvalue), + arguments)) + } + clim$clim_exp <- InsertDim(clim$clim_exp, posdim = 1, lendim = 1, + name = "syear") # 
Store original dimensions - dims <- dim(clim_hcst) + dims <- dim(clim$clim_exp) + ## Is there a better way to do this? # Repeat the array as many times as ensemble members - clim_hcst <- rep(clim_hcst, data$fcst$dim[['ensemble']]) + clim$clim_exp <- rep(clim$clim_exp, dim(data$fcst)[['ensemble']]) # Rename and reorder dimensions - dim(clim_hcst) <- c(dims, ensemble = data$fcst$dim[['ensemble']]) - clim_hcst <- Reorder(clim_hcst, order = names(data$fcst$dim)) + dim(clim$clim_exp) <- c(dims, ensemble = dim(data$fcst)[['ensemble']]) + clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst))) # Get fcst anomalies - data$fcst$data <- data$fcst$data - clim_hcst - # Change metadata - for (var in data$fcst$attrs$Variable$varName) { - data$fcst$attrs$Variable$metadata[[var]]$long_name <- - paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + if (inherits(data$fcst, "s2dv_cube")) { + data$fcst$data <- data$fcst$data - clim$clim_exp + # Change metadata + for (var in data$fcst$attrs$Variable$varName) { + data$fcst$attrs$Variable$metadata[[var]]$long_name <- + paste(data$fcst$attrs$Variable$metadata[[var]]$long_name, "anomaly") + } + } else { + data$fcst <- data$fcst - clim$clim_exp } } - info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + # Display success messages and save data + if (inherits(data$hcst, "s2dv_cube")) { + info(recipe$Run$logger, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) - info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") - # Save outputs - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Anomalies/") - # Save forecast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only', 'fcst_only')) { - save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - } - # Save hindcast - if (recipe$Analysis$Workflow$Anomalies$save %in% - c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') - } - # Save observation - if (recipe$Analysis$Workflow$Anomalies$save == 'all') { - save_observations(recipe = recipe, data_cube = data$obs) + info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + # Save outputs + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } } else { warn(recipe$Run$logger, paste("The Anomalies module has been called, but", -- GitLab From fbe54e7ec68d7b9bb3d6b7acae8ff87e93fbaf4f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:41:13 +0200 Subject: [PATCH 18/96] Fix broken link --- modules/Calibration/Calibration.R | 6 ++---- modules/Loading/Loading.R | 2 +- modules/Skill/Skill.R | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 6c586f54..7308f0a3 
100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -68,8 +68,7 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - ncores = ncores) - # dat_dim = "dat", ncores = ncores) + dat_dim = NULL, ncores = ncores) if (inherits(data$hcst, "s2dv_cube")) { fun <- CST_Calibration } else { @@ -100,8 +99,7 @@ calibrate_datasets <- function(recipe, data) { multi.model = mm, na.fill = TRUE, na.rm = na.rm, apply_to = NULL, alpha = NULL, memb_dim = "ensemble", sdate_dim = "syear", - ncores = ncores) - # dat_dim = "dat", ncores = ncores) + dat_dim = NULL, ncores = ncores) fcst_calibrated <- do.call(fun, arguments) } else { fcst_calibrated <- NULL diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 4563a0a8..abd01333 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -9,7 +9,7 @@ source("tools/libs.R") ## TODO: Remove when new startR is released: source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R") source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R") -source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/Utils.R") +source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R") load_datasets <- function(recipe, retrieve = T) { diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index b048b2c6..619dc91b 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -309,7 +309,6 @@ compute_skill_metrics <- function(recipe, data) { dimensions <- names(dim(skill_metrics[[1]])) skill_metrics <- abind(skill_metrics, along = 0) names(dim(skill_metrics)) <- c("metric", dimensions) - skill_metrics <- sticky(skill_metrics) } # Return results return(skill_metrics) -- GitLab From e71e474931496821bd987ea1ea74ffd31d66dd0e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 15:42:33 +0200 Subject: [PATCH 19/96] Changes to testing script and recipe --- modules/test_compute.R | 6 ++++-- recipes/atomic_recipes/recipe_system7c3s-tas.yml | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/test_compute.R b/modules/test_compute.R index 4076c51d..4b000122 100644 --- a/modules/test_compute.R +++ b/modules/test_compute.R @@ -14,7 +14,9 @@ recipe <- prepare_outputs(recipe_file) # Load datasets data <- load_datasets(recipe, retrieve = F) -new_data <- run_compute_workflow(recipe, data) +# Run workflow with compute +result <- run_compute_workflow(recipe, data) + # Plot data -plot_data(recipe, calibrated_data, skill_metrics, +plot_data(recipe, result$data, result$skill, significance = T) diff --git a/recipes/atomic_recipes/recipe_system7c3s-tas.yml b/recipes/atomic_recipes/recipe_system7c3s-tas.yml index a57ca5af..b3f15fbf 100644 --- a/recipes/atomic_recipes/recipe_system7c3s-tas.yml +++ b/recipes/atomic_recipes/recipe_system7c3s-tas.yml @@ -42,7 +42,7 @@ Analysis: percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only' Visualization: - plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + plots: skill_metrics, forecast_ensemble_mean Indicators: index: no ncores: 10 @@ -54,5 +54,5 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: calibration skill # Modules to run inside Compute(), in order - chunk_along: {latitude: 2, longitude: 2} # list: 
{dimension_1: # of chunks, dimension_2, # of chunks...} + modules: calibration, anomalies, skill # Modules to run inside Compute(), in order + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2: # of chunks, ...} -- GitLab From 0b42b7690cd0649c7e554a59abba6e9a8e312cb3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 16 Jun 2023 16:14:53 +0200 Subject: [PATCH 20/96] Improve input and output dims and their flexibility, fix bugs in skill outputs --- build_compute_workflow.R | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index aa46c99b..47aedccb 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -20,11 +20,11 @@ build_compute_workflow <- function(recipe) { } } ## TODO: Define what to return depending the modules called + the recipe - # Eliminate NULL elements from the return list return_list <- list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, skill = skill_metrics) + # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] return(return_list) } @@ -42,35 +42,50 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) { return(new_cube) } -run_compute_workflow <- function(recipe, data) { +run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Step 1: Define the function - my_compute_function <- build_compute_workflow(recipe) + if (is.null(my_compute_function)) { + my_compute_function <- build_compute_workflow(recipe) + } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() # Create target dimensions original_dims <- c('dat', 'var', 'var_dir', 'sday', 'sweek', 'syear', 'time', 'latitude', 'longitude', 'ensemble') # Remove the chunking dimensions from the original dimensions in the arrays + exp_target_dims <- + original_dims[!original_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + obs_target_dims <- exp_target_dims[!exp_target_dims == 'ensemble'] + # Default inputs: hindcast and observations target_dims <- list(hcst = exp_target_dims, obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) + # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, - list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + list(fcst = exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) } + # Create output dimensions ## TODO: Add several conditions depending on module/user requests? 
+ exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] + # Default outputs: hindcast and observations + output_dims <- list(hcst = exp_output_dims, + obs = exp_output_dims) + # Add forecast if not empty if (!is.null(data$fcst)) { output_dims <- c(output_dims, - list(fcst = c('sday', 'sweek', 'syear', 'ensemble'))) + list(fcst = exp_output_dims)) } + # Add skill metrics if skill module is called if ("skill" %in% modules) { output_dims <- c(output_dims, - list(skill = c('metric', 'syear'))) + list(skill = c('metric', 'var', 'time'))) } # --------------------------------------------------------------------------- @@ -93,9 +108,9 @@ run_compute_workflow <- function(recipe, data) { result <- list() for (cube in c("hcst", "obs", "fcst")) { if (!is.null(res[[cube]])) { - result[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) + res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]]) } else { - result[[cube]] <- NULL + res$data[[cube]] <- NULL } } ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply() @@ -103,11 +118,12 @@ run_compute_workflow <- function(recipe, data) { ## of each metric, or we should find an alternative way to retrieve them. if (!is.null(res$skill)) { metric_list <- c("rpss", "rpss_significance") ## to be removed - result$skill <- .drop_dims(res$skill) - # result$skill_metrics <- apply(seq(dim(skill)[1]), function(x) {res$skill[x, , , ]}) + res$skill <- lapply(seq(dim(res$skill)[1]), function(x) {res$skill[x, , , , , drop = F]}) + ## TODO: Subset 'metric' dimension + names(res$skill) <- metric_list } info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") - return(result) + return(res) } -- GitLab From 729177bbcae38eb1771b5256c74d901023746d25 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Jun 2023 10:55:41 +0200 Subject: [PATCH 21/96] Simplify compute workflow --- build_compute_workflow.R | 63 +++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 47aedccb..b25444fd 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,35 +1,27 @@ -# Function to build a function to put inside compute -build_compute_workflow <- function(recipe) { - ## TODO: Sort out system and order - ## Alternatively, this function could just be defined by the user - # Define function - ## TODO: Define returns - my_compute_function = function(recipe, hcst, obs, fcst = NULL) { - modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, - ", | |,")[[1]]) - data <- list(hcst = hcst, obs = obs, fcst = fcst) - data <- preprocess_datasets(recipe, data) - skill_metrics <- NULL - for (module in modules) { - if (module == "calibration") { - data <- calibrate_datasets(recipe, data) - } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data) - } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data) - } +compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { + modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, + ", | |,")[[1]]) + data <- list(hcst = hcst, obs = obs, fcst = fcst) + rm(hcst, obs, fcst) + data <- preprocess_datasets(recipe, data) + skill_metrics <- NULL + for (module in modules) { + if (module == "calibration") { + data <- calibrate_datasets(recipe, data) + } else if (module == "anomalies") { + data <- compute_anomalies(recipe, data) + } else if (module == "skill") { + skill_metrics <- compute_skill_metrics(recipe, 
data) } - ## TODO: Define what to return depending the modules called + the recipe - return_list <- list(hcst = data$hcst, - obs = data$obs, - fcst = data$fcst, - skill = skill_metrics) - # Eliminate NULL elements from the return list - return_list <- return_list[!sapply(return_list, is.null)] - return(return_list) } - info(recipe$Run$logger, "##### COMPUTE FUNCTION DEFINED #####") - return(my_compute_function) + ## TODO: Define what to return depending the modules called + the recipe + return_list <- list(hcst = data$hcst, + obs = data$obs, + fcst = data$fcst, + skill = skill_metrics) + # Eliminate NULL elements from the return list + return_list <- return_list[!sapply(return_list, is.null)] + return(return_list) } convert_to_s2dv_cube <- function(new_cube, original_cube) { @@ -42,14 +34,13 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) { return(new_cube) } -run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { +run_compute_workflow <- function(recipe, data) { + + # --------------------------------------------------------------------------- + # Step 1: Retrieve the modules that will be called inside the workflow modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - # Step 1: Define the function - if (is.null(my_compute_function)) { - my_compute_function <- build_compute_workflow(recipe) - } # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() @@ -91,7 +82,7 @@ run_compute_workflow <- function(recipe, data, my_compute_function = NULL) { # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() ## TODO: Add use attributes - step <- Step(fun = my_compute_function, + step <- Step(fun = compute_workflow, target_dims = target_dims, output_dims = output_dims) wf <- AddStep(inputs = inputs, -- GitLab From 084ef11ba3c87515e42380e86bfadcf5ebef939e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 19 Jun 2023 16:47:30 +0200 Subject: [PATCH 22/96] Remove hardcoded array dimensions; do not add 'ensemble' dim if retrieve = F --- build_compute_workflow.R | 14 ++++++++------ modules/Loading/Loading.R | 7 ++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b25444fd..47147997 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -44,13 +44,15 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() - # Create target dimensions - original_dims <- c('dat', 'var', 'var_dir', 'sday', 'sweek', 'syear', 'time', - 'latitude', 'longitude', 'ensemble') - # Remove the chunking dimensions from the original dimensions in the arrays + # Retrieve original dimensions + exp_dims <- names(attr(data$hcst, "Dimensions")) + obs_dims <- names(attr(data$obs, "Dimensions")) + # Remove the chunking dimensions from the original dimensions in the arrays, + # to create the target dimensions exp_target_dims <- - original_dims[!original_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - obs_target_dims <- exp_target_dims[!exp_target_dims == 'ensemble'] + exp_dims[!exp_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + obs_target_dims <- + obs_dims[!obs_dims %in% names(recipe$Run$startR_workflow$chunk_along)] # Default inputs: hindcast and observations target_dims <- list(hcst 
= exp_target_dims, obs = obs_target_dims) diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index abd01333..c3e124d5 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -297,7 +297,8 @@ load_datasets <- function(recipe, retrieve = T) { split_multiselected_dims = TRUE, retrieve = retrieve) } - + + ## TODO: This part belongs to the pre-processing module # Adds ensemble dim to obs (for consistency with hcst/fcst) default_dims <- c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 1, @@ -310,10 +311,6 @@ load_datasets <- function(recipe, retrieve = T) { } # Convert obs to s2dv_cube obs <- as.s2dv_cube(obs) - } else { - ## TODO: How to handle this with retrieve = F? - default_dims[names(attr(obs, "Dimensions"))] <- attr(obs, "Dimensions") - attr(obs, "Dimensions") <- default_dims } # Checks and data summary (only if retrieve = TRUE) -- GitLab From b3f60efacffe5b8c84a0c0ced0dc8eff5f2b3f8c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 20 Jun 2023 11:42:08 +0200 Subject: [PATCH 23/96] Fix bug related to dimname attributes, flexibilize skill output dimensions in compute workflow --- build_compute_workflow.R | 9 +++++++-- modules/Skill/Skill.R | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 47147997..7897ed7b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -66,6 +66,8 @@ run_compute_workflow <- function(recipe, data) { # Create output dimensions ## TODO: Add several conditions depending on module/user requests? + ## For example: if 'downscaling' or 'Indices' in modules, lat and lon cannot + ## be used as chunking dimensions exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, @@ -77,8 +79,11 @@ run_compute_workflow <- function(recipe, data) { } # Add skill metrics if skill module is called if ("skill" %in% modules) { + skill_dims <- c('metric', 'var', 'time', 'latitude', 'longitude') + skill_output_dims <- + skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] output_dims <- c(output_dims, - list(skill = c('metric', 'var', 'time'))) + list(skill = skill_output_dims)) } # --------------------------------------------------------------------------- @@ -93,6 +98,7 @@ run_compute_workflow <- function(recipe, data) { res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) + browser() info(recipe$Run$logger, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") @@ -119,4 +125,3 @@ run_compute_workflow <- function(recipe, data) { "##### DATA RETURNED AS A NAMED LIST #####") return(res) } - diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 619dc91b..f9a92158 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -306,9 +306,17 @@ compute_skill_metrics <- function(recipe, data) { } } } else { + # Retrieve original dimensions dimensions <- names(dim(skill_metrics[[1]])) + # Bind list into an array skill_metrics <- abind(skill_metrics, along = 0) + # Retrieve dimname attributes + dimension_names <- dimnames(skill_metrics) + metric_names <- dimension_names[[1]] + dim(metric_names) <- c(metric = dim(skill_metrics)[[1]]) + # Add 'metric' dimension to array and restore attributes names(dim(skill_metrics)) <- c("metric", dimensions) + dimnames(skill_metrics) <- dimension_names } # Return results return(skill_metrics) -- GitLab From a1fcbed0eeb868652e7473da2743029a12a3feef 
Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 3 Jul 2023 09:11:38 +0200 Subject: [PATCH 24/96] Changes to structure of Compute() funs --- build_compute_workflow.R | 17 +++++++++++++---- {modules => example_scripts}/test_compute.R | 0 .../test_parallel_workflow.R | 0 modules/Calibration/Calibration.R | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) rename {modules => example_scripts}/test_compute.R (100%) rename {modules => example_scripts}/test_parallel_workflow.R (100%) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 7897ed7b..b096874f 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,10 +1,19 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) - data <- list(hcst = hcst, obs = obs, fcst = fcst) + # Create data list + data <- list(hcst = list(), obs = list(), fcst = list()) + # Fill list following the structure of the s2dv_cube + data$hcst$data <- hcst + data$obs$data <- obs + data$fcst$data <- fcst + # Remove duplicated objects rm(hcst, obs, fcst) + # Pre-process datasets data <- preprocess_datasets(recipe, data) + # Define other outputs skill_metrics <- NULL + # Loop over the modules for (module in modules) { if (module == "calibration") { data <- calibrate_datasets(recipe, data) @@ -15,9 +24,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) { } } ## TODO: Define what to return depending the modules called + the recipe - return_list <- list(hcst = data$hcst, - obs = data$obs, - fcst = data$fcst, + return_list <- list(hcst = data$hcst$data, + obs = data$obs$data, + fcst = data$fcst$data, skill = skill_metrics) # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] diff --git a/modules/test_compute.R b/example_scripts/test_compute.R similarity index 100% rename from modules/test_compute.R rename to example_scripts/test_compute.R diff --git a/modules/test_parallel_workflow.R b/example_scripts/test_parallel_workflow.R similarity index 100% rename from modules/test_parallel_workflow.R rename to example_scripts/test_parallel_workflow.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 7308f0a3..117cb5f0 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -41,7 +41,6 @@ calibrate_datasets <- function(recipe, data) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case - ## TODO: Adapt for Compute() if (mm) { obs.mm <- data$obs$data for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { @@ -51,6 +50,7 @@ calibrate_datasets <- function(recipe, data) { names(dim(obs.mm)) <- names(dim(data$obs$data)) obs$data <- obs.mm remove(obs.mm) + gc() } if (recipe$Analysis$Variables$freq == "monthly_mean") { -- GitLab From f3f50432afafe2075a24423e1cf61ce858a54c76 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 4 Jul 2023 17:03:04 +0200 Subject: [PATCH 25/96] Use s2dv_cubes inside self-defined Compute() fun --- build_compute_workflow.R | 18 ++++---- modules/Anomalies/Anomalies.R | 14 +++--- modules/Calibration/Calibration.R | 72 ++++++++++++++++--------------- modules/Skill/Skill.R | 6 +-- 4 files changed, 57 insertions(+), 53 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b096874f..02062b3e 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -2,25 +2,25 @@ compute_workflow <- 
function(recipe, hcst, obs, fcst = NULL) { modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list - data <- list(hcst = list(), obs = list(), fcst = list()) - # Fill list following the structure of the s2dv_cube - data$hcst$data <- hcst - data$obs$data <- obs - data$fcst$data <- fcst + data <- list(hcst = hcst, obs = obs, fcst = fcst) # Remove duplicated objects rm(hcst, obs, fcst) # Pre-process datasets data <- preprocess_datasets(recipe, data) + # Transform data to s2dv_cube + data <- lapply(data, function(x) { + class(x) <- "startR_array" + as.s2dv_cube(x)}) # Define other outputs skill_metrics <- NULL # Loop over the modules for (module in modules) { if (module == "calibration") { - data <- calibrate_datasets(recipe, data) + data <- calibrate_datasets(recipe, data, retrieve = F) } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data) + data <- compute_anomalies(recipe, data, retrieve = F) } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data) + skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F) } } ## TODO: Define what to return depending the modules called + the recipe @@ -107,7 +107,7 @@ run_compute_workflow <- function(recipe, data) { res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) - browser() + # browser() info(recipe$Run$logger, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 8fc38a15..41a125ab 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -1,11 +1,11 @@ # Compute the hcst, obs and fcst anomalies with or without cross-validation # and return them, along with the hcst and obs climatologies. -compute_anomalies <- function(recipe, data) { +compute_anomalies <- function(recipe, data, retrieve = TRUE) { ## TODO: Is this necessary? # Check - if (inherits(data$hcst, "s2dv_cube")) { + if (retrieve) { if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { error(recipe$Run$logger, paste("The anomaly module has been called, but the element", @@ -22,7 +22,7 @@ compute_anomalies <- function(recipe, data) { cross <- FALSE cross_msg <- "without" } - original_dims <- dim(data$hcst) + original_dims <- dim(data$hcst$data) # Compute anomalies ## TODO: No cross-validation case @@ -83,10 +83,10 @@ compute_anomalies <- function(recipe, data) { dims <- dim(clim$clim_exp) ## Is there a better way to do this? # Repeat the array as many times as ensemble members - clim$clim_exp <- rep(clim$clim_exp, dim(data$fcst)[['ensemble']]) + clim$clim_exp <- rep(clim$clim_exp, data$fcst$dims[['ensemble']]) # Rename and reorder dimensions - dim(clim$clim_exp) <- c(dims, ensemble = dim(data$fcst)[['ensemble']]) - clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst))) + dim(clim$clim_exp) <- c(dims, ensemble = data$fcst$dims[['ensemble']]) + clim$clim_exp <- Reorder(clim$clim_exp, order = names(dim(data$fcst$data))) # Get fcst anomalies if (inherits(data$fcst, "s2dv_cube")) { data$fcst$data <- data$fcst$data - clim$clim_exp @@ -101,7 +101,7 @@ compute_anomalies <- function(recipe, data) { } # Display success messages and save data - if (inherits(data$hcst, "s2dv_cube")) { + if (retrieve) { info(recipe$Run$logger, paste("The anomalies have been computed,", cross_msg, "cross-validation. 
The original full fields are returned as",
diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R
index 117cb5f0..1436e401 100644
--- a/modules/Calibration/Calibration.R
+++ b/modules/Calibration/Calibration.R
@@ -1,4 +1,4 @@
-calibrate_datasets <- function(recipe, data) {
+calibrate_datasets <- function(recipe, data, retrieve = TRUE) {
   # Function that calibrates the hindcast using the method stated in the
   # recipe. If the forecast is not null, it calibrates it as well.
   #
@@ -9,7 +9,7 @@ calibrate_datasets <- function(recipe, data) {
   method <- tolower(recipe$Analysis$Workflow$Calibration$method)
 
   if (method == "raw") {
-    if (inherits(data$hcst, "s2dv_cube")) {
+    if (retrieve) {
      warn(recipe$Run$logger,
           paste("The Calibration module has been called, but the calibration",
                 "method in the recipe is 'raw'. The hcst and fcst will not be",
@@ -63,6 +63,7 @@ calibrate_datasets <- function(recipe, data) {
                     "monthly data."))
        stop()
      } else {
+        ## TODO: Change again if all data ends up being an s2dv_cube
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                          cal.method = method, eval.method = "leave-one-out",
                          multi.model = mm, na.fill = TRUE, na.rm = na.rm,
@@ -75,24 +76,25 @@ calibrate_datasets <- function(recipe, data) {
        fun <- Calibration
      }
      hcst_calibrated <- do.call(fun, arguments)
-      # # In the case where anomalies have been computed, calibrate full values
-      # if (!is.null(data$hcst.full_val)) {
-      #   hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
-      #                                           data$obs.full_val,
-      #                                           cal.method = method,
-      #                                           eval.method = "leave-one-out",
-      #                                           multi.model = mm,
-      #                                           na.fill = TRUE,
-      #                                           na.rm = na.rm,
-      #                                           apply_to = NULL,
-      #                                           memb_dim = "ensemble",
-      #                                           sdate_dim = "syear",
-      #                                           ncores = ncores)
-      # } else {
-      #   hcst_full_calibrated <- NULL
-      # }
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(data$hcst.full_val)) {
+        hcst_full_calibrated <- CST_Calibration(data$hcst.full_val,
+                                                data$obs.full_val,
+                                                cal.method = method,
+                                                eval.method = "leave-one-out",
+                                                multi.model = mm,
+                                                na.fill = TRUE,
+                                                na.rm = na.rm,
+                                                apply_to = NULL,
+                                                memb_dim = "ensemble",
+                                                sdate_dim = "syear",
+                                                ncores = ncores)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
      # Calibrate the forecast
+      ## TODO: Change again if all data ends up being an s2dv_cube
      if (!is.null(data$fcst)) {
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          cal.method = method, eval.method = "leave-one-out",
@@ -115,6 +117,7 @@ calibrate_datasets <- function(recipe, data) {
      #   stop()
      # }
      # Calibrate the hindcast
+      ## TODO: Change again if all data ends up being an s2dv_cube
      arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL,
                        sdate_dim = "syear", memb_dim = "ensemble",
                        method = "QUANT", ncores = ncores,
@@ -129,22 +132,23 @@ calibrate_datasets <- function(recipe, data) {
      }
      hcst_calibrated <- do.call(fun, arguments)
      # # Restore dimension order
-      # hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
-      # # In the case where anomalies have been computed, calibrate full values
-      # if (!is.null(data$hcst.full_val)) {
-      #   hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
-      #                                               data$obs.full_val,
-      #                                               exp_cor = NULL,
-      #                                               sdate_dim = "syear",
-      #                                               memb_dim = "ensemble",
-      #                                               method = "QUANT",
-      #                                               ncores = ncores,
-      #                                               na.rm = na.rm,
-      #                                               wet.day = F)
-      # } else {
-      #   hcst_full_calibrated <- NULL
-      # }
+      hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order)
+      # In the case where anomalies have been computed, calibrate full values
+      if (!is.null(data$hcst.full_val)) {
+        hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val,
+                                                    data$obs.full_val,
+                                                    exp_cor = NULL,
+                                                    sdate_dim = "syear",
+                                                    memb_dim = "ensemble",
+                                                    method = "QUANT",
+                                                    ncores = ncores,
+                                                    na.rm = na.rm,
+                                                    wet.day = F)
+      } else {
+        hcst_full_calibrated <- NULL
+      }
      if (!is.null(data$fcst)) {
+        ## TODO: Change again if all data ends up being an s2dv_cube
        arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst,
                          sdate_dim = "syear", memb_dim = "ensemble",
                          method = "QUANT", ncores = ncores,
@@ -167,7 +171,7 @@ calibrate_datasets <- function(recipe, data) {
  #                 list(hcst.full_val = hcst_full_calibrated,
  #                      obs.full_val = data$obs.full_val))
  # }
-  if (inherits(hcst_calibrated, "sd2v_cube")) {
+  if (retrieve) {
    info(recipe$Run$logger, CALIB_MSG)
    ## TODO: What do we do with the full values?
    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index a686b35e..87b951ed 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -20,7 +20,7 @@ source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
 
 ## TODO: Should agg be a function parameter...?
-compute_skill_metrics <- function(recipe, data, agg = 'global') {
+compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE) {
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
@@ -97,7 +97,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
     # Ranked Probability Skill Score and Fair version
     } else if (metric %in% c('rpss', 'frpss')) {
       # skill <- RPSS(data$hcst$data, data$obs$data,
-      skill <- RPSS(data$hcst, data$obs,
+      skill <- RPSS(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
                     Fair = Fair,
@@ -281,7 +281,7 @@ compute_skill_metrics <- function(recipe, data, agg = 'global') {
       skill_metrics[[ metric ]] <- skill
     }
   }
-  if (inherits(data$hcst, "s2dv_cube")) {
+  if (retrieve) {
    info(recipe$Run$logger,
         "##### SKILL METRIC COMPUTATION COMPLETE #####")
-- 
GitLab
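
Before the next patch, it is worth spelling out the dispatch pattern the hunks above converge on: every module now accepts a retrieve argument and uses it, rather than the class of the data, to decide whether it is handling full s2dv_cube objects or the bare arrays that Compute() passes around. A minimal sketch of that pattern, with a toy computation standing in for a real module (the function name and the scaling are illustrative, not part of the repository):

    # Sketch: one code path for both execution modes; assumes 'recipe' carries
    # a log4r logger under recipe$Run$logger, as elsewhere in this codebase.
    toy_module <- function(recipe, data, retrieve = TRUE) {
      if (retrieve) {
        # s2dv_cube objects: the array lives in data$hcst$data
        data$hcst$data <- data$hcst$data * 2
        info(recipe$Run$logger, "##### TOY MODULE COMPLETE #####")
      } else {
        # bare arrays inside Compute(): no metadata, no logging
        data$hcst <- data$hcst * 2
      }
      return(data)
    }
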
From f080c5f141589d7eccc17e6fd701de120cfc46b4 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 5 Jul 2023 14:53:29 +0200
Subject: [PATCH 26/96] Create recipe for testing; add creation of tmp
 directory to store metadata generated inside Compute()

---
 build_compute_workflow.R                       | 27 ++++++++-
 example_scripts/test_compute.R                 |  2 +-
 modules/Skill/Skill.R                          | 26 ++++----
 .../atomic_recipes/recipe_test_compute.yml     | 60 +++++++++++++++++++
 4 files changed, 100 insertions(+), 15 deletions(-)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute.yml

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 02062b3e..a7952036 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -94,6 +94,10 @@ run_compute_workflow <- function(recipe, data) {
     output_dims <- c(output_dims, list(skill = skill_output_dims))
   }
 
+  ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO)
+  if ("indices" %in% modules) {
+    idx_dims <- c('index', 'var', 'syear', 'time', 'region')
+  }
   # ---------------------------------------------------------------------------
   # Step 3: Generate the Step and call Compute()
@@ -113,23 +117,40 @@ run_compute_workflow <- function(recipe, data) {
 
   # ---------------------------------------------------------------------------
   # Step 4: Convert results to s2dv_cube/list format
-  result <- list()
   for (cube in c("hcst", "obs", "fcst")) {
     if (!is.null(res[[cube]])) {
       res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]],
                                                data[[cube]])
     } else {
       res$data[[cube]] <- NULL
     }
+    res[[cube]] <- NULL
   }
   ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
   ## should return the attributes of the skill array so that we have the name
   ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
-    metric_list <- c("rpss", "rpss_significance") ## to be removed
-    res$skill <- lapply(seq(dim(res$skill)[1]), function(x) {res$skill[x, , , , , drop = F]})
+    metric_list <- readRDS(paste0(recipe$Run$output_dir,
+                                  "/outputs/tmp/Skill/metric_names.Rds"))
+    res$skill <- lapply(seq(dim(res$skill)[1]),
+                        function(x) {res$skill[x, , , , , drop = F]})
+    res$skill <- lapply(res$skill,
+                        function(x) {
+                          Subset(x,
+                                 along = 'metric',
+                                 indices = 1,
+                                 drop = 'selected')
+                        })
     ## TODO: Subset 'metric' dimension
     names(res$skill) <- metric_list
   }
+
+  # ---------------------------------------------------------------------------
+  # Step 5: Remove temporary files
+  unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE)
+
+  # ---------------------------------------------------------------------------
+  # Step 6: Return outputs
+  res <- res[!sapply(res, is.null)]
   info(recipe$Run$logger,
        "##### DATA RETURNED AS A NAMED LIST #####")
   return(res)
diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 4b000122..02cd3d45 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -7,7 +7,7 @@ source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
-recipe_file <- "recipes/atomic_recipes/recipe_system7c3s-tas.yml"
+recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml"
 # recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml"
 
 recipe <- prepare_outputs(recipe_file)
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 87b951ed..aeae3b01 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -57,13 +57,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     na.rm = recipe$Analysis$remove_NAs
   }
   ## TODO: Move this to the recipe checker
-  # if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) {
-  #   warn(recipe$Run$logger,
-  #        "cross_validation parameter not defined, setting it to FALSE.")
-  #   cross.val <- FALSE
-  # } else {
-  #   cross.val <- recipe$Analysis$Workflow$Skill$cross_validation
-  # }
+  if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) {
+    # warn(recipe$Run$logger,
+    #      "cross_validation parameter not defined, setting it to FALSE.")
+    cross.val <- FALSE
+  } else {
+    cross.val <- recipe$Analysis$Workflow$Skill$cross_validation
+  }
   skill_metrics <- list()
   for (metric in strsplit(metrics, ", | |,")[[1]]) {
     # Whether the fair version of the metric is to be computed
@@ -314,12 +314,16 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     # Bind list into an array
     skill_metrics <- abind(skill_metrics, along = 0)
     # Retrieve dimname attributes
-    dimension_names <- dimnames(skill_metrics)
-    metric_names <- dimension_names[[1]]
-    dim(metric_names) <- c(metric = dim(skill_metrics)[[1]])
+    metric_names <- dimnames(skill_metrics)[[1]]
+    tmp_file <- paste0(recipe$Run$output_dir,
+                       "/outputs/tmp/Skill/metric_names.Rds")
+    if (!file.exists(tmp_file)) {
+      dir.create(paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/"),
+                 recursive = TRUE)
+      saveRDS(metric_names, tmp_file)
+    }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
-    dimnames(skill_metrics) <- dimension_names
   }
   # Return results
   return(skill_metrics)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
new file mode 100644
index 00000000..22bc1410
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -0,0 +1,60 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: Meteo-France-System7
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '2000'
+    hcst_end: '2016'
+    ftime_min: 1
+    ftime_max: 3
+  Region:
+    latmin: -10
+    latmax: 10
+    lonmin: 0
+    lonmax: 20
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies:
+      compute: yes # yes/no, default yes
+      cross_validation: yes # yes/no, default yes
+      save: 'all' # 'all'/'none'/'exp_only'/'fcst_only'
+    Calibration:
+      method: mse_min
+      save: 'none' # 'all'/'none'/'exp_only'/'fcst_only'
+    Skill:
+      metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS
+      save: 'all' # 'all'/'none'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only'
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean
+      multi_panel: no
+      projection: cylindrical_equidistant
+    Indicators:
+      index: no
+  ncores: 10
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
+    chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab
From a1df866c18ffed0f1310189d9f20da6bf5951763 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 13 Jul 2023 12:59:53 +0200
Subject: [PATCH 27/96] Save s2dv_cube metadata after skill, change recipe to
 test chunking by var

---
 build_compute_workflow.R                   | 25 +++++++++++++------
 modules/Skill/Skill.R                      | 13 +++++-----
 .../atomic_recipes/recipe_test_compute.yml |  2 +-
 3 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index a7952036..457c71d9 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -3,14 +3,20 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
                               ", | |,")[[1]])
   # Create data list
   data <- list(hcst = hcst, obs = obs, fcst = fcst)
+  var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
   # Remove duplicated objects
   rm(hcst, obs, fcst)
   # Pre-process datasets
   data <- preprocess_datasets(recipe, data)
   # Transform data to s2dv_cube
-  data <- lapply(data, function(x) {
+  data <- lapply(data,
+                 function(x) {
                    class(x) <- "startR_array"
-                   as.s2dv_cube(x)})
+                   as.s2dv_cube(x)
+                 })
+  for (cube_idx in seq(1:length(data))) {
+    data[[cube_idx]]$attrs$Variable$varName <- var_name
+  }
   # Define other outputs
   skill_metrics <- NULL
   # Loop over the modules
@@ -66,11 +72,15 @@ run_compute_workflow <- function(recipe, data) {
 
   target_dims <- list(hcst = exp_target_dims, obs = obs_target_dims)
   inputs <- list(hcst = data$hcst, obs = data$obs)
+  # Define input attributes from Start()
+  input_attributes <- list(hcst = c("Variables", "FileSelectors"),
+                           obs = c("Variables", "FileSelectors"))
   # Add forecast if not empty
   if (!is.null(data$fcst)) {
     target_dims <- c(target_dims, list(exp_target_dims))
     inputs <- c(inputs, list(fcst = data$fcst))
+    input_attributes <- c(input_attributes, list(fcst = c("Variables", "FileSelectors")))
   }
 
   # Create output dimensions
@@ -104,14 +114,14 @@ run_compute_workflow <- function(recipe, data) {
   ## TODO: Add use attributes
   step <- Step(fun = compute_workflow,
                target_dims = target_dims,
-               output_dims = output_dims)
+               output_dims = output_dims,
+               use_attributes = input_attributes)
   wf <- AddStep(inputs = inputs,
                 step = step,
                 recipe = recipe)
 
   res <- Compute(wf$hcst,
                  chunks = recipe$Run$startR_workflow$chunk_along)
-  # browser()
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####")
@@ -129,8 +139,8 @@ run_compute_workflow <- function(recipe, data) {
   }
   ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
   ## should return the attributes of the skill array so that we have the name
   ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
-    metric_list <- readRDS(paste0(recipe$Run$output_dir,
-                                  "/outputs/tmp/Skill/metric_names.Rds"))
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
+    metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
     res$skill <- lapply(seq(dim(res$skill)[1]),
                         function(x) {res$skill[x, , , , , drop = F]})
     res$skill <- lapply(res$skill,
@@ -140,8 +150,9 @@ run_compute_workflow <- function(recipe, data) {
                            indices = 1,
                            drop = 'selected')
                         })
-    ## TODO: Subset 'metric' dimension
     names(res$skill) <- metric_list
+    ## TODO: Chunked metadata needs to be put back together!!!
+    res$metadata <- readRDS(paste0(tmp_dir, "skill_metadata.Rds"))
   }
 
   # ---------------------------------------------------------------------------
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index aeae3b01..583c1534 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -315,12 +315,13 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE)
     skill_metrics <- abind(skill_metrics, along = 0)
     # Retrieve dimname attributes
     metric_names <- dimnames(skill_metrics)[[1]]
-    tmp_file <- paste0(recipe$Run$output_dir,
-                       "/outputs/tmp/Skill/metric_names.Rds")
-    if (!file.exists(tmp_file)) {
-      dir.create(paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/"),
-                 recursive = TRUE)
-      saveRDS(metric_names, tmp_file)
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
+
+    if (!dir.exists(tmp_dir)) {
+      dir.create(tmp_dir, recursive = TRUE)
+      saveRDS(metric_names, paste0(tmp_dir, "metric_names.Rds"))
+      data$hcst$data <- NULL
+      saveRDS(data$hcst, paste0(tmp_dir, "skill_metadata.Rds"))
     }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 22bc1410..38374870 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -4,7 +4,7 @@ Description:
 Analysis:
   Horizon: Seasonal
   Variables:
-    name: tas
+    name: tas prlr
     freq: monthly_mean
   Datasets:
     System:
-- 
GitLab
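
The use_attributes argument added to Step() above is what lets the chunk function read the Variables and FileSelectors attributes that Start() attaches to its output. What compute_workflow() then does with them can be sketched on a mock object that imitates the FileSelectors structure (the mock array and values are illustrative):

    # Sketch: recovering the variable names from the FileSelectors attribute,
    # as compute_workflow() does; the attribute layout mimics Start() output.
    hcst <- array(rnorm(8), dim = c(var = 2, time = 4))
    attr(hcst, "FileSelectors") <- list(dat1 = list(var = list(c("tas", "prlr"))))
    var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
    var_name  # "tas" "prlr"
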
From 7f500d6588479ce2a52fde0c0db985e3d83c5888 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 26 Jul 2023 16:32:06 +0200
Subject: [PATCH 28/96] Update

---
 build_compute_workflow.R                       | 12 ++++++------
 modules/Skill/Skill.R                          |  3 ++-
 recipes/atomic_recipes/recipe_test_compute.yml |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 457c71d9..97ddd2f9 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,4 +1,5 @@
-compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
+compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
+                             nchunks = chunk_indices) {
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -19,6 +20,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
   }
   # Define other outputs
   skill_metrics <- NULL
+  probabilities <- NULL
   # Loop over the modules
   for (module in modules) {
     if (module == "calibration") {
@@ -26,14 +28,15 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL) {
     } else if (module == "anomalies") {
       data <- compute_anomalies(recipe, data, retrieve = F)
     } else if (module == "skill") {
-      skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F)
+      skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F, nchunks = nchunks)
     }
   }
   ## TODO: Define what to return depending the modules called + the recipe
   return_list <- list(hcst = data$hcst$data,
                       obs = data$obs$data,
                       fcst = data$fcst$data,
-                      skill = skill_metrics)
+                      skill = skill_metrics,
+                      probabilities = probabilities)
   # Eliminate NULL elements from the return list
   return_list <- return_list[!sapply(return_list, is.null)]
   return(return_list)
@@ -135,9 +138,6 @@ run_compute_workflow <- function(recipe, data) {
     }
     res[[cube]] <- NULL
   }
-  ## TODO: Transform skill metrics back into a NAMED list. To do this, Apply()
-  ## should return the attributes of the skill array so that we have the name
-  ## of each metric, or we should find an alternative way to retrieve them.
   if (!is.null(res$skill)) {
     tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
     metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 583c1534..ec6e8be6 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -20,7 +20,8 @@ source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
 
 ## TODO: Should agg be a function parameter...?
-compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE) {
+compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE,
+                                  nchunks = nchunks) {
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 38374870..e0ffe5d3 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -57,4 +57,4 @@ Run:
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
     modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab

From 7adc3beab1d7c0aa0670d903bee6aa62e60d822d Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 27 Jul 2023 09:29:35 +0200
Subject: [PATCH 29/96] Remove commented recipe

---
 example_scripts/test_compute.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 02cd3d45..963ed021 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -8,7 +8,6 @@ source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
 recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml"
-# recipe_file <- "recipes/atomic_recipes/recipe_test_multivar.yml"
 
 recipe <- prepare_outputs(recipe_file)
-- 
GitLab
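
The nchunks argument threaded through the functions above defaults to chunk_indices, an object startR exposes inside Compute() that identifies which chunk is being processed along each chunked dimension. The next patch uses it to give each chunk its own metadata file; the naming scheme can be sketched with a hand-written nchunks standing in for the real chunk indices (the values are illustrative):

    # Sketch: per-chunk file names built from the chunk indices.
    nchunks <- c(var = 1, time = 2)  # imitates startR's chunk_indices
    metadata_filename <- "skill_metadata"
    for (chunk in names(nchunks)) {
      metadata_filename <- paste0(metadata_filename, "_", chunk, "_", nchunks[[chunk]])
    }
    paste0(metadata_filename, ".Rds")  # "skill_metadata_var_1_time_2.Rds"
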
From 5bc9ae4becd0d9c9bb628abed7bbdd11bcc53b58 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Tue, 8 Aug 2023 12:13:04 +0200
Subject: [PATCH 30/96] Add function to retrieve s2dv_cube metadata after
 Compute() (WIP)

---
 build_compute_workflow.R                       |  5 +-
 modules/Skill/Skill.R                          | 12 ++-
 .../atomic_recipes/recipe_test_compute.yml     |  4 +-
 tools/libs.R                                   |  1 -
 tools/retrieve_metadata.R                      | 78 +++++++++++++++++++
 5 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 tools/retrieve_metadata.R

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 97ddd2f9..00ef2bc3 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,5 +1,6 @@
 compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                              nchunks = chunk_indices) {
+  setwd(recipe$Run$code_dir)
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -152,7 +153,9 @@ run_compute_workflow <- function(recipe, data) {
                         })
     names(res$skill) <- metric_list
     ## TODO: Chunked metadata needs to be put back together!!!
-    res$metadata <- readRDS(paste0(tmp_dir, "skill_metadata.Rds"))
+    source("tools/retrieve_metadata.R")
+    res$metadata <- retrieve_metadata(tmp_dir = tmp_dir,
+                                      chunks = recipe$Run$startR_workflow$chunk_along)
   }
 
   # ---------------------------------------------------------------------------
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index ec6e8be6..8ee96357 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -317,15 +317,21 @@ compute_skill_metrics <- function(recipe, data, agg = 'global', retrieve = TRUE,
     # Retrieve dimname attributes
     metric_names <- dimnames(skill_metrics)[[1]]
     tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
-
+    # Save metric names (only done once)
     if (!dir.exists(tmp_dir)) {
       dir.create(tmp_dir, recursive = TRUE)
       saveRDS(metric_names, paste0(tmp_dir, "metric_names.Rds"))
-      data$hcst$data <- NULL
-      saveRDS(data$hcst, paste0(tmp_dir, "skill_metadata.Rds"))
     }
     # Add 'metric' dimension to array and restore attributes
     names(dim(skill_metrics)) <- c("metric", dimensions)
+    # Save s2dv_cube metadata
+    metadata_filename <- "skill_metadata"
+    for (chunk in names(nchunks)) {
+      metadata_filename <- paste0(metadata_filename, "_", chunk, "_", nchunks[[chunk]])
+    }
+    metadata_filename <- paste0(metadata_filename, ".Rds")
+    data$hcst$data <- NULL
+    saveRDS(data$hcst, paste0(tmp_dir, metadata_filename))
   }
   # Return results
   return(skill_metrics)
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index e0ffe5d3..20550bdc 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -56,5 +56,5 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    modules: skill # Modules to run inside Compute(), in order
+    chunk_along: {longitude: 2, latitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
diff --git a/tools/libs.R b/tools/libs.R
index a67f9549..4d151db1 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -29,7 +29,6 @@ source("tools/data_summary.R")
 source("tools/read_atomic_recipe.R")
 source("tools/write_autosubmit_conf.R")
 source("tools/get_archive.R")
-# source("tools/add_dims.R") # Not sure if necessary yet
 
 # Settings
 options(bitmapType = 'cairo')
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
new file mode 100644
index 00000000..a29528bb
--- /dev/null
+++ b/tools/retrieve_metadata.R
@@ -0,0 +1,78 @@
+retrieve_metadata <- function(tmp_dir, chunks) {
+  # Build metadata file pattern:
+  metadata_file_pattern <- "skill_metadata"
+  for (chunk in sort(names(chunks))) {
+    metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
+  }
+  metadata_file_pattern <- paste0(metadata_file_pattern, ".Rds")
+  metadata_files <- list.files(path = tmp_dir, pattern = glob2rx(metadata_file_pattern))
+  metadata <- readRDS(paste0(tmp_dir, metadata_files[1]))
+  # Piece together variable info
+  if ("var" %in% names(chunks)) {
+    # $attrs
+    for (i in 2:chunks[["var"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("var_1", paste0("var_", i),
+                                            metadata_files[1])))
+      var_name <- metadata_chunk$attrs$Variable$varName
+      metadata$attrs$Variable$varName <- c(metadata$attrs$Variable$varName,
+                                           var_name)
+      metadata$attrs$Variable$metadata <- c(metadata$attrs$Variable$metadata,
+                                            metadata_chunk$attrs$Variable$metadata[var_name])
+    }
+    # $coords
+    metadata$coords$var <- metadata$attrs$Variable$varName
+    dim(metadata$coords$var) <- c(var = length(metadata$coords$var))
+    attr(metadata$coords$var, "values") <- TRUE
+    attr(metadata$coords$var, "indices") <- FALSE
+    ## TODO: $dims
+  }
+  # Piece together time info
+  if ("time" %in% names(chunks)) {
+    dates_dims <- dim(metadata$attrs$Dates)
+    time_dim_idx <- which(names(dates_dims) == "time")
+    for (i in 2:chunks[["time"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("time_1", paste0("time_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates)
+    }
+    time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx])
+    dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx],
+                                   time = time_dim_length)
+    metadata$coords$time <- seq(1, time_dim_length)
+    dim(metadata$coords$time) <- c(time = time_dim_length)
+    attr(metadata$coords$time, "indices") <- TRUE
+    ## TODO: Add to 'dims'
+  }
+  # Piece together lon/lat info
+  if ("latitude" %in% names(chunks)) {
+    # $attrs
+    ## TODO: Preserve attributes
+    for (i in 2:chunks[["latitude"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("latitude_1", paste0("latitude_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Variable$metadata$latitude <- c(metadata$attrs$Variable$metadata$latitude,
+                                                     metadata_chunk$attrs$Variable$metadata$latitude)
+    }
+    # $coords
+    metadata$coords$latitude <- metadata$attrs$Variable$metadata$latitude
+    dim(metadata$coords$latitude) <- c(latitude = length(metadata$coords$latitude))
+  }
+  if ("longitude" %in% names(chunks)) {
+    # $attrs
+    ## TODO: Preserve attributes
+    for (i in 2:chunks[["longitude"]]) {
+      metadata_chunk <- readRDS(paste0(tmp_dir,
+                                       gsub("longitude_1", paste0("longitude_", i),
+                                            metadata_files[1])))
+      metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude,
+                                                      metadata_chunk$attrs$Variable$metadata$longitude)
+    }
+    # $coords
+    metadata$coords$longitude <- metadata$attrs$Variable$metadata$longitude
+    dim(metadata$coords$longitude) <- c(longitude = length(metadata$coords$longitude))
+  }
+  return(metadata)
+}
-- 
GitLab
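
retrieve_metadata() reassembles a single s2dv_cube metadata skeleton from the per-chunk RDS files, concatenating varName, Dates, latitude and longitude along whichever dimensions were chunked. A minimal usage sketch, assuming a recipe object is in scope and the per-chunk files already exist under tmp_dir:

    # Sketch: piece the chunked metadata back together after Compute().
    source("tools/retrieve_metadata.R")
    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/")
    metadata <- retrieve_metadata(tmp_dir = tmp_dir,
                                  chunks = recipe$Run$startR_workflow$chunk_along)
    str(metadata$coords)  # reassembled var/time/latitude/longitude coordinates
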
From 0e20aea909d2856135428200bb30a3589dad6f58 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 8 Aug 2023 13:12:15 +0200
Subject: [PATCH 31/96] Compute() with autosubmit

---
 .gitignore                                     |  2 +
 build_compute_workflow.R                       | 60 ++++++++++++++++++-
 recipes/atomic_recipes/recipe_test_compute.yml |  8 +--
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index e11ba7d3..1ce7a002 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@ out-logs/
 modules/Loading/testing_recipes/recipe_decadal_calendartest.yml
 modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml
 conf/vitigeoss-vars-dict.yml
+STARTR_CHUNKING_*/
+# TODO: ignore bigmemory object like a68e_1_2 and a68e_1_2.desc
diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 97ddd2f9..15fabcb3 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -1,5 +1,12 @@
 compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                              nchunks = chunk_indices) {
+
+  # Load modules
+  source("modules/Preprocessing/Preprocessing.R")
+  source("modules/Calibration/Calibration.R")
+  source("modules/Anomalies/Anomalies.R")
+  source("modules/Skill/Skill.R")
+
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
@@ -15,6 +22,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
                    class(x) <- "startR_array"
                    as.s2dv_cube(x)
                  })
+
   for (cube_idx in seq(1:length(data))) {
     data[[cube_idx]]$attrs$Variable$varName <- var_name
   }
@@ -115,15 +123,63 @@ run_compute_workflow <- function(recipe, data) {
   # ---------------------------------------------------------------------------
   # Step 3: Generate the Step and call Compute()
   ## TODO: Add use attributes
+  # Libraries to be loaded when running Compute()
+  libs_load <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", "s2dv",
+                 "abind", "easyNCDF", "CSTools", "lubridate", "PCICt",
+                 "RColorBrewer", "configr", "sf", "ggplot2", "rnaturalearth",
+                 "cowplot", "stringr", "pryr")
+
   step <- Step(fun = compute_workflow,
                target_dims = target_dims,
-               output_dims = output_dims,
-               use_attributes = input_attributes)
+               output_dims = output_dims,
+               use_attributes = input_attributes, use_libraries = libs_load)
   wf <- AddStep(inputs = inputs,
                 step = step,
                 recipe = recipe)
+
+  #---------------------------------------
+  # Compute locally
+#  res <- Compute(wf$hcst,
+#                 chunks = recipe$Run$startR_workflow$chunk_along)
+
+  #---------------------------------------
+  # Compute with autosubmit (NOTE: ssh to AS machine to run this!)
+  run_on <- 'local'
+  if (run_on == 'local') {
+    r_module_ver <- "R/4.1.2-foss-2015a-bare"
+    cdo_module_ver <- "CDO/1.9.8-foss-2015a"
+  } else if (run_on == 'nord3') {
+    r_module_ver <- "R/4.1.2-foss-2019b"
+    cdo_module_ver <- "CDO/1.9.8-foss-2019b"
+  }
+  #NOTE: autosubmit_suite_dir can be anywhere
+  autosubmit_suite_dir <- recipe$Run$code_dir #"/home/Earth/aho/startR_local_autosubmit/"
+  #TODO: Review the core/thread setting
+  #TODO: Add more parameters in recipe, e.g., expid, hpc_user, wallclock
   res <- Compute(wf$hcst,
-                 chunks = recipe$Run$startR_workflow$chunk_along)
+                 chunks = recipe$Run$startR_workflow$chunk_along,
+                 threads_compute = recipe$Analysis$Workflow$ncores,
+                 threads_load = 2,
+                 cluster = list(
+                   queue_host = run_on, # name in platforms.yml
+                   r_module = r_module_ver,
+                   CDO_module = cdo_module_ver,
+                   autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
+                   cores_per_job = recipe$Analysis$Workflow$ncores,
+                   job_wallclock = '01:00:00',
+                   max_jobs = prod(unlist(recipe$Run$startR_workflow$chunk_along)),
+                   polling_period = 10,
+                   extra_queue_params = list('#SBATCH --constraint=medmem', '#SBATCH --exclusive'),
+                   expid = "a68e", # to be changed!
+                   hpc_user = "bsc32734", # to be changed!
+                   run_dir = recipe$Run$code_dir
+                 ),
+                 workflow_manager = 'autosubmit', # 'ecFlow'
+                 autosubmit_suite_dir = autosubmit_suite_dir,
+                 autosubmit_server = NULL, #or 'bscesautosubmit02',
+                 wait = TRUE
+                 )
+#---------------------------------------
 
   info(recipe$Run$logger,
        "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####")
diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index e0ffe5d3..2bd19f6e 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -26,7 +26,7 @@ Analysis:
     lonmax: 20
   Regrid:
     method: bilinear
-    type: to_system
+    type: to_system #to_reference #'r360x181'
   Workflow:
@@ -53,8 +53,8 @@ Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
-  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  output_dir: /esarchive/scratch/aho/tmp/
+  code_dir: /esarchive/scratch/aho/git/auto-s2s/
   startR_workflow:
     modules: calibration, anomalies, skill # Modules to run inside Compute(), in order
-    chunk_along: {var: 2,time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {var: 1,time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
-- 
GitLab
paste0("latitude and longitude cannot be chunking dimensions if", + "any of the following modules is in the startR workflow: ", + paste(MODULES_USING_LATLON, collapse = ", "), ".")) + error_status <- T + } + if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_TIME)) { + error(recipe$Run$logger, + paste0("time cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_TIME, collapse = ", "), ".")) + error_status <- T + } + if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && + any(modules %in% MODULES_USING_VAR)) { + error(recipe$Run$logger, + paste0("var cannot be a chunking dimension if any of the ", + "following modules is in the startR workflow: ", + paste(MODULES_USING_VAR, collapse = ", "), ".")) + error_status <- T + } + } + } + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- -- GitLab From f78738b0ea691eb2de6f218b4a88e2b51210ecc2 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 8 Aug 2023 16:02:03 +0200 Subject: [PATCH 33/96] Correct ncores; add init_commands --- build_compute_workflow.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 15fabcb3..e60688ba 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -143,8 +143,9 @@ run_compute_workflow <- function(recipe, data) { # chunks = recipe$Run$startR_workflow$chunk_along) #--------------------------------------- - # Compute with autosubmit (NOTE: ssh to AS machine to run this!) - run_on <- 'local' + # Compute with autosubmit + #NOTE: 'local' is not recommended; only use it for small tests. 
From f78738b0ea691eb2de6f218b4a88e2b51210ecc2 Mon Sep 17 00:00:00 2001
From: aho
Date: Tue, 8 Aug 2023 16:02:03 +0200
Subject: [PATCH 33/96] Correct ncores; add init_commands

---
 build_compute_workflow.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 15fabcb3..e60688ba 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -143,8 +143,9 @@ run_compute_workflow <- function(recipe, data) {
 #                 chunks = recipe$Run$startR_workflow$chunk_along)
 
   #---------------------------------------
-  # Compute with autosubmit (NOTE: ssh to AS machine to run this!)
-  run_on <- 'local'
+  # Compute with autosubmit
+  #NOTE: 'local' is not recommended; only use it for small tests.
+  run_on <- 'nord3'
   if (run_on == 'local') {
     r_module_ver <- "R/4.1.2-foss-2015a-bare"
     cdo_module_ver <- "CDO/1.9.8-foss-2015a"
@@ -158,14 +159,15 @@ run_compute_workflow <- function(recipe, data) {
   #TODO: Add more parameters in recipe, e.g., expid, hpc_user, wallclock
   res <- Compute(wf$hcst,
                  chunks = recipe$Run$startR_workflow$chunk_along,
-                 threads_compute = recipe$Analysis$Workflow$ncores,
+                 threads_compute = recipe$Analysis$ncores,
                  threads_load = 2,
                  cluster = list(
                    queue_host = run_on, # name in platforms.yml
                    r_module = r_module_ver,
                    CDO_module = cdo_module_ver,
+                   init_commands = list('module load GDAL PROJ GEOS'),
                    autosubmit_module = 'autosubmit/4.0.0b-foss-2015a-Python-3.7.3',
-                   cores_per_job = recipe$Analysis$Workflow$ncores,
+                   cores_per_job = recipe$Analysis$ncores,
                    job_wallclock = '01:00:00',
                    max_jobs = prod(unlist(recipe$Run$startR_workflow$chunk_along)),
                    polling_period = 10,
-- 
GitLab

From 66f2d31e25670fe31b5fd6b393d1f4f76569b968 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 9 Aug 2023 12:43:29 +0200
Subject: [PATCH 34/96] missing parenthesis

---
 build_compute_workflow.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 936c0bff..ac882507 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -152,7 +152,7 @@ run_compute_workflow <- function(recipe, data) {
                    chunks = recipe$Run$startR_workflow$chunk_along,
                    threads_compute = recipe$Analysis$ncores,
                    threads_load = 2)
-  } else if (run_on %in% c('nord3', 'as_machine') {
+  } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
     ## TODO: Change code_dir to autosubmit dir when necessary
     autosubmit_suite_dir <- recipe$Run$code_dir
-- 
GitLab

From 1e692b81ecb29ad78468a6a40f8dc3880ae56033 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 9 Aug 2023 12:47:22 +0200
Subject: [PATCH 35/96] Remove threads_compute from local call

---
 build_compute_workflow.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index ac882507..b12b9b52 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -149,9 +149,7 @@ run_compute_workflow <- function(recipe, data) {
   if (run_on == 'local') {
     # Compute locally, in serial
     res <- Compute(wf$hcst,
-                   chunks = recipe$Run$startR_workflow$chunk_along,
-                   threads_compute = recipe$Analysis$ncores,
-                   threads_load = 2)
+                   chunks = recipe$Run$startR_workflow$chunk_along)
   } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
     ## TODO: Change code_dir to autosubmit dir when necessary
     autosubmit_suite_dir <- recipe$Run$code_dir
-- 
GitLab

From 372e4a1c13a7f1a36dad9bbfd2eea31638629f3e Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Thu, 10 Aug 2023 11:42:55 +0200
Subject: [PATCH 36/96] test locally

---
 recipes/atomic_recipes/recipe_test_compute.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml
index 93dbcf8b..10b7b875 100644
--- a/recipes/atomic_recipes/recipe_test_compute.yml
+++ b/recipes/atomic_recipes/recipe_test_compute.yml
@@ -56,7 +56,7 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: anomalies calibration skill # Modules to run inside Compute(), in order
+    modules: calibration skill # Modules to run inside Compute(), in order
     chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
     run_on: local # options: local, as_machine, nord3
     chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
-- 
GitLab

From 321ba66cba506a47cc5e5c49ca3dc1ed547d33f6 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Wed, 6 Sep 2023 14:27:53 +0200
Subject: [PATCH 37/96] Remove sourced startR functions after release of
 v2.3.0

---
 modules/Loading/Loading.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R
index 04e8ac38..7f59669e 100644
--- a/modules/Loading/Loading.R
+++ b/modules/Loading/Loading.R
@@ -6,10 +6,6 @@ source("modules/Loading/R/get_timeidx.R")
 source("modules/Loading/R/check_latlon.R")
 ## TODO: Move to prepare_outputs.R
 source("tools/libs.R")
-## TODO: Remove when new startR is released:
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Start.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/zzz.R")
-source("https://earth.bsc.es/gitlab/es/startR/-/raw/master/R/Utils.R")
 
 load_datasets <- function(recipe, retrieve = T) {
 
-- 
GitLab
From 9e8d499c10ca6c2002e89242c534eef76c5f5f22 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 2 Oct 2023 09:48:51 +0200
Subject: [PATCH 38/96] Copy seasonal loading function

---
 modules/Loading/R/load_seasonal.R | 452 ++++++++++++++++++++++++++++++
 1 file changed, 452 insertions(+)
 create mode 100644 modules/Loading/R/load_seasonal.R

diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R
new file mode 100644
index 00000000..7f59669e
--- /dev/null
+++ b/modules/Loading/R/load_seasonal.R
@@ -0,0 +1,452 @@
+## TODO: remove paths to personal scratchs
+source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R")
+# Load required libraries/funs
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+## TODO: Move to prepare_outputs.R
+source("tools/libs.R")
+
+load_datasets <- function(recipe, retrieve = T) {
+
+  # retrieve <- F
+  # -------------------------------------------
+  # Set params -----------------------------------------
+
+  hcst.inityear <- recipe$Analysis$Time$hcst_start
+  hcst.endyear <- recipe$Analysis$Time$hcst_end
+  lats.min <- recipe$Analysis$Region$latmin
+  lats.max <- recipe$Analysis$Region$latmax
+  lons.min <- recipe$Analysis$Region$lonmin
+  lons.max <- recipe$Analysis$Region$lonmax
+  ref.name <- recipe$Analysis$Datasets$Reference$name
+  exp.name <- recipe$Analysis$Datasets$System$name
+
+  variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+  store.freq <- recipe$Analysis$Variables$freq
+
+  # get sdates array
+  ## LOGGER: Change dates2load to extract logger from recipe?
+  sdates <- dates2load(recipe, recipe$Run$logger)
+
+  idxs <- NULL
+  idxs$hcst <- get_timeidx(sdates$hcst,
+                           recipe$Analysis$Time$ftime_min,
+                           recipe$Analysis$Time$ftime_max,
+                           time_freq=store.freq)
+
+  if (!(is.null(sdates$fcst))) {
+    idxs$fcst <- get_timeidx(sdates$fcst,
+                             recipe$Analysis$Time$ftime_min,
+                             recipe$Analysis$Time$ftime_max,
+                             time_freq=store.freq)
+  }
+
+  ## TODO: Examine this verifications part, verify if it's necessary
+  # stream <- verifications$stream
+  # sdates <- verifications$fcst.sdate
+
+  ## TODO: define fcst.name
+  ##fcst.name <- recipe$Analysis$Datasets$System[[sys]]$name
+
+  # get esarchive datasets dict:
+  ## TODO: Adapt to 'filesystem' option in recipe
+  archive <- read_yaml("conf/archive.yml")$esarchive
+  exp_descrip <- archive$System[[exp.name]]
+
+  freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]])
+  reference_descrip <- archive$Reference[[ref.name]]
+  freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]])
+  obs.dir <- reference_descrip$src
+  fcst.dir <- exp_descrip$src
+  hcst.dir <- exp_descrip$src
+  fcst.nmember <- exp_descrip$nmember$fcst
+  hcst.nmember <- exp_descrip$nmember$hcst
+
+  ## TODO: it is necessary?
+  ##if ("accum" %in% names(reference_descrip)) {
+  ##  accum <- unlist(reference_descrip$accum[store.freq][[1]])
+  ##} else {
+  ##  accum <- FALSE
+  ##}
+
+  var_dir_obs <- reference_descrip[[store.freq]][variable]
+  var_dir_exp <- exp_descrip[[store.freq]][variable]
+
+  # -----------
+  obs.path <- paste0(archive$src,
+                     obs.dir, store.freq, "/$var$", "$var_dir$",
+                     "/$var$_$file_date$.nc")
+
+  hcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "$var$_$file_date$.nc")
+
+  fcst.path <- paste0(archive$src,
+                      hcst.dir, store.freq, "/$var$", "$var_dir$",
+                      "/$var$_$file_date$.nc")
+
+  # Define regrid parameters:
+  #-------------------------------------------------------------------
+  regrid_params <- get_regrid_params(recipe, archive)
+
+  # Longitude circular sort and latitude check
+  #-------------------------------------------------------------------
+  circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    split_multiselected_dims = TRUE
+  } else {
+    split_multiselected_dims = FALSE
+  }
+
+  # Load hindcast
+  #-------------------------------------------------------------------
+  hcst <- Start(dat = hcst.path,
+                var = variable,
+                var_dir = var_dir_exp,
+                file_date = sdates$hcst,
+                time = idxs$hcst,
+                var_dir_depends = 'var',
+                latitude = values(list(lats.min, lats.max)),
+                latitude_reorder = Sort(),
+                longitude = values(list(lons.min, lons.max)),
+                longitude_reorder = circularsort,
+                transform = regrid_params$fcst.transform,
+                transform_params = list(grid = regrid_params$fcst.gridtype,
+                                        method = regrid_params$fcst.gridmethod),
+                transform_vars = c('latitude', 'longitude'),
+                synonims = list(latitude = c('lat', 'latitude'),
+                                longitude = c('lon', 'longitude'),
+                                ensemble = c('member', 'ensemble')),
+                ensemble = indices(1:hcst.nmember),
+                metadata_dims = 'var', # change to just 'var'?
+                return_vars = list(latitude = 'dat',
+                                   longitude = 'dat',
+                                   time = 'file_date'),
+                split_multiselected_dims = split_multiselected_dims,
+                retrieve = retrieve)
+
+  if (store.freq %in% c("daily_mean", "daily")) {
+    # Adjusts dims for daily case, could be removed if startR allows
+    # multidim split
+    names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear"
+    default_dims <- c(dat = 1, var = 1, sday = 1,
+                      sweek = 1, syear = 1, time = 1,
+                      latitude = 1, longitude = 1, ensemble = 1)
+    default_dims[names(dim(hcst))] <- dim(hcst)
+    dim(hcst) <- default_dims
+    # Change time attribute dimensions
+    default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+    names(dim(attr(hcst, "Variables")$common$time))[which(names(
+      dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+    default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <-
+      dim(attr(hcst, "Variables")$common$time)
+    dim(attr(hcst, "Variables")$common$time) <- default_time_dims
+  }
+
+  # Adjust dates for models where the time stamp goes into the next month
+  if (recipe$Analysis$Variables$freq == "monthly_mean") {
+    attr(hcst, "Variables")$common$time[] <-
+      attr(hcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+  }
+  dates <- attr(hcst, "Variables")$common$time
+  # Convert hcst to s2dv_cube object
+  if (retrieve) {
+    if ("var_dir" %in% names(dim(hcst))) {
+      hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected")
+    }
+    hcst <- as.s2dv_cube(hcst)
+    dim(hcst$attrs$Dates) <- dim(dates)
+  }
+  # Load forecast
+  #-------------------------------------------------------------------
+  if (!is.null(recipe$Analysis$Time$fcst_year)) {
+    # the call uses file_date instead of fcst_syear so that it can work
+    # with the daily case and the current version of startR not allowing
+    # multiple dims split
+
+    fcst <- Start(dat = fcst.path,
+                  var = variable,
+                  var_dir = var_dir_exp,
+                  var_dir_depends = 'var',
+                  file_date = sdates$fcst,
+                  time = idxs$fcst,
+                  latitude = values(list(lats.min, lats.max)),
+                  latitude_reorder = Sort(),
+                  longitude = values(list(lons.min, lons.max)),
+                  longitude_reorder = circularsort,
+                  transform = regrid_params$fcst.transform,
+                  transform_params = list(grid = regrid_params$fcst.gridtype,
+                                          method = regrid_params$fcst.gridmethod),
+                  transform_vars = c('latitude', 'longitude'),
+                  synonims = list(latitude = c('lat', 'latitude'),
+                                  longitude = c('lon', 'longitude'),
+                                  ensemble = c('member', 'ensemble')),
+                  ensemble = indices(1:fcst.nmember),
+                  metadata_dims = 'var',
+                  return_vars = list(latitude = 'dat',
+                                     longitude = 'dat',
+                                     time = 'file_date'),
+                  split_multiselected_dims = split_multiselected_dims,
+                  retrieve = retrieve)
+
+    if (store.freq %in% c("daily_mean", "daily")) {
+      # Adjusts dims for daily case, could be removed if startR allows
+      # multidim split
+      names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear"
+      default_dims <- c(dat = 1, var = 1, sday = 1,
+                        sweek = 1, syear = 1, time = 1,
+                        latitude = 1, longitude = 1, ensemble = 1)
+      default_dims[names(dim(fcst))] <- dim(fcst)
+      dim(fcst) <- default_dims
+      # Change time attribute dimensions
+      ## TODO: Is this still necessary?
+      default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1)
+      names(dim(attr(fcst, "Variables")$common$time))[which(names(
+        dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear"
+      default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <-
+        dim(attr(fcst, "Variables")$common$time)
+      dim(attr(fcst, "Variables")$common$time) <- default_time_dims
+    } else if (recipe$Analysis$Variables$freq == "monthly_mean") {
+      attr(fcst, "Variables")$common$time[] <-
+        attr(fcst, "Variables")$common$time - seconds(exp_descrip$time_stamp_lag)
+    }
+    # Convert fcst to s2dv_cube
+    if (retrieve) {
+      if ("var_dir" %in% names(dim(fcst))) {
+        fcst <- Subset(fcst, along = "var_dir", indices = 1, drop = "selected")
+      }
+      fcst <- as.s2dv_cube(fcst)
+    }
+  } else {
+    fcst <- NULL
+  }
+
+  # Load reference
+  #-------------------------------------------------------------------
+
+  # Obtain dates and date dimensions from the loaded hcst data to make sure
+  # the corresponding observations are loaded correctly.
+
+  # Separate Start() call for monthly vs daily data
+  if (store.freq == "monthly_mean") {
+    dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m")
+    dim(dates_file) <- dim(dates)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = dates_file,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = retrieve)
+
+  } else if (store.freq %in% c("daily_mean", "daily")) {
+
+    # Get year and month for file_date
+    dates_file <- sapply(dates, format, '%Y%m')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 var_dir = var_dir_obs,
+                 var_dir_depends = 'var',
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = values(list(lats.min, lats.max)),
+                 latitude_reorder = Sort(),
+                 longitude = values(list(lons.min, lons.max)),
+                 longitude_reorder = circularsort,
+                 transform = regrid_params$obs.transform,
+                 transform_params = list(grid = regrid_params$obs.gridtype,
+                                         method = regrid_params$obs.gridmethod),
+                 transform_vars = c('latitude', 'longitude'),
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = retrieve)
+  }
+
+  ## TODO: This part belongs to the pre-processing module
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  default_dims <- c(dat = 1, var = 1, sday = 1,
+                    sweek = 1, syear = 1, time = 1,
+                    latitude = 1, longitude = 1, ensemble = 1)
+  if (retrieve) {
+    dim(obs) <- c(dim(obs), "ensemble" = 1)
+    # Remove var_dir dimension
+    if ("var_dir" %in% names(dim(obs))) {
+      obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected")
+    }
+    # Convert obs to s2dv_cube
+    obs <- as.s2dv_cube(obs)
+  }
+
+  # Checks and data summary (only if retrieve = TRUE)
+  # ---------------------------------------------------------------------------
+  ## TODO: See if these checks can be performed on the startR_cube metadata
+  if (retrieve) {
+    # Check for consistency between hcst and obs grid
+    if (!(recipe$Analysis$Regrid$type == 'none')) {
+      if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) {
+        lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lat_error_msg)
+        hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1],
+                               "; Last hcst lat: ", hcst$lat[length(hcst$lat)])
+        info(recipe$Run$logger, hcst_lat_msg)
+        obs_lat_msg <- paste0("First obs lat: ", obs$lat[1],
+                              "; Last obs lat: ", obs$lat[length(obs$lat)])
+        info(recipe$Run$logger, obs_lat_msg)
+        stop("hcst and obs don't share the same latitudes.")
+      }
+      if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) {
+        lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                               "Please check the original grids and the",
+                               "regrid parameters in your recipe.")
+        error(recipe$Run$logger, lon_error_msg)
+        hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1],
+                               "; Last hcst lon: ", hcst$lon[length(hcst$lon)])
+        info(recipe$Run$logger, hcst_lon_msg)
+        obs_lon_msg <- paste0("First obs lon: ", obs$lon[1],
+                              "; Last obs lon: ", obs$lon[length(obs$lon)])
+        info(recipe$Run$logger, obs_lon_msg)
+        stop("hcst and obs don't share the same longitudes.")
+      }
+    }
+    # Unit manipulation and conversion:
+    # Remove negative values in accumulative variables
+    dictionary <- read_yaml("conf/variable-dictionary.yml")
+    for (var_idx in 1:length(variable)) {
+      var_name <- variable[var_idx]
+      if (dictionary$vars[[var_name]]$accum) {
+        info(recipe$Run$logger,
+             paste0("Accumulated variable ", var_name,
+                    ": setting negative values to zero."))
+        # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data,
+        #                                                  along = "var",
+        #                                                  indices = var_idx, F), 0)
+        obs$data[, var_idx, , , , , , , ][obs$data[, var_idx, , , , , , , ] < 0] <- 0
+        hcst$data[, var_idx, , , , , , , ][hcst$data[, var_idx, , , , , , , ] < 0] <- 0
+        if (!is.null(fcst)) {
+          fcst$data[, var_idx, , , , , , , ][fcst$data[, var_idx, , , , , , , ] < 0] <- 0
+        }
+      }
+
+      # Convert prlr from m/s to mm/day
+      ## TODO: Make a unit conversion function
+      if (variable[[var_idx]] == "prlr") {
+        # Verify that the units are m/s and the same in obs and hcst
+        if (((obs$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+             (obs$attrs$Variable$metadata[[var_name]]$units == "m s**-1")) &&
+            ((hcst$attrs$Variable$metadata[[var_name]]$units == "m s-1") ||
+             (hcst$attrs$Variable$metadata[[var_name]]$units == "m s**-1"))) {
+          info(recipe$Run$logger, "Converting precipitation from m/s to mm/day.")
+          obs$data[, var_idx, , , , , , , ] <-
+            obs$data[, var_idx, , , , , , , ]*86400*1000
+          obs$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          hcst$data[, var_idx, , , , , , , ] <-
+            hcst$data[, var_idx, , , , , , , ]*86400*1000
+          hcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          if (!is.null(fcst)) {
+            fcst$data[, var_idx, , , , , , , ] <-
+              fcst$data[, var_idx, , , , , , , ]*86400*1000
+            fcst$attrs$Variable$metadata[[var_name]]$units <- "mm/day"
+          }
+        }
+      }
+    }
+    # Compute anomalies if requested
+    # Print a summary of the loaded data for the user, for each object
+    if (recipe$Run$logger$threshold <= 2) {
+      data_summary(hcst, recipe)
+      data_summary(obs, recipe)
+      if (!is.null(fcst)) {
+        data_summary(fcst, recipe)
+      }
+    }
+  }
+
+  info(recipe$Run$logger,
+       "##### DATA LOADING COMPLETED SUCCESSFULLY #####")
+
+  ############################################################################
+  #
+  # CHECKS ON MISSING FILES
+  #
+  ############################################################################
+
+  #obs.NA_dates.ind <- Apply(obs,
+  #                          fun=(function(x){ all(is.na(x))}),
+  #                          target_dims=c('time', 'latitude', 'longitude'))[[1]]
+  #obs.NA_dates <- dates_file[obs.NA_dates.ind]
+  #obs.NA_dates <- obs.NA_dates[order(obs.NA_dates)]
+  #obs.NA_files <- paste0(obs.dir, store.freq,"/",variable,"_",
+  #                       freq.obs,"obs.grid","/",variable,"_",obs.NA_dates,".nc")
+  #
+  #if (any(is.na(hcst))){
+  #  fatal(recipe$Run$logger,
+  #        paste(" ERROR: MISSING HCST VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "hcst files:",
+  #              hcst.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status = 1)
+  #}
+  #
+  #if (any(is.na(obs)) && !identical(obs.NA_dates,character(0))){
+  #  fatal(recipe$logger,
+  #        paste(" ERROR: MISSING OBS VALUES FOUND DURING LOADING # ",
+  #              " ################################################# ",
+  #              " ###### MISSING FILES #### ",
+  #              " ################################################# ",
+  #              "obs files:",
+  #              obs.NA_files,
+  #              " ################################################# ",
+  #              " ################################################# ",
+  #              sep="\n"))
+  #  quit(status=1)
+  #}
+  #
+  #info(recipe$logger,
+  #     "######### DATA LOADING COMPLETED SUCCESFULLY ##############")
+
+  ############################################################################
+  ############################################################################
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+
+}
-- 
GitLab

From 21d46fae21c8cd09b941a9dfa27712560bc77de7 Mon Sep 17 00:00:00 2001
From: Victoria Agudetse Roures
Date: Mon, 2 Oct 2023 13:11:34 +0200
Subject: [PATCH 39/96] Remove sourcing of startR dev branch

---
 tools/libs.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/libs.R b/tools/libs.R
index c8dc6e13..c3de4208 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -1,15 +1,15 @@
 # Libraries
 library(log4r)
 library(docopt)
-# library(startR)
+library(startR)
 #source all the .R files
-path <- "/esarchive/scratch/vagudets/repos/startR/R/"
-ff <- lapply(list.files(path), function(x) paste0(path, x))
-invisible(lapply(ff, source))
+# path <- "/esarchive/scratch/vagudets/repos/startR/R/"
+# ff <- lapply(list.files(path), function(x) paste0(path, x))
+# invisible(lapply(ff, source))
 # load all the libraries
-lib <- c('parallel', 'abind', 'bigmemory', 'future', 'multiApply',
-         'PCICt', 'ClimProjDiags', 'ncdf4', 'plyr')
-invisible(lapply(lib, library, character.only = TRUE))
+# lib <- c('parallel', 'abind', 'bigmemory', 'future', 'multiApply',
+#          'PCICt', 'ClimProjDiags', 'ncdf4', 'plyr')
+# invisible(lapply(lib, library, character.only = TRUE))
 library(ClimProjDiags)
 library(multiApply)
 library(yaml)
-- 
GitLab
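
With the loader accepting retrieve, the full retrieve = FALSE round trip now reads: Start() returns lightweight pointers, Compute() feeds bare array chunks to compute_workflow(), and each chunk is promoted back to an s2dv_cube before the modules run. A condensed sketch of that conversion step, assuming a recipe object and the functions defined in the patches above:

    # Sketch: the retrieve = FALSE round trip in miniature.
    data <- load_datasets(recipe, retrieve = F)  # startR pointers, no data loaded
    # ...inside the Compute() chunk function, each bare array is rebuilt:
    chunk_to_cube <- function(x) {
      class(x) <- "startR_array"  # restore the class that Compute() strips
      as.s2dv_cube(x)             # same conversion compute_workflow() applies
    }
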
40/96] Adjust Units to add retrieve, plus other changes --- build_compute_workflow.R | 9 ++++++--- example_scripts/test_compute.R | 7 ++++--- modules/Calibration/Calibration.R | 1 - modules/Loading/Loading.R | 2 +- modules/Units/Units.R | 13 +++++++++---- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index b12b9b52..d0806a4a 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -2,6 +2,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, nchunks = chunk_indices) { # Load modules source("modules/Preprocessing/Preprocessing.R") + source("modules/Units/Units.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") @@ -28,14 +29,16 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL + # Change units + data <- Units(recipe, data, retrieve = F) # Loop over the modules for (module in modules) { if (module == "calibration") { - data <- calibrate_datasets(recipe, data, retrieve = F) + data <- Calibration(recipe, data, retrieve = F) } else if (module == "anomalies") { - data <- compute_anomalies(recipe, data, retrieve = F) + data <- Anomalies(recipe, data, retrieve = F) } else if (module == "skill") { - skill_metrics <- compute_skill_metrics(recipe, data, retrieve = F, nchunks = nchunks) + skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) } } ## TODO: Define what to return depending the modules called + the recipe diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 963ed021..01aba3fc 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -1,4 +1,5 @@ source("modules/Loading/Loading.R") +source("modules/Units/Units.R") source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") @@ -11,11 +12,11 @@ recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml" recipe <- prepare_outputs(recipe_file) # Load datasets -data <- load_datasets(recipe, retrieve = F) +data <- Loading(recipe, retrieve = F) # Run workflow with compute result <- run_compute_workflow(recipe, data) # Plot data -plot_data(recipe, result$data, result$skill, - significance = T) +Visualization(recipe, result$data, result$skill, + significance = T) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index c974939a..06c2f6a6 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -11,7 +11,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) - browser() if (method == "raw") { if (retrieve) { warn(recipe$Run$logger, diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index b90887d7..5453e9ae 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -7,7 +7,7 @@ Loading <- function(recipe, retrieve = TRUE) { time_horizon <- tolower(recipe$Analysis$Horizon) if (time_horizon == "seasonal") { source("modules/Loading/R/load_seasonal.R") - data <- load_seasonal(recipe, retrieve = TRUE) + data <- load_seasonal(recipe, retrieve = retrieve) } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") data <- load_decadal(recipe) diff --git a/modules/Units/Units.R b/modules/Units/Units.R index a143c0a2..eb527e9e 100644 --- a/modules/Units/Units.R +++ b/modules/Units/Units.R 
@@ -9,7 +9,7 @@ source("modules/Units/R/transform_units_pressure.R") # $freq # $flux (if precipitation) # $units (optional) -Units <- function(recipe, data) { +Units <- function(recipe, data, retrieve = T) { # from recipe read the user defined units # from data the original units # deaccumulate option for CDS accumulated variables? @@ -59,12 +59,15 @@ Units <- function(recipe, data) { ## if "/" appears substitute by -1 in at the end of next unit. How to know? # Check if all units are equal (if so, no conversion needed; else convert) if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { - info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + if (retrieve) { + info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") + } res <- data } else { if (recipe$Run$filesystem == 'esarchive' && (sum(!sapply(unique(orig_units), is.null)) != 1)) { - warn(recipe$Run$logger, + ## TODO: Warning + warn(recipe$Run$logger, paste("The units in", paste(names(orig_units), collapse = ', '), "were not all equal and will be uniformized.", "If this is not expected, please contact the ES data team.")) @@ -86,7 +89,9 @@ Units <- function(recipe, data) { } return(result) }, simplify = TRUE) # instead of lapply to get the named list directly - info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + if (retrieve) { + info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") + } } return(res) } -- GitLab From c362ef35d7e212cb16044e2d7c7095d37ff0b4ed Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 6 Oct 2023 16:12:05 +0200 Subject: [PATCH 41/96] Use asplit or Apply to make dimensions flexible --- modules/Units/R/transform_units_temperature.R | 53 ++++++++++++++++--- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index 366f0d34..1735984a 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -1,17 +1,54 @@ transform_units_temperature <- function(data, original_units, new_units, var_name, var_index = 1, var_dim = "var") { - ## TODO: Hard-coded subsetting + + # Method 1: use asplit + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) if (original_units == 'c' & new_units == 'k') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] + 273.15 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'K' + data_list[[var_index]] <- data_list[[var_index]] + 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" } if (original_units == 'k' & new_units == 'c') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] - 273.15 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" - + data_list[[var_index]] <- data_list[[var_index]] - 273.15 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } + + # Method 2: use Apply + if (original_units == 'c' & new_units == 'k') { + + data[[1]]$data <- multiApply::Apply(data[[1]]$data, + target_dims = var_dim, output_dims = var_dim, + fun = .transform_units_temp, + type = "c_to_k", var_index = var_index)$output1 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" + + } + if 
(original_units == 'k' & new_units == 'c') { + data[[1]]$data <- multiApply::Apply(data[[1]]$data, + target_dims = var_dim, output_dims = var_dim, + fun = .transform_units_temp, + type = "k_to_c", var_index = var_index)$output1 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" + + } + + .transform_units_temp <- function(data, type, var_index) { + # data: 'var' + if (type == "k_to_c") { + data[var_index] <- data[var_index] - 273.15 + } else if (type == "c_to_k") { + data[var_index] <- data[var_index] + 273.15 + } + return(data) + } + -- GitLab From 1af44013b0b6c2636d95212f5b34c8a1aa335d82 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 9 Oct 2023 12:39:45 +0200 Subject: [PATCH 42/96] remove Apply() approach --- modules/Units/R/transform_units_temperature.R | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index 1735984a..eebd30bc 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -2,7 +2,6 @@ transform_units_temperature <- function(data, original_units, new_units, var_name, var_index = 1, var_dim = "var") { - # Method 1: use asplit data_arr <- data[[1]]$data data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) if (original_units == 'c' & new_units == 'k') { @@ -22,33 +21,3 @@ transform_units_temperature <- function(data, original_units, new_units, return(data) } - - # Method 2: use Apply - if (original_units == 'c' & new_units == 'k') { - - data[[1]]$data <- multiApply::Apply(data[[1]]$data, - target_dims = var_dim, output_dims = var_dim, - fun = .transform_units_temp, - type = "c_to_k", var_index = var_index)$output1 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" - - } - if (original_units == 'k' & new_units == 'c') { - data[[1]]$data <- multiApply::Apply(data[[1]]$data, - target_dims = var_dim, output_dims = var_dim, - fun = .transform_units_temp, - type = "k_to_c", var_index = var_index)$output1 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "C" - - } - - .transform_units_temp <- function(data, type, var_index) { - # data: 'var' - if (type == "k_to_c") { - data[var_index] <- data[var_index] - 273.15 - } else if (type == "c_to_k") { - data[var_index] <- data[var_index] + 273.15 - } - return(data) - } - -- GitLab From 230ea62084eb9b6676daa65b715d8cccbc63db75 Mon Sep 17 00:00:00 2001 From: aho Date: Tue, 10 Oct 2023 12:07:35 +0200 Subject: [PATCH 43/96] Make dimensions flexible --- .../Units/R/transform_units_precipitation.R | 75 +++++++++---------- modules/Units/R/transform_units_pressure.R | 24 +++--- modules/Units/R/transform_units_temperature.R | 5 +- 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index a24ef851..5cc1e812 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -1,80 +1,70 @@ transform_units_precipitation <- function(data, original_units, new_units, var_name, freq, flux = FALSE, ncores = NULL, var_index = 1) { - ## TODO: Hard-coded subsetting + ## TODO consider higher frequencies (e.g. 
6hourly) ## could create a constant hours <- 24 or hours <- 6 and use the same code + + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + + if (original_units == "ms-1") { if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 * 1000 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mm" } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 - data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' + data_list[[var_index]] <- data_list[[var_index]] * 1000 + data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "mm") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24 * 1000) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24 * 1000) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / 1000 + data_list[[var_index]] <- data_list[[var_index]] / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24 ) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "m") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] / (3600 * 24) + data_list[[var_index]] <- data_list[[var_index]] / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 + data_list[[var_index]] <- data_list[[var_index]] * 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' } else if (new_units == "kgm-2s-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 1000 / (3600 * 24) + data_list[[var_index]] <- data_list[[var_index]] * 1000 / (3600 * 24) data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'kg m-2 s-1' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else if (original_units == "kgm-2s-1") { if (new_units == "ms-1") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , 
, ] / 1000 + data_list[[var_index]] <- data_list[[var_index]] / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm s-1' } else if (new_units == "mm") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mm' } else if (new_units == "m") { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 3600 * 24 / 1000 + data_list[[var_index]] <- data_list[[var_index]] * 3600 * 24 / 1000 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'm' } else { - stop(paste("Unknown transformation from", original_units, "to", - new_units)) + stop(paste("Unknown transformation from", original_units, "to", new_units)) } } else { stop("Unknown precipitation units transformation") } + if (flux) { data[[1]]$attrs$Variable$metadata[[var_name]]$units <- paste0( data[[1]]$attrs$Variable$metadata[[var_name]]$units, "/day") @@ -84,9 +74,8 @@ transform_units_precipitation <- function(data, original_units, new_units, time_pos <- which(lapply(data[[1]]$attrs$Variable$metadata[[var_name]]$dim, function(x) {x$name}) == 'time') cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) - data_subset <- Subset(data[[1]]$data, along = "var", indices = var_index, drop = 'selected') - data[[1]]$data[ , var_index, , , , , , , ] <- - Apply(list(data_subset, data[[1]]$attrs$Dates), + data_list[[var_index]] <- + Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), target_dim = list(c('syear'), c('syear')), extra_info = list(cal = cal, days_in_month = .days_in_month), fun = function(x, y) { @@ -96,9 +85,17 @@ transform_units_precipitation <- function(data, original_units, new_units, }, ncores = ncores)$output1 } } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } + .days_in_month <- function(x, cal) { if (cal %in% c('gregorian', 'standard', 'proleptic_gregorian')) { N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS diff --git a/modules/Units/R/transform_units_pressure.R b/modules/Units/R/transform_units_pressure.R index 9712e9fe..58d51b1e 100644 --- a/modules/Units/R/transform_units_pressure.R +++ b/modules/Units/R/transform_units_pressure.R @@ -1,33 +1,37 @@ transform_units_pressure <- function(data, original_units, new_units, var_name, var_index = 1) { - ## TODO: Hard-coded subsetting + data_arr <- data[[1]]$data + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) + if (original_units == 'pa') { + data_list[[var_index]] <- data_list[[var_index]] / 100 if (new_units == 'hpa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] /100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'hPa' } else if (new_units == 'mb') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] /100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- 'mb' } } else if (original_units == 'hpa') { if (new_units == 'pa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 100 + data_list[[var_index]] <- data_list[[var_index]] * 100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" } else if (new_units == "mb") { 
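## A minimal sketch of the asplit()/aperm() round trip these patches adopt in
## place of hard-coded `[ , var_index, , , , , , , ]` subsetting. The toy
## array and all `ex_` names below are hypothetical, not part of the patch:
ex_arr <- array(1:24, dim = c(dat = 1, var = 2, latitude = 3, longitude = 4))
ex_slices <- asplit(ex_arr, which(names(dim(ex_arr)) == "var")) # one slice per variable
ex_slices[[1]] <- ex_slices[[1]] + 273.15                       # transform one variable only
ex_new <- array(unlist(ex_slices),
                dim = c(dim(ex_slices[[1]]), var = length(ex_slices)))
ex_new <- aperm(ex_new, match(names(dim(ex_arr)), names(dim(ex_new)))) # original dim order
stopifnot(identical(dim(ex_new), dim(ex_arr)))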
data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "mb" } } else if (original_units == "mb") { if (new_units == 'pa') { - data[[1]]$data[ , var_index, , , , , , , ] <- - data[[1]]$data[ , var_index, , , , , , , ] * 100 + data_list[[var_index]] <- data_list[[var_index]] * 100 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "Pa" } else if (new_units == "hPa") { data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "hPa" - } + } } + + # Combine list back to array + data_arr <- array(unlist(data_list), + dim = c(dim(data_list[[1]]), var = length(data_list))) + data[[1]]$data <- aperm(data_arr, match(names(dim(data[[1]]$data)), + names(dim(data_arr)))) + return(data) } diff --git a/modules/Units/R/transform_units_temperature.R b/modules/Units/R/transform_units_temperature.R index eebd30bc..985483f0 100644 --- a/modules/Units/R/transform_units_temperature.R +++ b/modules/Units/R/transform_units_temperature.R @@ -1,9 +1,8 @@ transform_units_temperature <- function(data, original_units, new_units, - var_name, var_index = 1, - var_dim = "var") { + var_name, var_index = 1) { data_arr <- data[[1]]$data - data_list <- asplit(data_arr, which(names(dim(data_arr)) == var_dim)) + data_list <- asplit(data_arr, which(names(dim(data_arr)) == "var")) if (original_units == 'c' & new_units == 'k') { data_list[[var_index]] <- data_list[[var_index]] + 273.15 data[[1]]$attrs$Variable$metadata[[var_name]]$units <- "K" -- GitLab From 36d4bb6ab9d0ff4ebb20353e0961e34baf9e0dfa Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 17 Jan 2024 17:00:35 +0100 Subject: [PATCH 44/96] Add TODOs, fix typo --- build_compute_workflow.R | 1 + tools/retrieve_metadata.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index afa17c05..5a5a59df 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -29,6 +29,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL + ## TODO: Not working # Change units # data <- Units(recipe, data, retrieve = F) # Loop over the modules diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R index a29528bb..7b922103 100644 --- a/tools/retrieve_metadata.R +++ b/tools/retrieve_metadata.R @@ -38,7 +38,7 @@ retrieve_metadata <- function(tmp_dir, chunks) { metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx]) - dim(metadata$attr$Dates) <- c(dates_dims[-time_dim_idx], + dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx], time = time_dim_length) metadata$coords$time <- seq(1, time_dim_length) dim(metadata$coords$time) <- c(time = time_dim_length) -- GitLab From e6cb27f93ce73b4b9d279dcac81414079cb44f8f Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 18 Jan 2024 16:59:14 +0100 Subject: [PATCH 45/96] Attach metadata to skill metrics and adapt plot_skill_metrics() and Visualization() --- build_compute_workflow.R | 4 ++-- example_scripts/test_compute.R | 2 +- modules/Visualization/R/plot_skill_metrics.R | 23 +++++++++++--------- modules/Visualization/Visualization.R | 15 +++++++++---- 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 5a5a59df..2a55282c 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -218,8 +218,8 @@ run_compute_workflow <- function(recipe, data) { names(res$skill) <- metric_list ## TODO: Chunked metadata needs 
to be put back together!!! source("tools/retrieve_metadata.R") - res$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along) + res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, + chunks = recipe$Run$startR_workflow$chunk_along) } # --------------------------------------------------------------------------- diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 01aba3fc..c82a1719 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -18,5 +18,5 @@ data <- Loading(recipe, retrieve = F) result <- run_compute_workflow(recipe, data) # Plot data -Visualization(recipe, result$data, result$skill, +Visualization(recipe, data = result$data, skill_metrics = result$skill, significance = T) diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_skill_metrics.R index b4c2b273..edf3df54 100644 --- a/modules/Visualization/R/plot_skill_metrics.R +++ b/modules/Visualization/R/plot_skill_metrics.R @@ -1,10 +1,10 @@ library(stringr) -plot_skill_metrics <- function(recipe, data_cube, skill_metrics, +plot_skill_metrics <- function(recipe, skill_metrics, outdir, significance = F, output_conf) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive - # data_cube: s2dv_cube object with the corresponding hindcast data + # cube_info: s2dv_cube object with the corresponding hindcast data # skill_metrics: list of named skill metrics arrays # outdir: output directory # significance: T/F, whether to display the significance dots in the plots @@ -19,9 +19,12 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, if (!is.list(skill_metrics) || is.null(names(skill_metrics))) { stop("The element 'skill_metrics' must be a list of named arrays.") } - - latitude <- data_cube$coords$lat - longitude <- data_cube$coords$lon + + cube_info <- skill_metrics[["metadata"]] + skill_metrics[["metadata"]] <- NULL + + latitude <- cube_info$coords$lat + longitude <- cube_info$coords$lon archive <- get_archive(recipe) system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-", @@ -36,7 +39,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, month_label <- tolower(month.name[init_month]) month_abbreviation <- month.abb[init_month] # Get months - months <- lubridate::month(Subset(data_cube$attrs$Dates, + months <- lubridate::month(Subset(cube_info$attrs$Dates, "syear", indices = 1), label = T, abb = F,locale = "en_GB") if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) { @@ -59,7 +62,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "enscorr_specs", "rmsss", "msss") scores <- c("rps", "frps", "crps", "frps_specs", "mse") # Loop over variables and assign colorbar and plot parameters to each metric - for (var in 1:data_cube$dims[['var']]) { + for (var in 1:cube_info$dims[['var']]) { var_skill <- lapply(skill_metrics, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, @@ -119,7 +122,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] - units <- data_cube$attrs$Variable$metadata[[var_name]]$units + units <- cube_info$attrs$Variable$metadata[[var_name]]$units } # Reorder dimensions skill <- Reorder(skill, c("time", "longitude", "latitude")) @@ -147,8 +150,8 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, 
outfile <- paste0(outdir[var], name) } # Get variable name and long name - var_name <- data_cube$attrs$Variable$varName[[var]] - var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + var_name <- cube_info$attrs$Variable$varName[[var]] + var_long_name <- cube_info$attrs$Variable$metadata[[var_name]]$long_name # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ae94ed3d..d0c1d3c8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -14,7 +14,7 @@ source("modules/Visualization/R/plot_ensemble_mean.R") source("modules/Visualization/plot_data.R") Visualization <- function(recipe, - data, + data = NULL, skill_metrics = NULL, probabilities = NULL, significance = F, @@ -56,8 +56,15 @@ Visualization <- function(recipe, # Get plot types and create output directories plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/plots/") - outdir <- get_dir(recipe = recipe, - variable = data$hcst$attrs$Variable$varName) + ## TODO: Improve this? + if (!is.null(data$hcst)) { + variable <- data$hcst$attrs$Variable$varName + } else { + variable <- skill_metrics$metadata$attrs$Variable$varName + } + + outdir <- get_dir(recipe = recipe, variable = variable) + browser() for (directory in outdir) { dir.create(directory, showWarnings = FALSE, recursive = TRUE) } @@ -75,7 +82,7 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, + plot_skill_metrics(recipe, skill_metrics, outdir, significance, output_conf = output_conf) } else { error(recipe$Run$logger, -- GitLab From d27ac023478789db659a2ceac5daab129e4c0785 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 19 Jan 2024 15:23:25 +0100 Subject: [PATCH 46/96] test units module --- build_compute_workflow.R | 4 ++-- modules/Units/R/transform_units_precipitation.R | 4 +++- recipes/atomic_recipes/recipe_test_compute.yml | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 2a55282c..0b60b6c3 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -22,7 +22,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, class(x) <- "startR_array" as.s2dv_cube(x) }) - + for (cube_idx in seq(1:length(data))) { data[[cube_idx]]$attrs$Variable$varName <- var_name } @@ -31,7 +31,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, probabilities <- NULL ## TODO: Not working # Change units - # data <- Units(recipe, data, retrieve = F) + data <- Units(recipe, data, retrieve = F) # Loop over the modules for (module in modules) { if (module == "calibration") { diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index 5cc1e812..52e1e0bd 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -76,10 +76,11 @@ transform_units_precipitation <- function(data, original_units, new_units, cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar) data_list[[var_index]] <- Apply(list(data_list[[var_index]], data[[1]]$attrs$Dates), - target_dim = list(c('syear'), c('syear')), + target_dim = list(c("time"), 
c("time")), extra_info = list(cal = cal, days_in_month = .days_in_month), fun = function(x, y) { date <- as.Date(y, "%Y-%m-%d") + dim(date) <- dim(y) num_days <- .days_in_month(date, cal = .cal) res <- x * num_days }, ncores = ncores)$output1 @@ -97,6 +98,7 @@ transform_units_precipitation <- function(data, original_units, new_units, .days_in_month <- function(x, cal) { + print(dim(x)) if (cal %in% c('gregorian', 'standard', 'proleptic_gregorian')) { N_DAYS_IN_MONTHS <- lubridate:::N_DAYS_IN_MONTHS if (leap_year(year(x))) { diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index 4d47d944..53b5eee1 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -4,9 +4,9 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas prlr + name: tas freq: monthly_mean - units: {tas: C, prlr: mm} + units: {tas: C} Datasets: System: name: Meteo-France-System7 -- GitLab From 8e96e19410fafb107597905a74617a0f294864f7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 22 Jan 2024 17:01:22 +0100 Subject: [PATCH 47/96] COmments and TODOs --- build_compute_workflow.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 0b60b6c3..53ae4fe1 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -29,7 +29,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, # Define other outputs skill_metrics <- NULL probabilities <- NULL - ## TODO: Not working # Change units data <- Units(recipe, data, retrieve = F) # Loop over the modules @@ -38,9 +37,15 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Calibration(recipe, data, retrieve = F) } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) + } else if (module == "downscaling") { + data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) - } + } else if (module = "probabilities") { + probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) + } else if (module = "indices") { + ## TODO: Won't work with plots... + data <- Indices(recipe, data, retrieve = F) } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, @@ -54,7 +59,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } convert_to_s2dv_cube <- function(new_cube, original_cube) { - ## TODO: Make sure all the metadata is created correctly attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") @@ -203,6 +207,7 @@ run_compute_workflow <- function(recipe, data) { } res[[cube]] <- NULL } + ## TODO: Replicate for probabilities and other modules if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) @@ -216,7 +221,7 @@ run_compute_workflow <- function(recipe, data) { drop = 'selected') }) names(res$skill) <- metric_list - ## TODO: Chunked metadata needs to be put back together!!! 
+ # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, chunks = recipe$Run$startR_workflow$chunk_along) @@ -231,5 +236,8 @@ run_compute_workflow <- function(recipe, data) { res <- res[!sapply(res, is.null)] info(recipe$Run$logger, "##### DATA RETURNED AS A NAMED LIST #####") + info(recipe$Run$logger, + paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) + return(res) } -- GitLab From 3daf27463c0926573caed760a10c671c9937dc52 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 25 Jan 2024 09:02:18 +0100 Subject: [PATCH 48/96] Adapt logger calls to customized appender (WIP) --- build_compute_workflow.R | 13 +- modules/Anomalies/Anomalies.R | 37 +++--- modules/Calibration/Calibration.R | 14 +- modules/Loading/R/load_seasonal.R | 4 +- modules/Skill/Skill.R | 63 ++++----- modules/Units/Units.R | 10 +- modules/Visualization/Visualization.R | 31 ++--- tools/check_recipe.R | 184 +++++++++++++------------- tools/prepare_outputs.R | 24 +++- 9 files changed, 197 insertions(+), 183 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 53ae4fe1..8bc40fde 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,6 +7,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") + recipe$Run$logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list @@ -41,11 +43,12 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) - } else if (module = "probabilities") { + } else if (module == "probabilities") { probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) - } else if (module = "indices") { + } else if (module == "indices") { ## TODO: Won't work with plots... data <- Indices(recipe, data, retrieve = F) + } } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, @@ -194,7 +197,7 @@ run_compute_workflow <- function(recipe, data) { wait = TRUE ) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### COMPUTE SECTION ENDED, REFORMATTING OUTPUT #####") # --------------------------------------------------------------------------- @@ -234,9 +237,9 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 6: Return outputs res <- res[!sapply(res, is.null)] - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### DATA RETURNED AS A NAMED LIST #####") - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) return(res) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 06d16927..2f738bdc 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -8,13 +8,11 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { ## TODO: Is this necessary? 
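## The compute_workflow() hunk above re-creates the log4r logger from the
## recipe settings inside the chunk function: logger objects wrap closures
## and connections that do not serialize cleanly when shipped to Compute()
## workers. `.custom_log_layout()` is this repository's own helper; a rough
## standalone illustration with stock log4r (the `ex_` name is hypothetical):
ex_make_chunk_logger <- function(threshold = "INFO") {
  log4r::logger(threshold = threshold,
                appenders = list(log4r::console_appender(
                  layout = log4r::default_log_layout())))
}
## e.g. ex_lg <- ex_make_chunk_logger(); log4r::info(ex_lg, "inside a chunk")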
# Check - if (retrieve) { - if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste("The anomaly module has been called, but the element", - "'Workflow:Anomalies:compute' is missing from the recipe.")) - stop() - } + if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { + error(recipe$Run$logger, retrieve = retrieve, + paste("The anomaly module has been called, but the element", + "'Workflow:Anomalies:compute' is missing from the recipe.")) + stop() } if (recipe$Analysis$Workflow$Anomalies$compute) { if (recipe$Analysis$Workflow$Anomalies$cross_validation) { @@ -120,13 +118,14 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { } # Display success messages and save data - if (retrieve) { - info(recipe$Run$logger, - paste("The anomalies have been computed,", cross_msg, - "cross-validation. The original full fields are returned as", - "$hcst.full_val and $obs.full_val.")) + info(recipe$Run$logger, retrieve = retrieve, + paste("The anomalies have been computed,", cross_msg, + "cross-validation. The original full fields are returned as", + "$hcst.full_val and $obs.full_val.")) - info(recipe$Run$logger, "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES COMPUTED SUCCESSFULLY #####") + if (retrieve) { if (recipe$Analysis$Workflow$Anomalies$save != 'none') { info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") } @@ -148,19 +147,19 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { save_observations(recipe = recipe, data_cube = data$obs) } } - } else { - warn(recipe$Run$logger, paste("The Anomalies module has been called, but", - "recipe parameter Workflow:anomalies:compute is set to FALSE.", - "The full fields will be returned.")) + warn(recipe$Run$logger, retrieve = retrieve, + paste("The Anomalies module has been called, but recipe parameter", + "Workflow:anomalies:compute is set to FALSE.", + "The full fields will be returned.")) hcst_fullvalue <- NULL obs_fullvalue <- NULL - info(recipe$Run$logger, "##### ANOMALIES NOT COMPUTED #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### ANOMALIES NOT COMPUTED #####") } ## TODO: Return fcst full value? .log_memory_usage(recipe$Run$logger, "After computing anomalies") - return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 20042e11..1639e459 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -12,12 +12,10 @@ Calibration <- function(recipe, data, retrieve = TRUE) { method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { - if (retrieve) { - warn(recipe$Run$logger, - paste("The Calibration module has been called, but the calibration", - "method in the recipe is 'raw'. The hcst and fcst will not be", - "calibrated.")) - } + warn(recipe$Run$logger, retrieve = retrieve, + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. 
The hcst and fcst will not be", + "calibrated.")) ## TODO: Improve efficiency fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst @@ -61,7 +59,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: this belongs in the recipe checker if (!(method %in% CST_CALIB_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "monthly data.")) stop() @@ -174,7 +172,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # obs.full_val = data$obs.full_val)) # } if (retrieve) { - info(recipe$Run$logger, CALIB_MSG) # Saving if (recipe$Analysis$Workflow$Calibration$save != 'none') { info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") @@ -194,5 +191,6 @@ Calibration <- function(recipe, data, retrieve = TRUE) { save_observations(recipe = recipe, data_cube = data$obs) } } + info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) return(return_list) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 4babacd1..1f953b20 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -311,10 +311,10 @@ load_seasonal <- function(recipe, retrieve = T) { if (!(recipe$Analysis$Regrid$type == 'none')) { compare_exp_obs_grids(hcst, obs) } - info(recipe$Run$logger, - "##### DATA LOADING COMPLETED SUCCESSFULLY #####") .log_memory_usage(recipe$Run$logger, when = "After loading") } + info(recipe$Run$logger, retrieve = retrieve, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") ############################################################################ # diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index b5b7c1b9..403043b8 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,14 +307,15 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } + info(recipe$Run$logger, retrieve = retrieve, + "##### SKILL METRIC COMPUTATION COMPLETE #####") + .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") + # Save outputs if (retrieve) { - info(recipe$Run$logger, - "##### SKILL METRIC COMPUTATION COMPLETE #####") - .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") - # Save outputs ## TODO: Handle the output differently to avoid changing the recipe? 
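## The `retrieve` argument added to these logger calls is forwarded to the
## suite's customized appender; its exact semantics live in
## `.custom_log_layout()`. A minimal stand-in with the same call shape,
## assuming the intent is simply to suppress console logging inside Compute()
## chunks (the `ex_` wrapper below is hypothetical):
ex_info <- function(logger, retrieve = TRUE, msg) {
  if (retrieve) log4r::info(logger, msg)
  invisible(NULL)
}
## e.g. ex_info(recipe$Run$logger, retrieve = retrieve, "##### ... #####")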
if (recipe$Analysis$Workflow$Skill$save != 'none') { - info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING SKILL METRIC #####") } recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/") @@ -376,7 +377,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, return(skill_metrics) } -Probabilities <- function(recipe, data) { +Probabilities <- function(recipe, data, retrieve = TRUE) { ## TODO: Do hcst and fcst at the same time if (is.null(recipe$Analysis$ncores)) { ncores <- 1 @@ -395,8 +396,9 @@ Probabilities <- function(recipe, data) { named_quantiles <- list() if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, "Quantiles and probability bins have been - requested, but no thresholds are provided in the recipe.") + error(recipe$Run$logger, retrieve = retrieve, + paste("Quantiles and probability bins have been requested, but no", + "thresholds are provided in the recipe.")) stop() } else { for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { @@ -471,34 +473,35 @@ Probabilities <- function(recipe, data) { percentiles = named_quantiles) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####") .log_memory_usage(recipe$Run$logger, when = "After anomaly computation") # Save outputs - if (recipe$Analysis$Workflow$Probabilities$save != 'none') { - info(recipe$Run$logger, - "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") - - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") - # Save percentiles - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'percentiles_only')) { - save_percentiles(recipe = recipe, percentiles = results$percentiles, - data_cube = data$hcst) - } - # Save probability bins - if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = results$probs, - data_cube = data$hcst, type = "hcst") - if (!is.null(results$probs_fcst)) { - save_probabilities(recipe = recipe, probs = results$probs_fcst, - data_cube = data$fcst, type = "fcst") + if (retrieve) { + if (recipe$Analysis$Workflow$Probabilities$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####") + + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Save percentiles + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'percentiles_only')) { + save_percentiles(recipe = recipe, percentiles = results$percentiles, + data_cube = data$hcst) + } + # Save probability bins + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = results$probs, + data_cube = data$hcst, type = "hcst") + if (!is.null(results$probs_fcst)) { + save_probabilities(recipe = recipe, probs = results$probs_fcst, + data_cube = data$fcst, type = "fcst") + } } } } - # Return results return(results) } diff --git a/modules/Units/Units.R b/modules/Units/Units.R index eb527e9e..83964195 100644 --- a/modules/Units/Units.R +++ b/modules/Units/Units.R @@ -59,15 +59,13 @@ Units <- function(recipe, data, retrieve = T) { ## if "/" appears substitute by -1 in at the end of next unit. How to know? 
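## The check below is a set-size comparison: pooling the original units with
## the user-requested units adds no new values only when every dataset
## already matches. A toy run with hypothetical values (`ex_` names are not
## part of the patch):
ex_orig <- list(hcst = "k", obs = "k")
ex_user <- "k"
length(unique(c(unlist(ex_orig), ex_user))) == length(ex_user) # TRUE -> skip conversion
ex_orig$obs <- "c"
length(unique(c(unlist(ex_orig), ex_user))) == length(ex_user) # FALSE -> convert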
# Check if all units are equal (if so, no conversion needed; else convert) if (length(unique(c(unlist(orig_units), user_units))) == length(user_units)) { - if (retrieve) { - info(recipe$Run$logger, "##### NO UNIT CONVERSION NEEDED #####") - } + info(recipe$Run$logger, retrieve = retrieve, "##### NO UNIT CONVERSION NEEDED #####") res <- data } else { if (recipe$Run$filesystem == 'esarchive' && (sum(!sapply(unique(orig_units), is.null)) != 1)) { ## TODO: Warning - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = retrieve, paste("The units in", paste(names(orig_units), collapse = ', '), "were not all equal and will be uniformized.", "If this is not expected, please contact the ES data team.")) @@ -89,9 +87,7 @@ Units <- function(recipe, data, retrieve = T) { } return(result) }, simplify = TRUE) # instead of lapply to get the named list directly - if (retrieve) { - info(recipe$Run$logger, "##### UNIT CONVERSION COMPLETE #####") - } + info(recipe$Run$logger, retrieve = retrieve, "##### UNIT CONVERSION COMPLETE #####") } return(res) } diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index d0c1d3c8..55944e16 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -34,7 +34,7 @@ Visualization <- function(recipe, if (recipe$Analysis$Region$name %in% names(output_conf)) { output_conf <- output_conf[[recipe$Analysis$Region$name]] } else { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("The region name is not found in the 'output_conf' file. ", "The default plot settings will be used.")) output_conf <- NULL @@ -70,9 +70,10 @@ Visualization <- function(recipe, } if ((is.null(skill_metrics)) && (is.null(data$fcst))) { - error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + error(recipe$Run$logger, retrieve = TRUE, + paste("The Visualization module has been called,", + "but there is no fcst in 'data', and 'skill_metrics' is NULL", + "so there is no data that can be plotted.")) stop() } # Set default single-panel plots if not specified @@ -85,7 +86,7 @@ Visualization <- function(recipe, plot_skill_metrics(recipe, skill_metrics, outdir, significance, output_conf = output_conf) } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The skill metric plots have been requested, but the ", "parameter 'skill_metrics' is NULL")) } @@ -113,11 +114,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_ens %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('enscorr' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "need to be provided to be masked")) } else { @@ -130,11 +131,11 @@ Visualization <- function(recipe, # Plots with dotted negative correlated in ens-mean-fcst if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, skill_metrics ", "need to be provided for the dots")) } else if (!('enscorr' %in% names(skill_metrics))) { - 
error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the forecast ensemble mean plot, enscor metric ", "needs to be provided for the dots")) } else { @@ -146,7 +147,7 @@ Visualization <- function(recipe, } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The forecast ensemble mean plot has been requested, but ", "there is no fcst element in 'data'")) } @@ -177,11 +178,11 @@ Visualization <- function(recipe, if (recipe$Analysis$Workflow$Visualization$mask_terciles %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided to be masked.")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "need to be provided to be masked")) } else { @@ -195,11 +196,11 @@ Visualization <- function(recipe, # Plots with dotted terciles if (recipe$Analysis$Workflow$Visualization$dots %in% c('both', TRUE)) { if (is.null(skill_metrics)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, skill_metrics ", "need to be provided for the dots")) } else if (!('rpss' %in% names(skill_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, rpss metric ", "needs to be provided for the dots")) } else { @@ -211,7 +212,7 @@ Visualization <- function(recipe, } } } else { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("For the most likely terciles plot, both the fcst and the ", "probabilities must be provided.")) } diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 2fc9f72d..b6c7ba00 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -1,7 +1,7 @@ check_recipe <- function(recipe) { # recipe: yaml recipe already read it ## TODO: set up logger-less case - info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) + info(recipe$Run$logger, retrieve = TRUE, paste("Checking recipe:", recipe$recipe_path)) # --------------------------------------------------------------------- # ANALYSIS CHECKS @@ -21,20 +21,20 @@ check_recipe <- function(recipe) { # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The recipe must contain an element called 'Analysis'.") error_status <- T } if (!all(PARAMS %in% names(recipe$Analysis))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- T } if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Horizon' in the recipe must be one of the ", "following: ", paste(HORIZONS, collapse = ", "), ".")) error_status <- T @@ -44,7 +44,7 @@ check_recipe <- function(recipe) { ## TODO: Specify filesystem archive <- read_yaml(ARCHIVE_SEASONAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_SEASONAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_SEASONAL, collapse = 
", "), ".")) @@ -53,7 +53,7 @@ check_recipe <- function(recipe) { } else if (tolower(recipe$Analysis$Horizon) == "decadal") { archive <- read_yaml(ARCHIVE_DECADAL)[[recipe$Run$filesystem]] if (!all(TIME_SETTINGS_DECADAL %in% names(recipe$Analysis$Time))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Time' in the recipe must contain all of the ", "following: ", paste(TIME_SETTINGS_DECADAL, collapse = ", "), ".")) @@ -74,7 +74,7 @@ check_recipe <- function(recipe) { (!is.null(recipe$Analysis$Variables$sic_threshold))) { if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The element Analysis:Variables:sic_threshold must be a", "numeric value between 0 and 1.")) } @@ -82,14 +82,14 @@ check_recipe <- function(recipe) { # Check system names if (!is.null(archive)) { if (!all(recipe$Analysis$Datasets$System$name %in% names(archive$System))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified System name was not found in the archive.") error_status <- T } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% names(archive$Reference))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The specified Reference name was not found in the archive.") error_status <- T } @@ -97,36 +97,36 @@ check_recipe <- function(recipe) { # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || (!is.integer(recipe$Analysis$Time$ftime_min))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_min' must be an integer larger than 0.") error_status <- T } if ((!(recipe$Analysis$Time$ftime_max > 0)) || (!is.integer(recipe$Analysis$Time$ftime_max))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'ftime_max' must be an integer larger than 0.") error_status <- T } if (recipe$Analysis$Time$ftime_max < recipe$Analysis$Time$ftime_min) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'ftime_max' cannot be smaller than 'ftime_min'.") error_status <- T } # Check consistency of hindcast years if (!(as.numeric(recipe$Analysis$Time$hcst_start) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_start > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_start' must be a valid year.") error_status <- T } if (!(as.numeric(recipe$Analysis$Time$hcst_end) %% 1 == 0) || (!(recipe$Analysis$Time$hcst_end > 0))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The element 'hcst_end' must be a valid year.") error_status <- T } if (recipe$Analysis$Time$hcst_end < recipe$Analysis$Time$hcst_start) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "'hcst_end' cannot be smaller than 'hcst_start'.") error_status <- T } @@ -142,19 +142,19 @@ check_recipe <- function(recipe) { ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && # tolower(recipe$Analysis$Horizon) != "subseasonal") { - # warn(recipe$Run$logger, + # warn(recipe$Run$logger, retrieve = TRUE, # paste("Only subseasonal verification allows multiple forecast days."), # "Element fcst_day in recipe set as 1.") # recipe$Analysis$Time$sdate$fcst_day <- '01' # } ## TODO: Delete, this parameter was deprecated # if (is.null(recipe$Analysis$Time$sdate$fcst_sday)) { - # 
error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste("The element 'fcst_sday' in the recipe should be defined.")) # } if (is.null(recipe$Analysis$Time$fcst_year)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("The element 'fcst_year' is not defined in the recipe.", "No forecast year will be used.")) } @@ -171,7 +171,7 @@ check_recipe <- function(recipe) { # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- T } @@ -179,7 +179,7 @@ check_recipe <- function(recipe) { # ... # calculate number of workflows to create for each variable and if (length(recipe$Analysis$Horizon) > 1) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Only one single Horizon can be specified in the recipe") error_status <- T } @@ -187,7 +187,7 @@ check_recipe <- function(recipe) { ## TODO: Refine this # nvar <- length(recipe$Analysis$Variables) # if (nvar > 2) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # "Only two type of Variables can be listed: ECVs and Indicators.") # stop("EXECUTION FAILED") # } @@ -210,7 +210,7 @@ check_recipe <- function(recipe) { if (is.null(names(recipe$Analysis$Region))) { for (region in recipe$Analysis$Region) { if (!all(LIMITS %in% names(region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T @@ -219,7 +219,7 @@ check_recipe <- function(recipe) { if (length(recipe$Analysis$Region) > 1) { for (region in recipe$Analysis$Region) { if (!("name" %in% names(region)) || (is.null(region$name))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If more than one region has been defined, every region", "must have a unique name.")) } @@ -227,7 +227,7 @@ check_recipe <- function(recipe) { } # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- T @@ -237,7 +237,7 @@ check_recipe <- function(recipe) { # for (i in 1:length(recipe$Analysis$Region)) { # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { # limits <- paste(limits, collapse = " ") - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste0("Each region defined in element 'Region' ", # "should have 4 elements: ", # paste(limits, collapse = ", "), ".")) @@ -245,7 +245,7 @@ check_recipe <- function(recipe) { # } # if (length(recipe$Analysis$Region) > 1) { # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { - # error(recipe$Run$logger, + # error(recipe$Run$logger, retrieve = TRUE, # paste("If multiple regions are requested, each region must", # "have a 'name'".) # # are numeric? 
class list mode list @@ -261,19 +261,19 @@ check_recipe <- function(recipe) { recipe$Analysis$Workflow$Calibration$method == FALSE) || tolower(recipe$Analysis$Workflow$Calibration$method) == 'none' || is.null(recipe$Analysis$Workflow$Calibration$method)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "No Calibration method was specified, raw data verification.") recipe$Analysis$Workflow$Calibration$method <- 'raw' } else { if (is.null(recipe$Analysis$Workflow$Calibration$method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Calibration' element 'method' must be specified.") error_status <- T } SAVING_OPTIONS_CALIB <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Calibration$save)) || (!(recipe$Analysis$Workflow$Calibration$save %in% SAVING_OPTIONS_CALIB))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Calibration module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_CALIB, collapse = ", "), ".")) @@ -284,18 +284,18 @@ check_recipe <- function(recipe) { if ("Anomalies" %in% names(recipe$Analysis$Workflow)) { # Computation and cross-validation checks if (is.null(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'compute' must be defined under 'Anomalies'.") error_status <- T } else if (!(is.logical(recipe$Analysis$Workflow$Anomalies$compute))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Anomalies:compute' must be a logical value", "(True/False or yes/no).")) error_status <- T } else if ((recipe$Analysis$Workflow$Anomalies$compute)) { # Cross-validation check if (!is.logical(recipe$Analysis$Workflow$Anomalies$cross_validation)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("If anomaly computation is requested, parameter", "'cross_validation' must be defined under 'Anomalies', and it must be a logical value (True/False or yes/no).")) @@ -305,7 +305,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_ANOM <- c("all", "none", "exp_only", "fcst_only") if ((is.null(recipe$Analysis$Workflow$Anomalies$save)) || (!(recipe$Analysis$Workflow$Anomalies$save %in% SAVING_OPTIONS_ANOM))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify which Anomalies module outputs you want ", "to save with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_ANOM, collapse = ", "), ".")) @@ -327,12 +327,12 @@ check_recipe <- function(recipe) { if ("type" %in% names(downscal_params)) { if (length(downscal_params$type) == 0) { downscal_params$type <- "none" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling 'type' is empty in the recipe, setting it to", "'none'.")) } if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The type of Downscaling request in the recipe is not ", "available. 
It must be one of the following: ", paste(DOWNSCAL_TYPES, collapse = ", "), ".")) @@ -340,14 +340,14 @@ check_recipe <- function(recipe) { } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("A target grid is required for the downscaling method", "requested in the recipe.")) error_status <- T } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'int' was requested, but no", "interpolation method is provided in the recipe.")) error_status <- T @@ -355,7 +355,7 @@ check_recipe <- function(recipe) { } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", "interpolation method is provided.")) @@ -363,24 +363,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intbc' was requested in the recipe, but", "no bias correction method is provided.")) error_status <- T } else if (!(downscal_params$bc_method %in% BC_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted Bias Correction methods for the downscaling", " module are: ", paste(BC_METHODS, collapse = ", "), ".")) error_status <- T } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'intlr' was requested in the recipe, but", "no linear regression method was provided.")) error_status <- T } else if (!(downscal_params$lr_method %in% LR_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted linear regression methods for the", " downscaling module are: ", paste(LR_METHODS, collapse = ", "), ".")) @@ -388,24 +388,24 @@ check_recipe <- function(recipe) { } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type is 'analogs, but the number of analogs", "has not been provided in the recipe. 
The default is 3.")) } } else if (downscal_params$type == "logreg") { if (is.null(downscal_params$int_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe, but", "no interpolation method was provided.")) error_status <- T } if (is.null(downscal_params$log_reg_method)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Downscaling type 'logreg' was requested in the recipe,", "but no logistic regression method is provided.")) error_status <- T } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The accepted logistic regression methods for the ", "downscaling module are: ", paste(LOGREG_METHODS, collapse = ", "), ".")) @@ -420,19 +420,19 @@ check_recipe <- function(recipe) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) indices <- c("nao", nino_indices) if (!("anomalies" %in% tolower(names(recipe$Analysis$Workflow)))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but Anomalies are missing", "in the recipe.")) error_status <- T } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Indices uses Anomalies as input, but the parameter", "'Anomalies:compute' is set as no/False.")) error_status <- T } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the indices under 'Indices' are not available.", "The available Indices are: 'NAO', 'Nino1+2', 'Nino3', ", "'Nino3.4' and 'Nino4'.")) @@ -441,7 +441,7 @@ check_recipe <- function(recipe) { # Check that variables correspond with indices requested if (("nao" %in% recipe_indices) && (!all(recipe_variables %in% c("psl", "z500")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute the NAO with some of the ", "variables requested. To compute the NAO, please make sure", "your recipe requests only psl and/or z500.")) @@ -449,7 +449,7 @@ check_recipe <- function(recipe) { } if ((any(nino_indices %in% recipe_indices)) && (!all(recipe_variables %in% c("tos", "sst")))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("It is not possible to compute El Nino indices with some ", "of the variables requested. To compute El Nino, please ", "make sure your recipe requests only tos.")) @@ -465,14 +465,14 @@ check_recipe <- function(recipe) { "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Skill'.") error_status <- T } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] if (!all(tolower(requested_metrics) %in% AVAILABLE_METRICS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Some of the metrics requested under 'Skill' are not ", "available in SUNSET. 
Check the documentation to see the ", "full list of accepted skill metrics.")) @@ -483,7 +483,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_SKILL <- c("all", "none") if ((is.null(recipe$Analysis$Workflow$Skill$save)) || (!(recipe$Analysis$Workflow$Skill$save %in% SAVING_OPTIONS_SKILL))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the Skill metrics ", "with the 'save' parameter. The options are: ", paste(SAVING_OPTIONS_SKILL, collapse = ", "), ".")) @@ -494,11 +494,11 @@ check_recipe <- function(recipe) { # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- T } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("Parameter 'Probabilities:percentiles' expects a list.", "See documentation in the wiki for examples.")) error_status <- T @@ -507,7 +507,7 @@ check_recipe <- function(recipe) { SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") if ((is.null(recipe$Analysis$Workflow$Probabilities$save)) || (!(recipe$Analysis$Workflow$Probabilities$save %in% SAVING_OPTIONS_PROBS))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Please specify whether you want to save the percentiles ", "and probability bins with the 'save' parameter. The ", "options are: ", @@ -522,14 +522,14 @@ check_recipe <- function(recipe) { "most_likely_terciles") # Separate plots parameter and check if all elements are in PLOT_OPTIONS if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'plots' element must be defined under 'Visualization'.") error_status <- T } else { plots <- strsplit(recipe$Analysis$Workflow$Visualization$plots, ", | |,")[[1]] if (!all(plots %in% PLOT_OPTIONS)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The options available for the plots are: ", paste(PLOT_OPTIONS, collapse = ", "), ".")) error_status <- T @@ -538,41 +538,41 @@ check_recipe <- function(recipe) { # Check multi_panel option ## TODO: Multi-panel if (is.null(recipe$Analysis$Workflow$Visualization$multi_panel)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:multi_panel not specified for the plots, the", " default is 'no/False'.")) } else if (!is.logical(recipe$Analysis$Workflow$Visualization$multi_panel)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter 'Visualization:multi_panel' must be a logical ", "value: either 'yes/True' or 'no/False'")) error_status <- T } # Check projection if (is.null(recipe$Analysis$Workflow$Visualization$projection)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:projection not specified for the plots, the ", "default projection is cylindrical equidistant.")) } ## TODO: Add significance? 
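# To make the Visualization checks in this section concrete, here is a
# minimal sketch, with hypothetical values, of the fields being validated
# once read_yaml() has parsed the recipe into a nested list:
visualization_example <- list(plots = "most_likely_terciles",
                              multi_panel = FALSE,    # must be logical
                              projection = NULL,      # NULL falls back to the default
                              mask_terciles = "both") # TRUE, FALSE or "both"
is.logical(visualization_example$multi_panel)                   # TRUE
visualization_example$mask_terciles %in% c(TRUE, FALSE, "both") # TRUE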
if ("most_likely_terciles" %in% plots) { if (is.null(recipe$Analysis$Workflow$Visualization$mask_terciles)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:mask_terciles not set for tercile plots,", " the default setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$mask_terciles %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:mask_terciles must be one of: ", "yes/True, no/False, 'both'")) error_status <- T } if (is.null(recipe$Analysis$Workflow$Visualization$dots)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, paste0("Visualization:dots not set for tercile plots, the default", " setting is: 'no/False'.")) } else if (!(recipe$Analysis$Workflow$Visualization$dots %in% c(TRUE, FALSE, "both"))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("Parameter Visualization:plots must be one of: ", "yes/True, no/False, 'both'")) } @@ -581,14 +581,14 @@ check_recipe <- function(recipe) { # Scorecards if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Parameter 'metric' must be defined under 'Scorecards'.") error_status <- T } else { sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, ", | |,")[[1]] if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("All of the metrics requested under 'Scorecards' must ", "be requested in the 'Skill' section.")) error_status <- T @@ -607,27 +607,27 @@ check_recipe <- function(recipe) { stop("The recipe must contain an element named 'Run'.") } if (!all(RUN_FIELDS %in% names(recipe$Run))) { - error(recipe$Run$logger, paste("Recipe element 'Run' must contain", + error(recipe$Run$logger, retrieve = TRUE, paste("Recipe element 'Run' must contain", "all of the following fields:", paste(RUN_FIELDS, collapse=", "), ".")) error_status <- T } if (!is.character(recipe$Run$output_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'output_dir' in", recipe$name, "file", "should be a character string indicating the path where", "the outputs should be saved.")) error_status <- T } if (!is.character(recipe$Run$code_dir)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'code_dir' in", recipe$name, "file ", "should be a character string indicating the path", "where the code is.")) error_status <- T } if (!is.logical(recipe$Run$Terminal)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Run element 'Terminal' in", recipe$name, "file ", "should be a boolean value indicating whether or not to", "print the logs in the terminal.")) @@ -660,39 +660,39 @@ check_recipe <- function(recipe) { auto_specs <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]] # Check that the autosubmit configuration parameters are present if (!("auto_conf" %in% names(recipe$Run))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'auto_conf' is missing from the 'Run' section of the recipe.") error_status <- T } else if (!all(AUTO_PARAMS %in% names(recipe$Run$auto_conf))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:auto_conf' must contain all of the 
", "following: ", paste(AUTO_PARAMS, collapse = ", "), ".")) error_status <- T } # Check that the script is not NULL and exists if (is.null(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "A script must be provided to run the recipe with autosubmit.") error_status <- T } else if (!file.exists(recipe$Run$auto_conf$script)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Could not find the file for the script in 'auto_conf'.") error_status <- T } # Check that the experiment ID exists if (is.null(recipe$Run$auto_conf$expid)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The Autosubmit EXPID is missing. You can create one by", "running the following commands on the autosubmit machine:")) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("module load", auto_specs$module_version)) - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("autosubmit expid -H", auto_specs$platform, "-d ")) error_status <- T } else if (!dir.exists(paste0(auto_specs$experiment_dir, recipe$Run$auto_conf$expid))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("No folder in ", auto_specs$experiment_dir, " for the EXPID", recipe$Run$auto_conf$expid, ". Please make sure it is correct.")) @@ -700,17 +700,17 @@ check_recipe <- function(recipe) { } if ((recipe$Run$auto_conf$email_notifications) && (is.null(recipe$Run$auto_conf$email_address))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Autosubmit notifications are enabled but email address is empty!") error_status <- T } if (is.null(recipe$Run$auto_conf$hpc_user)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "The 'Run:auto_conf:hpc_user' field can not be empty.") error_status <- T } else if ((recipe$Run$filesystem == "esarchive") && (!substr(recipe$Run$auto_conf$hpc_user, 1, 5) == "bsc32")) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, "Please check your hpc_user ID. 
It should look like: 'bsc32xxx'") error_status <- T } @@ -729,7 +729,7 @@ check_recipe <- function(recipe) { # Check that all required fields are present if ("startR_workflow" %in% names(recipe$Run)) { if (!all(STARTR_PARAMS %in% names(recipe$Run$startR_workflow))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:startR_workflow' must contain all of the ", "following: ", paste(STARTR_PARAMS, collapse = ", "), ".")) error_status <- T @@ -738,7 +738,7 @@ check_recipe <- function(recipe) { ", | |,")[[1]]) # Check modules if (!any(modules %in% STARTR_MODULES)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The element 'Run:startR_workflow:modules' can only ", "contain the following modules: ", paste(STARTR_MODULES, collapse = ", "), ".")) @@ -746,7 +746,7 @@ check_recipe <- function(recipe) { } # Check chunking dims if (!(any(names(recipe$Run$startR_workflow$chunk_along) %in% CHUNK_DIMS))) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("The dimensions in 'Run:startR_workflow:chunk_along' can ", "only be: ", paste(CHUNK_DIMS, collapse = ", "), ".")) error_status <- T @@ -754,7 +754,7 @@ check_recipe <- function(recipe) { # Check compatibility between module selection and chunking dimensions if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("latitude and longitude cannot be chunking dimensions if", "any of the following modules is in the startR workflow: ", paste(MODULES_USING_LATLON, collapse = ", "), ".")) @@ -762,7 +762,7 @@ check_recipe <- function(recipe) { } if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_TIME)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("time cannot be a chunking dimension if any of the ", "following modules is in the startR workflow: ", paste(MODULES_USING_TIME, collapse = ", "), ".")) @@ -770,7 +770,7 @@ check_recipe <- function(recipe) { } if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_VAR)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste0("var cannot be a chunking dimension if any of the ", "following modules is in the startR workflow: ", paste(MODULES_USING_VAR, collapse = ", "), ".")) @@ -786,15 +786,15 @@ check_recipe <- function(recipe) { # e.g. only one calibration method ## TODO: Implement number of dependent verifications #nverifications <- check_number_of_dependent_verifications(recipe) - # info(recipe$Run$logger, paste("Start Dates:", + # info(recipe$Run$logger, retrieve = TRUE, paste("Start Dates:", # paste(fcst.sdate, collapse = " "))) # Return error if any check has failed if (error_status) { - error(recipe$Run$logger, "RECIPE CHECK FAILED.") + error(recipe$Run$logger, retrieve = TRUE, "RECIPE CHECK FAILED.") stop("The recipe contains some errors. 
Find the full list in the", " main.log file.") } else { - info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") + info(recipe$Run$logger, retrieve = TRUE, "##### RECIPE CHECK SUCCESSFULL #####") } } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index f9942525..0540fec9 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -72,13 +72,13 @@ prepare_outputs <- function(recipe_file, # logger set-up if (recipe$Run$Terminal) { logger <- log4r::logger(threshold = recipe$Run$Loglevel, - appenders = list(console_appender(layout = default_log_layout()), + appenders = list(console_appender(layout = .custom_log_layout()), file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } else { logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(file_appender(logfile, append = TRUE, - layout = default_log_layout()))) + layout = .custom_log_layout()))) } recipe$Run$logger <- logger recipe$Run$logfile <- logfile @@ -86,7 +86,7 @@ prepare_outputs <- function(recipe_file, # Set up default filesystem if (is.null(recipe$Run$filesystem)) { recipe$Run$filesystem <- "esarchive" - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Filesystem not specified in the recipe. Setting it to 'esarchive'.") } # Restructure the recipe to make the atomic recipe more readable @@ -95,10 +95,24 @@ prepare_outputs <- function(recipe_file, } # Run recipe checker if (disable_checks) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = TRUE, "Recipe checks disabled. The recipe will not be checked for errors.") } else { check_recipe(recipe) } return(recipe) } + +.custom_log_layout <- function() { + # Custom modification of the default log4r log layout to remove ANSI + # formatting from log messages when outputting to files. + time_format <- "%Y-%m-%d %H:%M:%S" + function(level, retrieve, ...) { + if (retrieve) { + msg <- paste0(..., collapse = "") + # Strip msg of ANSI formatting + # msg <- ansi_strip(msg) + sprintf("%-5s [%s] %s\n", level, Sys.time(), msg) + } + } +} -- GitLab From c6256dd3f5d35806959f40b8d27ef12da2fc2bd7 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:24:13 +0100 Subject: [PATCH 49/96] Fix bug when no forecast in self-defined Compute(=) function --- build_compute_workflow.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 8bc40fde..22881188 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -5,14 +5,21 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Units/Units.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") + source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") + # Define appender with custom log layout so that it knows not to append + # within Compute(). 
recipe$Run$logger <- log4r::logger(threshold = recipe$Run$Loglevel, appenders = list(console_appender(layout = .custom_log_layout()))) modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list - data <- list(hcst = hcst, obs = obs, fcst = fcst) + data <- list(hcst = hcst, + obs = obs, + fcst = fcst) + data <- data[!sapply(data, is.null)] + print(names(data)) var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects rm(hcst, obs, fcst) @@ -40,10 +47,12 @@ } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) } else if (module == "downscaling") { + ## TODO: Test, adapt if necessary data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "probabilities") { + ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { ## TODO: Won't work with plots... -- GitLab From 9422ab3bf530854ca60cec6f6aceeecd441359e4 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:27:44 +0100 Subject: [PATCH 50/96] Uncomment lines --- modules/Calibration/Calibration.R | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 1639e459..d1e4bc88 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -110,26 +110,20 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping - # if (!(method %in% c("qmap"))) { - # error(recipe$Run$logger, - # paste("Calibration method in the recipe is not available for", - # "daily data. Only quantile mapping 'qmap is implemented.")) - # stop() - # } + if (!(method %in% c("qmap"))) { + error(recipe$Run$logger, retrieve = retrieve, + paste("Calibration method in the recipe is not available for", + "daily data. Only quantile mapping 'qmap' is implemented.")) + stop() + } # Calibrate the hindcast ## TODO: Change again if all data ends up being an s2dv_cube arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, sdate_dim = "syear", memb_dim = "ensemble", method = "QUANT", ncores = ncores, na.rm = na.rm, wet.day = F) - if (inherits(data$hcst, "s2dv_cube")) { - ## TODO: Modify QuantileMapping to reorder dims?
- dim_order <- names(data$hcst$dims) - fun <- CST_QuantileMapping - } else { - dim_order <- names(attr(data$hcst, "Dimensions")) - fun <- QuantileMapping - } + dim_order <- names(data$hcst$dims) + fun <- CST_QuantileMapping hcst_calibrated <- do.call(fun, arguments) # # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) @@ -156,7 +150,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # Calibrate the forecast fcst_calibrated <- do.call(fun, arguments) # Restore dimension order - # fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } @@ -174,7 +168,8 @@ Calibration <- function(recipe, data, retrieve = TRUE) { if (retrieve) { # Saving if (recipe$Analysis$Workflow$Calibration$save != 'none') { - info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING CALIBRATED DATA #####") } ## TODO: What do we do with the full values? recipe$Run$output_dir <- paste0(recipe$Run$output_dir, -- GitLab From a5196e9315de9524efe55ca270ac2ffb4210e2f8 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:41:26 +0100 Subject: [PATCH 51/96] Adapt Downscaling module to work within Compute() --- modules/Downscaling/Downscaling.R | 42 ++++++++++--------- modules/Downscaling/tmp/Intbc.R | 14 +++++-- .../atomic_recipes/recipe_test_compute.yml | 19 +++++++-- tools/check_recipe.R | 5 ++- 4 files changed, 51 insertions(+), 29 deletions(-) diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 59233dc2..f1911cfc 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -6,13 +6,12 @@ source('modules/Downscaling/tmp/Analogs.R') source('modules/Downscaling/tmp/LogisticReg.R') source('modules/Downscaling/tmp/Utils.R') -Downscaling <- function(recipe, data) { +Downscaling <- function(recipe, data, retrieve = TRUE) { # Function that downscale the hindcast using the method stated in the # recipe. For the moment, forecast must be null. # # data: list of s2dv_cube objects containing the hcst, obs and fcst. # recipe: object obtained when passing the .yml recipe file to read_yaml() - type <- tolower(recipe$Analysis$Workflow$Downscaling$type) if (type == "none") { @@ -22,7 +21,7 @@ Downscaling <- function(recipe, data) { } else { if (!is.null(data$fcst)) { - warn(recipe$Run$logger, + warn(recipe$Run$logger, retrieve = retrieve, "The downscaling will be only performed to the hindcast data") data$fcst <- NULL } @@ -271,24 +270,29 @@ Downscaling <- function(recipe, data) { DOWNSCAL_MSG <- "##### DOWNSCALING COMPLETE #####" } } - print(DOWNSCAL_MSG) + info(recipe$Run$logger, retrieve = retrieve, DOWNSCAL_MSG) # Saving - if (recipe$Analysis$Workflow$Downscaling$save != 'none') { - info(recipe$Run$logger, "##### START SAVING DOWNSCALED DATA #####") - } - ## TODO: What do we do with the full values? 
- recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Downscaling/") - # if ((recipe$Analysis$Workflow$Downscaling$save %in% - # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { - # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') - # } - if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { - save_forecast(recipe = recipe, data_cube = hcst_downscal$exp, type = 'hcst') - } - if (recipe$Analysis$Workflow$Downscaling$save == 'all') { - save_observations(recipe = recipe, data_cube = hcst_downscal$obs) + if (retrieve) { + if (recipe$Analysis$Workflow$Downscaling$save != 'none') { + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING DOWNSCALED DATA #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Downscaling/") + # if ((recipe$Analysis$Workflow$Downscaling$save %in% + # c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + # save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + # } + if (recipe$Analysis$Workflow$Downscaling$save %in% c('all', 'exp_only')) { + save_forecast(recipe = recipe, + data_cube = hcst_downscal$exp, + type = 'hcst') + } + if (recipe$Analysis$Workflow$Downscaling$save == 'all') { + save_observations(recipe = recipe, + data_cube = hcst_downscal$obs) + } } return(list(hcst = hcst_downscal$exp, obs = hcst_downscal$obs, fcst = NULL)) diff --git a/modules/Downscaling/tmp/Intbc.R b/modules/Downscaling/tmp/Intbc.R index dc5d050b..63c8be7c 100644 --- a/modules/Downscaling/tmp/Intbc.R +++ b/modules/Downscaling/tmp/Intbc.R @@ -306,8 +306,11 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (bc_method == 'qm' | bc_method == 'quantile_mapping') { - res <- QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, na.rm = TRUE, - memb_dim = member_dim, sdate_dim = sdate_dim, ncores = ncores, ...) + res <- CSTools::QuantileMapping(exp = exp_interpolated$data, obs = obs_ref, + na.rm = TRUE, + memb_dim = member_dim, + sdate_dim = sdate_dim, + ncores = ncores, ...) } else if (bc_method == 'dbc' | bc_method == 'dynamical_bias') { # the temporal dimension must be only one dimension called "time" @@ -328,8 +331,11 @@ Intbc <- function(exp, obs, exp_lats, exp_lons, obs_lats, obs_lons, target_grid, if (dim(obs_ref)[sdate_dim] == 1) { warning('Simple Bias Correction should not be used with only one observation. 
Returning NA.') } - res <- Calibration(exp = exp_interpolated$data, obs = obs_ref, memb_dim = member_dim, - sdate_dim = sdate_dim, ncores = ncores, cal.method = bc_method) + res <- CSTools::Calibration(exp = exp_interpolated$data, obs = obs_ref, + memb_dim = member_dim, + sdate_dim = sdate_dim, + ncores = ncores, + cal.method = bc_method) } # Return a list of three elements diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index 53b5eee1..eaca75d7 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -15,7 +15,7 @@ Analysis: name: ERA5 Time: sdate: '1101' - fcst_year: '2020' + fcst_year: # '2020' hcst_start: '2000' hcst_end: '2016' ftime_min: 1 @@ -30,12 +30,23 @@ Analysis: type: to_system #to_reference #'r360x181' Workflow: Anomalies: - compute: yes # yes/no, default yes + compute: no # yes/no, default yes cross_validation: yes # yes/no, default yes save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: - method: mse_min + method: raw save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. + bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analogs. Number of analogs to be searched + save: 'all' # 'all'/'none'/'exp_only' Skill: metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' @@ -57,7 +68,7 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: calibration skill # Modules to run inside Compute(), in order + modules: downscaling skill # Modules to run inside Compute(), in order chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss.
Only if run_on is not 'local' diff --git a/tools/check_recipe.R b/tools/check_recipe.R index b6c7ba00..6e41e1d3 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -721,7 +721,8 @@ check_recipe <- function(recipe) { # --------------------------------------------------------------------- STARTR_PARAMS <- c("modules", "chunk_along") - STARTR_MODULES <- c("calibration", "anomalies", "skill") + STARTR_MODULES <- c("calibration", "anomalies", "downscaling", + "skill", "probabilities") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") MODULES_USING_LATLON <- c("downscaling", "indices") MODULES_USING_TIME <- c("indicators") @@ -755,7 +756,7 @@ check_recipe <- function(recipe) { if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { error(recipe$Run$logger, retrieve = TRUE, - paste0("latitude and longitude cannot be chunking dimensions if", + paste0("latitude and longitude cannot be chunking dimensions if ", "any of the following modules is in the startR workflow: ", paste(MODULES_USING_LATLON, collapse = ", "), ".")) error_status <- T -- GitLab From 81cac495d9b452e133bad3026863d3202d646084 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 14 Feb 2024 16:41:39 +0100 Subject: [PATCH 52/96] Fix bug in logger call --- modules/Skill/Skill.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 403043b8..75c2e689 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,7 +307,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } - info(recipe$Run$logger, retrieve = retrieve) + info(recipe$Run$logger, retrieve = retrieve, "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") # Save outputs -- GitLab From ce9d468cd40681bbadef3e985df04d85a4d3d3da Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 15 Feb 2024 12:14:50 +0100 Subject: [PATCH 53/96] Adapt logger calls and fix obs dimensions --- modules/Calibration/Calibration.R | 168 +++++++++++++------------- modules/Loading/Loading.R | 2 +- modules/Loading/R/dates2load.R | 16 +-- modules/Loading/R/load_decadal.R | 2 +- modules/Loading/R/load_seasonal.R | 7 +- modules/Loading/R/load_tas_tos.R | 7 +- modules/Saving/R/save_corr.R | 2 +- modules/Saving/R/save_forecast.R | 4 +- modules/Saving/R/save_metrics.R | 3 +- modules/Saving/R/save_observations.R | 3 +- modules/Saving/R/save_percentiles.R | 3 +- modules/Saving/R/save_probabilities.R | 2 +- modules/Saving/Saving.R | 7 +- tools/data_summary.R | 24 ++-- 14 files changed, 129 insertions(+), 121 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index d1e4bc88..2dab5eba 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -8,7 +8,7 @@ Calibration <- function(recipe, data, retrieve = TRUE) { # data: list of s2dv_cube objects containing the hcst, obs and fcst. # Optionally, it may also have hcst.full_val and obs.full_val. # recipe: object obtained when passing the .yml recipe file to read_yaml() - + method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (method == "raw") { @@ -16,19 +16,20 @@ Calibration <- function(recipe, data, retrieve = TRUE) { paste("The Calibration module has been called, but the calibration", "method in the recipe is 'raw'. 
The hcst and fcst will not be", "calibrated.")) - ## TODO: Improve efficiency fcst_calibrated <- data$fcst hcst_calibrated <- data$hcst - # if (!is.null(data$hcst.full_val)) { - # hcst_full_calibrated <- data$hcst.full_val - # } else { - # hcst_full_calibrated <- NULL - # } + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- data$hcst.full_val + } else { + hcst_full_calibrated <- NULL + } CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" } else { + ## TODO: Calibrate full fields when present # Calibration function params - mm <- recipe$Analysis$Datasets$Multimodel + mm <- !is.null(recipe$Analysis$Datasets$Multimodel) && + !tolower(recipe$Analysis$Datasets$Multimodel) %in% c('no','false') if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -42,41 +43,29 @@ Calibration <- function(recipe, data, retrieve = TRUE) { CALIB_MSG <- "##### CALIBRATION COMPLETE #####" # Replicate observation array for the multi-model case - if (mm) { - obs.mm <- data$obs$data - for (dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { - obs.mm <- abind(obs.mm, data$obs$data, - along=which(names(dim(data$obs$data)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(data$obs$data)) - obs$data <- obs.mm - remove(obs.mm) - gc() - } - + if (recipe$Analysis$Variables$freq == "monthly_mean") { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: this belongs in the recipe checker + ## TODO: implement other calibration methods if (!(method %in% CST_CALIB_METHODS)) { error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "monthly data.")) stop() } else { - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, - cal.method = method, eval.method = "leave-one-out", - multi.model = mm, na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, alpha = NULL, - memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = NULL, ncores = ncores) - if (inherits(data$hcst, "s2dv_cube")) { - fun <- CST_Calibration - } else { - fun <- Calibration - } - hcst_calibrated <- do.call(fun, arguments) + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, @@ -95,15 +84,18 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } # Calibrate the forecast - ## TODO: Change again if all data ends up being an s2dv_cube if (!is.null(data$fcst)) { - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst, - cal.method = method, eval.method = "leave-one-out", - multi.model = mm, na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, alpha = NULL, - memb_dim = "ensemble", sdate_dim = "syear", - dat_dim = NULL, ncores = ncores) - fcst_calibrated <- do.call(fun, arguments) + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) } else { fcst_calibrated <- NULL } @@ -111,21 +103,24 @@ Calibration <- function(recipe, data, retrieve = TRUE) { } else if 
(recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { # Daily data calibration using Quantile Mapping if (!(method %in% c("qmap"))) { - error(recipe$Run$logger, retrieve = retrieve, + error(recipe$Run$logger, retrieve = retrieve, paste("Calibration method in the recipe is not available for", "daily data. Only quantile mapping 'qmap' is implemented.")) stop() } # Calibrate the hindcast - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = NULL, - sdate_dim = "syear", memb_dim = "ensemble", - method = "QUANT", ncores = ncores, - na.rm = na.rm, wet.day = F) - dim_order <- names(data$hcst$dims) - fun <- CST_QuantileMapping - hcst_calibrated <- do.call(fun, arguments) - # # Restore dimension order + dim_order <- names(dim(data$hcst$data)) + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) # In the case where anomalies have been computed, calibrate full values if (!is.null(data$hcst.full_val)) { @@ -141,14 +136,18 @@ } else { hcst_full_calibrated <- NULL } + if (!is.null(data$fcst)) { - ## TODO: Change again if all data ends up being an s2dv_cube - arguments <- list(exp = data$hcst, obs = data$obs, exp_cor = data$fcst, - sdate_dim = "syear", memb_dim = "ensemble", - method = "QUANT", ncores = ncores, - na.rm = na.rm, wet.day = F) # Calibrate the forecast - fcst_calibrated <- do.call(fun, arguments) + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "ensemble", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) # Restore dimension order fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) } else { fcst_calibrated <- NULL } } } - ## TODO: Sort out returns - return_list <- list(hcst = hcst_calibrated, - obs = data$obs, - fcst = fcst_calibrated) - # if (!is.null(hcst_full_calibrated)) { - # return_list <- append(return_list, - # list(hcst.full_val = hcst_full_calibrated, - # obs.full_val = data$obs.full_val)) - # } + info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) + .log_memory_usage(recipe$Run$logger, when = "After calibration") + # Saving if (retrieve) { if (recipe$Analysis$Workflow$Calibration$save != 'none') { info(recipe$Run$logger, retrieve = retrieve, "##### START SAVING CALIBRATED DATA #####") + + ## TODO: What do we do with the full values?
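# The 'retrieve' gating used above, reduced to its essence: writing files
# only ever happens on the retrieving (main) process, so the same module
# body can also run inside Compute() chunks, which must not save anything.
# Minimal sketch with a hypothetical stand-in save function:
save_if_retrieving <- function(result, retrieve = TRUE,
                               save_fun = function(x) message("saving ", length(x), " values")) {
  if (retrieve) {
    save_fun(result)  # only the main process writes output
  }
  invisible(result)
}
save_if_retrieving(1:10, retrieve = FALSE)  # inside a chunk: nothing is written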
+ recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } } } - info(recipe$Run$logger, retrieve = retrieve, CALIB_MSG) + + ## TODO: Sort out returns + return_list <- list(hcst = hcst_calibrated, + obs = data$obs, + fcst = fcst_calibrated) + if (!is.null(hcst_full_calibrated)) { + return_list <- append(return_list, + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) + } return(return_list) } diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 59c22baa..4e3a8870 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -24,7 +24,7 @@ Loading <- function(recipe, retrieve = TRUE) { } } else if (time_horizon == "decadal") { source("modules/Loading/R/load_decadal.R") - data <- load_decadal(recipe) + data <- load_decadal(recipe, retrieve = retrieve) } else { stop("Incorrect time horizon.") } diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index f084ce62..2458231e 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -15,27 +15,27 @@ library(lubridate) -dates2load <- function(recipe, logger) { +dates2load <- function(recipe) { temp_freq <- recipe$Analysis$Variables$freq - recipe <- recipe$Analysis$Time + Time <- recipe$Analysis$Time # hcst dates - file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), - recipe$sdate) + file_dates <- paste0(strtoi(Time$hcst_start):strtoi(Time$hcst_end), + Time$sdate) if (temp_freq == "monthly_mean") { file_dates <- .add_dims(file_dates) } # fcst dates (if fcst_year empty it creates an empty object) - if (! 
is.null(recipe$fcst_year)) { - file_dates.fcst <- paste0(recipe$fcst_year, recipe$sdate) + if (!is.null(Time$fcst_year)) { + file_dates.fcst <- paste0(Time$fcst_year, Time$sdate) if (temp_freq == "monthly_mean") { file_dates.fcst <- .add_dims(file_dates.fcst) } } else { file_dates.fcst <- NULL - info(logger, - paste("fcst_year empty in the recipe, creating empty fcst object...")) + info(recipe$Run$logger, retrieve = TRUE, + "fcst_year empty in the recipe, creating empty fcst object...") } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 9268b090..4d92d4e9 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -11,7 +11,7 @@ source("modules/Loading/R/compare_exp_obs_grids.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_decadal <- function(recipe) { +load_decadal <- function(recipe, retrieve = TRUE) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 1f953b20..6e8d40a9 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -5,7 +5,7 @@ source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") source("modules/Loading/R/compare_exp_obs_grids.R") -load_seasonal <- function(recipe, retrieve = T) { +load_seasonal <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -22,8 +22,7 @@ load_seasonal <- function(recipe, retrieve = T) { store.freq <- recipe$Analysis$Variables$freq # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? - sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -297,10 +296,10 @@ load_seasonal <- function(recipe, retrieve = T) { # Adjust obs dimensions and compare exp and obs grids, only for retrieve = TRUE if (retrieve) { # Adds ensemble dim to obs (for consistency with hcst/fcst) + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") default_dims <- c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index ea231b56..e3972ba8 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -6,7 +6,7 @@ source("modules/Loading/R/check_latlon.R") source('modules/Loading/R/mask_tas_tos.R') source("modules/Loading/R/compare_exp_obs_grids.R") -load_tas_tos <- function(recipe) { +load_tas_tos <- function(recipe, retrieve = TRUE) { # ------------------------------------------- # Set params ----------------------------------------- @@ -33,8 +33,7 @@ load_tas_tos <- function(recipe) { # get sdates array - ## LOGGER: Change dates2load to extract logger from recipe? 
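# Traced with illustrative values, the date-building logic of the rewritten
# dates2load() above yields one file_date string per hindcast start year,
# e.g. for hcst_start = "2000", hcst_end = "2002" and sdate = "1101":
paste0(strtoi("2000"):strtoi("2002"), "1101")
# [1] "20001101" "20011101" "20021101"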
- sdates <- dates2load(recipe, recipe$Run$logger) + sdates <- dates2load(recipe) idxs <- NULL idxs$hcst <- get_timeidx(sdates$hcst, @@ -480,7 +479,7 @@ load_tas_tos <- function(recipe) { compare_exp_obs_grids(exp = hcst, obs = obs) } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") return(list(hcst = hcst, fcst = fcst, obs = obs)) diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 050fe68d..46dac25f 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -122,6 +122,6 @@ save_corr <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### ENSEMBLE CORRELATION SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index 00a22850..23b16836 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -142,6 +142,6 @@ save_forecast <- function(recipe, } } } - info(recipe$Run$logger, paste("#####", toupper(type), - "SAVED TO NETCDF FILE #####")) + info(recipe$Run$logger, retrieve = TRUE, + paste("#####", toupper(type), "SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index cd4252ab..060addfd 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -131,6 +131,7 @@ save_metrics <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 127e9890..fd7c0832 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -141,5 +141,6 @@ save_observations <- function(recipe, } } } - info(recipe$Run$logger, "##### OBS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### OBS SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 862ed5ff..96603110 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -113,5 +113,6 @@ save_percentiles <- function(recipe, ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, retrieve = TRUE, + "##### PERCENTILES SAVED TO NETCDF FILE #####") } diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index b7da0449..56acf158 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -127,7 +127,7 @@ save_probabilities <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste("#####", toupper(type), "PROBABILITIES SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index fc9fe4ee..cd5a0a47 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -31,12 +31,13 @@ Saving <- function(recipe, data, # Sanity checks if (is.null(recipe)) { - error(recipe$Run$logger, "The 'recipe' parameter is mandatory.") + error(recipe$Run$logger, retrieve = TRUE, + "The 'recipe' parameter is mandatory.") stop() } - + ## TODO: Modify this if (is.null(data)) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = TRUE, paste("The 'data' parameter is mandatory. 
It should be a list", "of at least two s2dv_cubes containing the hcst and obs.")) stop() diff --git a/tools/data_summary.R b/tools/data_summary.R index b76101ba..4c2c8e21 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -20,32 +20,36 @@ data_summary <- function(data_cube, recipe) { sdate_max <- format(max(as.Date(data_cube$attrs$Dates)), format = date_format) # Log the summary - info(recipe$Run$logger, "DATA SUMMARY:") - info(recipe$Run$logger, paste(object_name, "months:", months)) - info(recipe$Run$logger, paste(object_name, "range:", sdate_min, "to", + info(recipe$Run$logger, retrieve = TRUE, "DATA SUMMARY:") + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "months:", months)) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "range:", sdate_min, "to", sdate_max)) - info(recipe$Run$logger, paste(object_name, "dimensions:")) + info(recipe$Run$logger, retrieve = TRUE, + paste(object_name, "dimensions:")) # Use capture.output() and for loop to display results neatly output_string <- capture.output(dim(data_cube$data)) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Print statistical summary of the data for every variable - info(recipe$Run$logger, paste0("Statistical summary of the data in ", - object_name, ":")) + info(recipe$Run$logger, retrieve = TRUE, + paste0("Statistical summary of the data in ", object_name, ":")) for (var_index in 1:data_cube$dims[['var']]) { variable_name <- data_cube$attrs$Variable$varName[var_index] variable_units <- data_cube$attrs$Variable$metadata[[variable_name]]$units - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("Variable: ", variable_name, " (units: ", variable_units, ")")) output_string <- capture.output(summary(Subset(data_cube$data, along = "var", indices = var_index))) for (i in output_string) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } } - info(recipe$Run$logger, "---------------------------------------------") + info(recipe$Run$logger, retrieve = TRUE, + "---------------------------------------------") invisible(gc()) } -- GitLab From d60679cacb1925147cd51954a8e2c62b2367b304 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:53:00 +0100 Subject: [PATCH 54/96] Add startR workflow checks for Indices module --- tools/check_recipe.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 6e41e1d3..d39a784c 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -722,7 +722,7 @@ check_recipe <- function(recipe) { STARTR_PARAMS <- c("modules", "chunk_along") STARTR_MODULES <- c("calibration", "anomalies", "downscaling", - "skill", "probabilities") + "skill", "probabilities", "indices") CHUNK_DIMS <- c("var", "time", "latitude", "longitude") MODULES_USING_LATLON <- c("downscaling", "indices") MODULES_USING_TIME <- c("indicators") -- GitLab From a41ca0e40872e24e4e6374172679a9216d353604 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:53:42 +0100 Subject: [PATCH 55/96] Add 'retrieve' parameter to logger calls --- modules/Anomalies/Anomalies.R | 6 ++++-- tools/Utils.R | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/Anomalies/Anomalies.R b/modules/Anomalies/Anomalies.R index 2f738bdc..d1163b57 100644 --- a/modules/Anomalies/Anomalies.R +++ b/modules/Anomalies/Anomalies.R @@ -127,7 +127,8 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { "##### ANOMALIES 
COMPUTED SUCCESSFULLY #####") if (retrieve) { if (recipe$Analysis$Workflow$Anomalies$save != 'none') { - info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + info(recipe$Run$logger, retrieve = retrieve, + "##### START SAVING ANOMALIES #####") } # Save outputs recipe$Run$output_dir <- paste0(recipe$Run$output_dir, @@ -159,7 +160,8 @@ Anomalies <- function(recipe, data, retrieve = TRUE) { } ## TODO: Return fcst full value? - .log_memory_usage(recipe$Run$logger, "After computing anomalies") + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, + "After computing anomalies") return(list(hcst = data$hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = hcst_fullvalue, obs.full_val = obs_fullvalue)) } diff --git a/tools/Utils.R b/tools/Utils.R index c0acf374..a5cc84c2 100644 --- a/tools/Utils.R +++ b/tools/Utils.R @@ -1,13 +1,13 @@ ## TODO: Write header ## TODO: Add if 'DEBUG' -.log_memory_usage <- function(logger, when) { - debug(logger, paste0(when, ":")) +.log_memory_usage <- function(logger, retrieve = TRUE, when = NULL) { + debug(logger, retrieve = retrieve, paste0(when, ":")) mem_info <- capture.output(memuse::Sys.meminfo()) for (i in mem_info) { - debug(recipe$Run$logger, i) + debug(logger, retrieve = retrieve, i) } proc_mem <- capture.output(memuse::Sys.procmem()) for (i in proc_mem) { - debug(recipe$Run$logger, i) + debug(logger, retrieve = retrieve, i) } } -- GitLab From ffe655738912ae65bef405d24484442a11a5e5d5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:54:46 +0100 Subject: [PATCH 56/96] Add Files attribute to Step() --- build_compute_workflow.R | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 22881188..df05dc0a 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,6 +7,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") + source("modules/Indices/Indices.R") # Define appender with custom log layout so that it knows not to append # within Compute(). @@ -19,7 +20,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, obs = obs, fcst = fcst) data <- data[!sapply(data, is.null)] - print(names(data)) var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects rm(hcst, obs, fcst) @@ -31,7 +31,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, class(x) <- "startR_array" as.s2dv_cube(x) }) - for (cube_idx in seq(1:length(data))) { data[[cube_idx]]$attrs$Variable$varName <- var_name } @@ -55,7 +54,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { - ## TODO: Won't work with plots... + ## TODO: Won't work with plots... OR WILL IT?
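A note on the info() and debug() calls rewritten throughout this series: stock log4r::info() and log4r::debug() take no 'retrieve' argument, so these patches evidently rely on project-level wrappers. Below is a minimal, hypothetical sketch of such a wrapper; the fallback behaviour inside Compute() chunks is an assumption, not the repo's confirmed definition.

# Hypothetical wrapper; the real definition lives elsewhere in the repo.
info <- function(logger, retrieve = TRUE, ...) {
  if (retrieve) {
    log4r::info(logger, ...)  # normal run: append to the recipe log file
  } else {
    message("[INFO] ", ...)   # inside a Compute() chunk: print to stdout
  }
}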
data <- Indices(recipe, data, retrieve = F) } } @@ -103,8 +102,8 @@ run_compute_workflow <- function(recipe, data) { obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) # Define input attributes from Start() - input_attributes <- list(hcst = c("Variables", "FileSelectors"), - obs = c("Variables", "FileSelectors")) + input_attributes <- list(hcst = c("Variables", "Dimensions", "FileSelectors", "Files"), + obs = c("Variables", "Dimensions", "FileSelectors", "Files")) # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, @@ -114,7 +113,14 @@ run_compute_workflow <- function(recipe, data) { } # Create output dimensions - exp_output_dims <- exp_target_dims[!exp_target_dims == 'var_dir'] + ## TODO: Improve this conditions + if ("indices" %in% modules) { + spatial_output_dims <- c("region") + } else { + spatial_output_dims <- c("latitude", "longitude") + } + exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], + spatial_output_dims) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -125,16 +131,16 @@ run_compute_workflow <- function(recipe, data) { } # Add skill metrics if skill module is called if ("skill" %in% modules) { - skill_dims <- c('metric', 'var', 'time', 'latitude', 'longitude') + skill_dims <- c('metric', 'var', 'time', spatial_output_dims) skill_output_dims <- skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] output_dims <- c(output_dims, list(skill = skill_output_dims)) } ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO) - if ("indices" %in% modules) { - idx_dims <- c('index', 'var', 'syear', 'time', 'region') - } + # if ("indices" %in% modules) { + # idx_dims <- c('index', 'var', 'syear', 'time', 'region') + # } # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() @@ -243,8 +249,17 @@ run_compute_workflow <- function(recipe, data) { # Step 5: Remove temporary files unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE) + + # --------------------------------------------------------------------------- + # Step 6: Save data + ## PROBLEM: Defining the output dir + # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) + if (recipe$Analysis$Workflow$Skill$save == 'all') { + save_metrics(recipe = recipe, skill = res$skill, + agg = "global") + } # --------------------------------------------------------------------------- - # Step 6: Return outputs + # Step 7: Return outputs res <- res[!sapply(res, is.null)] info(recipe$Run$logger, retrieve = TRUE, "##### DATA RETURNED AS A NAMED LIST #####") -- GitLab From 9d2bb54f5128da9e5eb4d480ab57a1b01ba10826 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 12:55:33 +0100 Subject: [PATCH 57/96] Modify indices module to work in Compute() (WIP) --- modules/Indices/Indices.R | 14 +++++--- modules/Indices/R/compute_nao.R | 62 ++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R index fb9a7277..b767e983 100644 --- a/modules/Indices/Indices.R +++ b/modules/Indices/Indices.R @@ -2,7 +2,13 @@ source("modules/Indices/R/compute_nao.R") source("modules/Indices/R/compute_nino.R") source("modules/Indices/R/drop_indices_dims.R") source("modules/Saving/Saving.R") -Indices <- function(recipe, data) { +## TODO: Remove later 
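For context on the STARTR_CUBE_ATTRS change above: inside the step function each chunk arrives as a bare array, and only the attributes forwarded through Step()'s 'use_attributes' argument survive. A condensed sketch of the attribute round trip performed by the convert_to_s2dv_cube() helper follows; the helper name is the repo's, but the body and the convert_chunk() name here are illustrative.

# Re-attach the forwarded Start() attributes, then rebuild the s2dv_cube.
convert_chunk <- function(chunk, attrs) {
  for (a in names(attrs)) {
    attr(chunk, a) <- attrs[[a]]
  }
  class(chunk) <- "startR_array"
  CSTools::as.s2dv_cube(chunk)
}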
+source("/home/shared/Earth/vagudets/git/s2dv/R/NAO.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/Utils.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/EOF.R") +source("/home/shared/Earth/vagudets/git/s2dv/R/ProjectField.R") + +Indices <- function(recipe, data, retrieve = TRUE) { # Define parameters nao <- NULL if ("nao" %in% tolower(names(recipe$Analysis$Workflow$Indices))) { @@ -29,7 +35,7 @@ Indices <- function(recipe, data) { nao <- compute_nao(data = data, recipe = recipe, obsproj = obsproj, plot_ts = plot_ts, plot_sp = plot_sp, - alpha = alpha) + alpha = alpha, retrieve = retrieve) } ninos <- list() num_ninos <- sum(tolower(substr(names(recipe$Analysis$Workflow$Indices), @@ -101,11 +107,11 @@ Indices <- function(recipe, data) { } if (is.null(nao)) { return(ninos) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### EL NINO INDICES COMPUTED SUCCESSFULLY #####") } else { return(nao) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### NAO INDEX COMPUTED SUCCESSFULLY #####") } } diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index 5bb76e4e..c24aebe7 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -1,6 +1,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, - alpha, logo = NULL) { + alpha, logo = NULL, retrieve = TRUE) { ## TODO: if fcst object in data compute the nao too if (!is.null(data$fcst)) { warning("NAO computed only for hindcast data.") @@ -117,6 +117,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Dates = data$obs$attrs$Dates, Dataset = recipe$Analysis$Datasets$Reference$name)) if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + ## TODO: Fix output directory file_dest <- paste0(recipe$Run$output_dir, "/outputs/Indices/") if (tolower(recipe$Analysis$Horizon) == "seasonal") { # Use startdates param from SaveExp to correctly name the files: @@ -134,23 +135,25 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, # need to recover original dimensions after saving to make Skill module work nao_original_dims_hcst <- nao$hcst$data nao$hcst$data <- .drop_indices_dims(nao$hcst$data) - CST_SaveExp(data = nao$hcst, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = 'ensemble') - nao_original_dims_obs <- nao$obs$data - nao$obs$data <- .drop_indices_dims(nao$obs$data) - CST_SaveExp(data = nao$obs, #res, - destination = file_dest, - startdates = as.vector(file_dates), - dat_dim = NULL, sdate_dim = 'syear', - ftime_dim = 'time', var_dim = NULL, - memb_dim = NULL) - nao$hcst$data <- nao_original_dims_hcst - nao$obs$data <- nao_original_dims_obs - nao_original_dims_hcst <- nao_original_dims_obs <- NULL + if (retrieve) { + CST_SaveExp(data = nao$hcst, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = 'ensemble') + nao_original_dims_obs <- nao$obs$data + nao$obs$data <- .drop_indices_dims(nao$obs$data) + CST_SaveExp(data = nao$obs, #res, + destination = file_dest, + startdates = as.vector(file_dates), + dat_dim = NULL, sdate_dim = 'syear', + ftime_dim = 'time', var_dim = NULL, + memb_dim = NULL) + nao$hcst$data <- nao_original_dims_hcst + nao$obs$data <- nao_original_dims_obs + nao_original_dims_hcst <- nao_original_dims_obs <- NULL + } gc() } # Read variable long_name to plot it @@ -162,13 +165,24 
@@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") - for (tstep in 1:dim(nao$hcst$data)['time']) { - mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + - (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) - mes <- ifelse(mes > 12, mes - 12, mes) + ## TODO: is there a better way to define the time steps? Probably yes. + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) - obs <- Subset(nao$obs$data, along = 'time', ind = tstep) - exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + ## TODO: What to do if there is no 'time' dimension?... + if ('time' %in% dim(nao$obs$data)) { + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + } else { + obs <- nao$obs$data + exp <- nao$hcst$data + } if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { system <- recipe$Analysis$Datasets$System$name } else { -- GitLab From a4b76000b030de691b2715197108a95bf4d7dcc3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 20 Feb 2024 16:59:49 +0100 Subject: [PATCH 58/96] Adapt compute_nao() to work within Compute() --- modules/Indices/R/compute_nao.R | 60 ++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index c24aebe7..0499c147 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -133,9 +133,9 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) } # need to recover original dimensions after saving to make Skill module work - nao_original_dims_hcst <- nao$hcst$data - nao$hcst$data <- .drop_indices_dims(nao$hcst$data) if (retrieve) { + nao_original_dims_hcst <- nao$hcst$data + nao$hcst$data <- .drop_indices_dims(nao$hcst$data) CST_SaveExp(data = nao$hcst, destination = file_dest, startdates = as.vector(file_dates), @@ -171,12 +171,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, indices = 1, drop = T) for (tstep in 1:length(time_step_dates)) { - mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + - (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) - mes <- ifelse(mes > 12, mes - 12, mes) - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) ## TODO: What to do if there is no 'time' dimension?... 
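A self-contained illustration of the dimension guard introduced just below: subset along 'time' only when the array actually carries that dimension, since ClimProjDiags::Subset() cannot select along an absent dimension.

library(ClimProjDiags)
x <- array(rnorm(6), dim = c(syear = 2, time = 3))
if ('time' %in% names(dim(x))) {
  x1 <- Subset(x, along = 'time', indices = 1)  # keeps time = 1 (drop = FALSE)
} else {
  x1 <- x                                       # no time dimension: use as-is
}
dim(x1)  # syear = 2, time = 1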
- if ('time' %in% dim(nao$obs$data)) { + if ('time' %in% names(dim(nao$obs$data))) { obs <- Subset(nao$obs$data, along = 'time', ind = tstep) exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) } else { @@ -196,7 +195,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, "_s", recipe$Analysis$Time$sdate, "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + fmonth, ".png") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -212,8 +211,7 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, recipe$Analysis$Time$hcst_end) plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", system, "_", recipe$Analysis$Datasets$Reference$name, - "_ftime", - sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".png") + "_ftime", fmonth, ".png") caption <- paste0("NAO method: ", ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", @@ -272,12 +270,26 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, time_dim = 'syear', method = 'pearson', alpha = alpha, test.type = 'two-sided', pval = FALSE)}, ncores = recipe$Analysis$ncores) - - for (tstep in 1:dim(nao$obs$data)['time']) { - fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + + + time_step_dates <- Subset(data$hcst$attrs$Dates, + along = "syear", + indices = 1, + drop = T) + for (tstep in 1:length(time_step_dates)) { + mes <- lubridate::month(time_step_dates[tstep]) + fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 + fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) + # for (tstep in 1:dim(nao$obs$data)['time']) { + # fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) ## Observations - map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_obs$r) + sig <- drop(correl_obs$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) @@ -337,8 +349,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, fileout = plotfile, width = 8, height = 6, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) ## Ensemble-mean - map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if ("time" %in% names(dim(data$hcst$data))) { + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst$r) + sig <- drop(correl_hcst$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", "NAO Index -", var_name, "\n", @@ -394,8 +411,13 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) # Full hcst corr - map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) - sig <- drop(Subset(correl_hcst_full$sign, 
along = 'time', ind = tstep)) + if ("time" %in% names(dim(correl_hcst_full$r))) { + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + } else { + map <- drop(correl_hcst_full$r) + sig <- drop(correl_hcst_full$sign) + } if (tolower(recipe$Analysis$Horizon) == "seasonal") { toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", "NAO Index -",var_name, "\n", -- GitLab From 818cb7f99f63af664167606820f0c09112e43053 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 10:15:24 +0100 Subject: [PATCH 59/96] Fully adapt compute_nao() to work within Compute() --- modules/Indices/R/compute_nao.R | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/modules/Indices/R/compute_nao.R b/modules/Indices/R/compute_nao.R index 0499c147..b566f2e2 100644 --- a/modules/Indices/R/compute_nao.R +++ b/modules/Indices/R/compute_nao.R @@ -49,10 +49,11 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, Variable = c(data$hcst$Variable[1], level = 'surface'), data$hcst$Va, Datasets = data$hcst$Datasets, time_dims = c('syear', 'time'), - Dates = data$hcst$Dates) + Dates = data$hcst$attrs$Dates) obs <- s2dv_cube(data = obs1$data, lat = obs1$lat, lon = obs1$lon, Variable = c(data$obs$Variable[1], level = 'surface'), - Datasets = data$obs$Datasets, time_dims = c('syear', 'time')) + Datasets = data$obs$Datasets, time_dims = c('syear', 'time'), + Dates = data$obs$attrs$Dates) # TODO check and create data object for the next steps data <- list(hcst = hcst, obs = obs) lons <- data$hcst$coords$longitude @@ -83,15 +84,8 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, nao$exp <- InsertDim(nao$exp, posdim = 1, lendim = 1, name = 'region') nao$obs <- InsertDim(nao$obs, posdim = 1, lendim = 1, name = 'region') hcst_dates <- data$hcst$attrs$Dates - hcst_dates <- drop(data$hcst$attrs$Dates) - - if (!("time" %in% names(dim(hcst_dates)))) { - if (is.null(dim(hcst_dates))) { - hcst_dates <- array(hcst_dates, c(syear = length(hcst_dates))) - } - hcst_dates <- InsertDim(hcst_dates, pos = 1, len = 1, name = 'time') - hcst_dates <- as.POSIXct(hcst_dates, origin = '1970-01-01', tz = 'UTC') - } + dim(hcst_dates) <- dim(hcst_dates)[intersect(names(dim(hcst_dates)), + names(dim(nao$exp)))] nao <- list(hcst = s2dv_cube( data = nao$exp, varName = "nao", @@ -165,7 +159,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), showWarnings = F, recursive = T) source("modules/Indices/R/plot_deterministic_forecast.R") - ## TODO: is there a better way to define the time steps? Probably yes. time_step_dates <- Subset(data$hcst$attrs$Dates, along = "syear", indices = 1, @@ -174,7 +167,6 @@ compute_nao <- function(data, recipe, obsproj, plot_ts, plot_sp, mes <- lubridate::month(time_step_dates[tstep]) fmonth <- mes - as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) + 1 fmonth <- sprintf("%02d", ifelse(fmonth < 0, fmonth + 12, fmonth)) - ## TODO: What to do if there is no 'time' dimension?... 
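The one-character pitfall behind the names(dim()) correction above, shown with mock values: testing membership against dim(x) compares the string with the dimension lengths, not the dimension names.

x <- array(1:6, dim = c(syear = 2, time = 3))
'time' %in% dim(x)         # FALSE: "time" is compared against c(2, 3)
'time' %in% names(dim(x))  # TRUE: compared against c("syear", "time")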
if ('time' %in% names(dim(nao$obs$data))) { obs <- Subset(nao$obs$data, along = 'time', ind = tstep) exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) -- GitLab From 1de88788b32ce3a9c9d7cafc1ec43ab39c8c0e9b Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:19 +0100 Subject: [PATCH 60/96] Adapt saving data functions and module output (WIP) --- modules/Saving/R/save_metrics.R | 20 +++++++++++-------- modules/Saving/Saving.R | 9 +++++---- modules/Skill/Skill.R | 35 +++++++++++++++------------------ 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 060addfd..9e217b95 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,7 +1,7 @@ save_metrics <- function(recipe, skill, + module, dictionary = NULL, - data_cube, agg = "global", outdir = NULL) { # This function adds metadata to the skill metrics in 'skill' @@ -12,7 +12,9 @@ save_metrics <- function(recipe, dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided + cube_info <- skill[["metadata"]] + skill[["metadata"]] <- NULL + if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { global_attributes <- c(list(from_anomalies = "Yes"), @@ -27,7 +29,7 @@ save_metrics <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar # Generate vector containing leadtimes - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), + dates <- as.PCICt(ClimProjDiags::Subset(cube_info$attrs$Dates, 'syear', 1), cal = calendar) if (fcst.horizon == 'decadal') { @@ -66,14 +68,16 @@ save_metrics <- function(recipe, times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) time <- times$time # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { + for (var in 1:cube_info$dims[['var']]) { # Subset skill arrays subset_skill <- lapply(skill, function(x) { ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) # Generate name of output file - variable <- data_cube$attrs$Variable$varName[[var]] + variable <- cube_info$attrs$Variable$varName[[var]] + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/", + module, "/") outdir <- get_dir(recipe = recipe, variable = variable) if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) @@ -116,14 +120,14 @@ save_metrics <- function(recipe, } else if (tolower(agg) == "region") { region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) # TODO: check metadata when more than 1 region is store in the data array - metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) + metadata <- list(region = list(long_name = cube_info$attrs$Variable$metadata$region$name)) attr(region, 'variables') <- metadata vars <- list(region, time) vars <- c(vars, subset_skill) ArrayToNc(vars, outfile) } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latitude <- cube_info$coords$lat[1:length(cube_info$coords$lat)] + longitude <- cube_info$coords$lon[1:length(cube_info$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) diff --git a/modules/Saving/Saving.R 
b/modules/Saving/Saving.R index cd5a0a47..17f6492e 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -18,11 +18,9 @@ source("modules/Saving/R/drop_dims.R") Saving <- function(recipe, data, skill_metrics = NULL, probabilities = NULL, - agg = 'global', - archive = NULL) { + agg = 'global') { # Wrapper for the saving functions. # recipe: The auto-s2s recipe - # archive: The auto-s2s archive # data: output of load_datasets() # data: output of calibrate_datasets() # skill_metrics: output of compute_skill_metrics() @@ -57,6 +55,7 @@ Saving <- function(recipe, data, } # Iterate over variables to subset s2dv_cubes and save outputs + ## leave as-is save_forecast(recipe = recipe, data_cube = data$hcst, outdir = outdir[var], @@ -72,13 +71,15 @@ Saving <- function(recipe, data, outdir = outdir[var]) # Export skill metrics + ## TODO: change if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg, + # data_cube = data$hcst, agg = agg, outdir = outdir[var]) } if (!is.null(corr_metrics)) { + ## TODO: change (low priority) save_corr(recipe = recipe, skill = corr_metrics, data_cube = data$hcst, diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 75c2e689..b89e415c 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -307,6 +307,7 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, skill_metrics[[ metric ]] <- skill } } + ## TODO: Create list of returns with data and metadata info(recipe$Run$logger, retrieve = retrieve, "##### SKILL METRIC COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") @@ -317,35 +318,31 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, info(recipe$Run$logger, retrieve = retrieve, "##### START SAVING SKILL METRIC #####") } - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Skill/") + # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + # "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save == 'all') { - corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) + corr_metric_names <- grep("^corr_individual_members", + names(skill_metrics$metrics)) if (length(corr_metric_names) == 0) { save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg) + module = "Skill", agg = agg) } else { # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) + save_corr(recipe = recipe, + skill = list(metrics = skill_metrics$metrics[corr_metric_names], + metadata = skill_metrics$metadata), + module = "Skill", + agg = agg) } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } else { - # Save corr - if (length(skill_metrics[corr_metric_names]) > 0) { - save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], - data_cube = data$hcst) - } - # Save other skill metrics - if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], - data_cube = data$hcst, agg = agg) - } + save_metrics(recipe = recipe, + skill = list(metrics = skill_metrics$metrics[-corr_metric_names], + metadata = skill_metrics$metadata), + module = "Skill", + agg = agg) } } } -- 
GitLab From d8fd75d4336bdab3c688a9eb501a0b0faafb3b37 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:45 +0100 Subject: [PATCH 61/96] Fix bug in retrieve_metadata() related to incorrect output dimensions --- tools/retrieve_metadata.R | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R index 7b922103..3550b19a 100644 --- a/tools/retrieve_metadata.R +++ b/tools/retrieve_metadata.R @@ -1,4 +1,14 @@ -retrieve_metadata <- function(tmp_dir, chunks) { +# For each chunking dimension, retrieve the metadata for each chunk from its +# .Rds file and put the pieces together, generating the complete coordinates, +# dimensions and attributes of the resulting s2dv_cube. +# +# tmp_dir: string, The temporary directory where the metadata files are stored. +# chunks: named list of chunking dimensions with the amount of chunks for each. +# array_dims: vector containing the dimensions of the final array. + +retrieve_metadata <- function(tmp_dir, chunks, array_dims) { + ## TODO: add array_dims parameter + ## For each chunking dimension # Build metadata file pattern: metadata_file_pattern <- "skill_metadata" for (chunk in sort(names(chunks))) { @@ -25,25 +35,26 @@ retrieve_metadata <- function(tmp_dir, chunks) { dim(metadata$coords$var) <- c(var = length(metadata$coords$var)) attr(metadata$coords$var, "values") <- TRUE attr(metadata$coords$var, "indices") <- FALSE - ## TODO: $dims + # $dims + metadata$dims["var"] <- array_dims["var"] } # Piece together time info if ("time" %in% names(chunks)) { dates_dims <- dim(metadata$attrs$Dates) - time_dim_idx <- which(names(dates_dims) == "time") for (i in 2:chunks[["time"]]) { metadata_chunk <- readRDS(paste0(tmp_dir, gsub("time_1", paste0("time_", i), metadata_files[1]))) metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } - time_dim_length <- length(metadata$attrs$Dates)/prod(dates_dims[-time_dim_idx]) - dim(metadata$attrs$Dates) <- c(dates_dims[-time_dim_idx], - time = time_dim_length) + time_dim_length <- array_dims[["time"]] + dim(metadata$attrs$Dates) <- c(dates_dims[names(dates_dims) != "time"], + time = time_dim_length) metadata$coords$time <- seq(1, time_dim_length) dim(metadata$coords$time) <- c(time = time_dim_length) attr(metadata$coords$time, "indices") <- TRUE - ## TODO: Add to 'dims' + #$dims + metadata$dims["time"] <- array_dims["time"] } # Piece together lon/lat info if ("latitude" %in% names(chunks)) { @@ -59,7 +70,10 @@ retrieve_metadata <- function(tmp_dir, chunks) { # $coords metadata$coords$latitude <- metadata$attrs$Variable$metadata$latitude dim(metadata$coords$latitude) <- c(latitude = length(metadata$coords$latitude)) + # $dims + metadata$dims["latitude"] <- array_dims["latitude"] } + if ("longitude" %in% names(chunks)) { # $attrs ## TODO: Preserve attributes @@ -68,11 +82,13 @@ retrieve_metadata <- function(tmp_dir, chunks) { gsub("longitude_1", paste0("longitude_", i), metadata_files[1]))) metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude, - metadata_chunk$attrs$Variable$metadata$longitude) + metadata_chunk$attrs$Variable$metadata$longitude) } # $coords metadata$coords$longitude <- metadata$attrs$Variable$metadata$longitude dim(metadata$coords$longitude) <- c(longitude = length(metadata$coords$longitude)) + # $dims + metadata$dims["longitude"] <- array_dims["longitude"] } return(metadata) } -- GitLab From 5fdaeb3c11d4d6eb4cfa998851739dcb5cab598b Mon Sep 17 
00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:30:59 +0100 Subject: [PATCH 62/96] Saving and metadata bug --- build_compute_workflow.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index df05dc0a..fd5388a2 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -72,6 +72,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, convert_to_s2dv_cube <- function(new_cube, original_cube) { attr(new_cube, "Variables") <- attr(original_cube, "Variables") attr(new_cube, "FileSelectors") <- attr(original_cube, "FileSelectors") + attr(new_cube, "Files") <- attr(original_cube, "Files") class(new_cube) <- "startR_array" new_cube <- as.s2dv_cube(new_cube) @@ -102,18 +103,19 @@ run_compute_workflow <- function(recipe, data) { obs = obs_target_dims) inputs <- list(hcst = data$hcst, obs = data$obs) # Define input attributes from Start() - input_attributes <- list(hcst = c("Variables", "Dimensions", "FileSelectors", "Files"), - obs = c("Variables", "Dimensions", "FileSelectors", "Files")) + STARTR_CUBE_ATTRS <- c("Variables", "Dimensions", "FileSelectors", "Files") + input_attributes <- list(hcst = STARTR_CUBE_ATTRS, + obs = STARTR_CUBE_ATTRS) # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, list(exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) - input_attributes <- c(input_attributes, list(fcst = c("Variables", "FileSelectors"))) + input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) } # Create output dimensions - ## TODO: Improve this conditions + ## TODO: Improve this condition if ("indices" %in% modules) { spatial_output_dims <- c("region") } else { @@ -229,20 +231,21 @@ run_compute_workflow <- function(recipe, data) { if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) + ## TODO: Fails if skill has 'region' instead of 'latlon'. Should handle + ## dimensions dynamically. Can Subset() be used? 
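A hedged sketch of the chunk-metadata round trip that retrieve_metadata() above relies on: each Compute() chunk saves its slice of the s2dv_cube metadata to an .Rds file tagged with its chunk indices, and the main process reads the pieces back and concatenates the Dates. The file names and list structure below are illustrative, not the exact pattern used by the repo.

tmp_dir <- tempdir()
for (i in 1:2) {  # pretend two chunks along 'time', each holding 3 lead times
  chunk_meta <- list(attrs = list(Dates = seq(as.POSIXct("2000-11-01", tz = "UTC"),
                                              by = "month", length.out = 3)))
  saveRDS(chunk_meta, file.path(tmp_dir, paste0("skill_metadata_time_", i, ".Rds")))
}
metadata <- readRDS(file.path(tmp_dir, "skill_metadata_time_1.Rds"))
metadata_chunk <- readRDS(file.path(tmp_dir, "skill_metadata_time_2.Rds"))
metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates)
length(metadata$attrs$Dates)  # 6 lead times after reassembly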
res$skill <- lapply(seq(dim(res$skill)[1]), - function(x) {res$skill[x, , , , , drop = F]}) - res$skill <- lapply(res$skill, function(x) { - Subset(x, + Subset(res$skill, along = 'metric', - indices = 1, + indices = x, drop = 'selected') }) names(res$skill) <- metric_list # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, - chunks = recipe$Run$startR_workflow$chunk_along) + chunks = recipe$Run$startR_workflow$chunk_along, + array_dims = dim(res$skill[[1]])) } # --------------------------------------------------------------------------- @@ -256,7 +259,7 @@ run_compute_workflow <- function(recipe, data) { # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) if (recipe$Analysis$Workflow$Skill$save == 'all') { save_metrics(recipe = recipe, skill = res$skill, - agg = "global") + agg = "global", module = "Skill") } # --------------------------------------------------------------------------- # Step 7: Return outputs @@ -265,6 +268,5 @@ run_compute_workflow <- function(recipe, data) { "##### DATA RETURNED AS A NAMED LIST #####") info(recipe$Run$logger, retrieve = TRUE, paste0("Steps performed: ", recipe$Run$startR_workflow$modules)) - return(res) } -- GitLab From 2851a2b1e3153ebe39e0a687aaa25bce882592ce Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 27 Feb 2024 15:31:39 +0100 Subject: [PATCH 63/96] Testing --- example_scripts/test_compute.R | 1 + .../atomic_recipes/recipe_test_compute.yml | 50 ++++++++++--------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index c82a1719..8179ba73 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -4,6 +4,7 @@ source("modules/Preprocessing/Preprocessing.R") source("modules/Calibration/Calibration.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") +source("modules/Indices/Indices.R") source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow.R") diff --git a/recipes/atomic_recipes/recipe_test_compute.yml b/recipes/atomic_recipes/recipe_test_compute.yml index eaca75d7..f082a1ed 100644 --- a/recipes/atomic_recipes/recipe_test_compute.yml +++ b/recipes/atomic_recipes/recipe_test_compute.yml @@ -4,12 +4,12 @@ Description: Analysis: Horizon: Seasonal Variables: - name: tas + name: psl freq: monthly_mean - units: {tas: C} + # units: {tas: C} Datasets: System: - name: Meteo-France-System7 + name: ECMWF-SEAS5 Multimodel: False Reference: name: ERA5 @@ -17,36 +17,38 @@ Analysis: sdate: '1101' fcst_year: # '2020' hcst_start: '2000' - hcst_end: '2016' + hcst_end: '2010' ftime_min: 1 ftime_max: 3 Region: - latmin: -10 - latmax: 10 - lonmin: 0 - lonmax: 20 + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 Regrid: method: bilinear - type: to_system #to_reference #'r360x181' + type: "r180x90" #to_reference #'r360x181' Workflow: Anomalies: - compute: no # yes/no, default yes + compute: yes # yes/no, default yes cross_validation: yes # yes/no, default yes save: 'all' # 'all'/'none'/'exp_only'/'fcst_only' Calibration: method: raw save: 'none' # 'all'/'none'/'exp_only'/'fcst_only' - Downscaling: - # Assumption 1: leave-one-out cross-validation is always applied - # Assumption 2: for analogs, we select the best analog (minimum distance) - type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. - int_method: conservative # regridding method accepted by CDO. 
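As a side note on the startR_workflow block of this test recipe (see the chunk_along entry further down in the diff): the YAML mapping arrives in R as a named list and is handed to Compute() as its 'chunks' argument. A minimal sketch, with the workflow object assumed to exist:

recipe_chunks <- list(time = 2)  # parsed from 'chunk_along: {time: 2}'
# res <- startR::Compute(wf, chunks = recipe_chunks)  # wf as built in run_compute_workflow()
names(recipe_chunks)             # dimensions to split: "time"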
- bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. - lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' - log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' - target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO - nanalogs: # If type analgs. Number of analogs to be searched - save: 'all' # 'all'/'none'/'exp_only' + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + # Downscaling: + # # Assumption 1: leave-one-out cross-validation is always applied + # # Assumption 2: for analogs, we select the best analog (minimum distance) + # type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + # int_method: conservative # regridding method accepted by CDO. + # bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + # lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + # log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + # target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + # nanalogs: # If type analgs. Number of analogs to be searched + # save: 'all' # 'all'/'none'/'exp_only' Skill: metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS save: 'all' # 'all'/'none' @@ -68,8 +70,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: downscaling skill # Modules to run inside Compute(), in order - chunk_along: {time: 3} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From 88c948af816ce6355b27c8e51b364b8a6c78c779 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 29 Feb 2024 15:52:52 +0100 Subject: [PATCH 64/96] Remove TODOs; formatting --- build_compute_workflow.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index fd5388a2..ba1bd00b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -46,7 +46,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } else if (module == "anomalies") { data <- Anomalies(recipe, data, retrieve = F) } else if (module == "downscaling") { - ## TODO: Test, adapt if necessary data <- Downscaling(recipe, data, retrieve = F) } else if (module == "skill") { skill_metrics <- Skill(recipe, data, retrieve = F, nchunks = nchunks) @@ -54,7 +53,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, ## TODO: Adapt probabilities output probabilities <- Probabilities(recipe, data, retrieve = F, nchunks = nchunks) } else if (module == "indices") { - ## TODO: Won't work with plots... OR WILL IT? 
data <- Indices(recipe, data, retrieve = F) } } @@ -121,7 +119,9 @@ run_compute_workflow <- function(recipe, data) { } else { spatial_output_dims <- c("latitude", "longitude") } - exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], + exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', + 'latitude', + 'longitude')], spatial_output_dims) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, @@ -140,6 +140,9 @@ run_compute_workflow <- function(recipe, data) { list(skill = skill_output_dims)) } ## TODO: Add Indices (Niño1+2, Niño3, Niño3.4, Niño4, NAO) + ## Instead of having the index dimension, we could add a second + ## loop and then define outputs dynamically based on the recipe; + ## one s2dv_cube per index. At least in the case of ENSO. # if ("indices" %in% modules) { # idx_dims <- c('index', 'var', 'syear', 'time', 'region') # } @@ -147,7 +150,7 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 3: Generate the Step and call Compute() # Libraries to be loaded when running Compute() - libs_load <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", + LIBS_LOAD <- c("log4r", "docopt", "ClimProjDiags", "multiApply", "yaml", "s2dv", "abind", "easyNCDF", "CSTools", "lubridate", "PCICt", "RColorBrewer", "configr", "sf", "ggplot2", "rnaturalearth", "cowplot", "stringr", "pryr") @@ -156,7 +159,7 @@ run_compute_workflow <- function(recipe, data) { target_dims = target_dims, output_dims = output_dims, use_attributes = input_attributes, - use_libraries = libs_load) + use_libraries = LIBS_LOAD) wf <- AddStep(inputs = inputs, step = step, @@ -252,12 +255,11 @@ run_compute_workflow <- function(recipe, data) { # Step 5: Remove temporary files unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE) - # --------------------------------------------------------------------------- # Step 6: Save data ## PROBLEM: Defining the output dir # Saving(recipe = recipe, data = res$data, skill_metrics = res$skill) - if (recipe$Analysis$Workflow$Skill$save == 'all') { + if (!is.null(res$skill) && recipe$Analysis$Workflow$Skill$save == 'all') { save_metrics(recipe = recipe, skill = res$skill, agg = "global", module = "Skill") } -- GitLab From 78dd5e9b8b2926883233340736087f81cb7d9771 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 27 Mar 2024 10:18:12 +0100 Subject: [PATCH 65/96] Add retrieve parameter to Statistics module --- modules/Statistics/Statistics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc5..fc21c364 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -1,4 +1,4 @@ -Statistics <- function(recipe, data, agg = 'global') { +Statistics <- function(recipe, data, agg = 'global', retrieve = TRUE) { # data$hcst: s2dv_cube containing the hindcast # data$obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml -- GitLab From acb20b727a0e48b4125900243025cc776a057770 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 8 Apr 2024 11:31:52 +0200 Subject: [PATCH 66/96] Fix some unit tests and logger call --- tests/testthat/test-seasonal_daily.R | 2 +- tests/testthat/test-seasonal_downscaling.R | 2 +- tests/testthat/test-seasonal_monthly.R | 2 +- tools/prepare_outputs.R | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/testthat/test-seasonal_daily.R b/tests/testthat/test-seasonal_daily.R index 6cfa4384..1d69bffc 100644 --- a/tests/testthat/test-seasonal_daily.R +++ b/tests/testthat/test-seasonal_daily.R @@ -146,7 +146,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("enscorr_specs") ) expect_equal( diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R index 8a52657a..78b043b6 100644 --- a/tests/testthat/test-seasonal_downscaling.R +++ b/tests/testthat/test-seasonal_downscaling.R @@ -213,7 +213,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias") ) expect_equal( diff --git a/tests/testthat/test-seasonal_monthly.R b/tests/testthat/test-seasonal_monthly.R index 18bde4c0..b1b775ea 100644 --- a/tests/testthat/test-seasonal_monthly.R +++ b/tests/testthat/test-seasonal_monthly.R @@ -183,7 +183,7 @@ is.list(skill_metrics), TRUE ) expect_equal( -names(skill_metrics), +names(skill_metrics)[!names(skill_metrics) == "metadata"], c("rpss", "rpss_significance", "crpss", "crpss_significance", "enscorr", "enscorr_significance", "corr_individual_members", "corr_individual_members_significance", "enscorr_specs") diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index 05dcf12d..bddfd74a 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -99,7 +99,7 @@ prepare_outputs <- function(recipe_file, along with this program. If not, see <https://www.gnu.org/licenses/>." for (i in disclosure_message) { - info(recipe$Run$logger, i) + info(recipe$Run$logger, retrieve = TRUE, i) } # Set up default filesystem if (is.null(recipe$Run$filesystem)) { -- GitLab From 5c63ac06545db611352109c72e14bb4fca48a691 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 8 Apr 2024 14:29:14 +0200 Subject: [PATCH 67/96] Use ClimProjDiags::ArrayToList() to rearrange skill metric array into list --- build_compute_workflow.R | 14 ++++++-------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 390b39ed..ec13e121 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -234,16 +234,10 @@ run_compute_workflow <- function(recipe, data) { if (!is.null(res$skill)) { tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) - ## TODO: Fails if skill has 'region' instead of 'latlon'. Should handle - ## dimensions dynamically. Can Subset() be used?
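What the ArrayToList() replacement below does, demonstrated on toy data: it splits the 'metric' dimension of the assembled skill array into a named list with one array per metric, which is the shape the Saving functions expect. The metric names here are illustrative.

library(ClimProjDiags)
skill <- array(rnorm(24), dim = c(metric = 2, time = 3, region = 4))
metric_list <- c("rpss", "rpss_significance")
skill_list <- ArrayToList(skill, dim = 'metric', level = 'list',
                          names = metric_list)
names(skill_list)  # "rpss" "rpss_significance"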
- res$skill <- lapply(seq(dim(res$skill)[1]), - function(x) { - Subset(res$skill, - along = 'metric', - indices = x, - drop = 'selected') - }) - names(res$skill) <- metric_list + res$skill <- ClimProjDiags::ArrayToList(res$skill, + dim = 'metric', + level = 'list', + names = metric_list) # Put chunked metadata back together source("tools/retrieve_metadata.R") res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir, -- GitLab From 1eb97dcdf4fc7ed8ec096133ac5a104cca65335e Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 9 Apr 2024 14:49:44 +0200 Subject: [PATCH 68/96] Remove browser() calls and fix bug in save_metrics --- modules/Saving/R/save_metrics.R | 3 +-- modules/Skill/Skill.R | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index c643eec5..ac895fd4 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -14,7 +14,6 @@ save_metrics <- function(recipe, global_attributes <- .get_global_attributes(recipe, archive) cube_info <- metrics[["metadata"]] metrics[["metadata"]] <- NULL - browser() if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && (recipe$Analysis$Workflow$Anomalies$compute)) { @@ -155,7 +154,7 @@ save_metrics <- function(recipe, country <- get_countries(grid) ArrayToNc(append(country, time, subset_metric), outfile) } else if (tolower(agg) == "region") { - region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) + region <- array(1:dim(metrics[[1]])['region'], c(dim(metrics[[1]])['region'])) # TODO: check metadata when more than 1 region is store in the data array metadata <- list(region = list(long_name = cube_info$attrs$Variable$metadata$region$name)) attr(region, 'variables') <- metadata diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 22b75238..e5dbcb06 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -371,7 +371,6 @@ Skill <- function(recipe, data, agg = 'global', retrieve = TRUE, # "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim if (recipe$Analysis$Workflow$Skill$save == 'all') { - browser() corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) if (length(corr_metric_names) == 0) { -- GitLab From 24f7da5d1fe4641728c28eeecc3b395c88d04ee9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 24 Apr 2024 16:38:07 +0200 Subject: [PATCH 69/96] Load decadal with retrieve = FALSE (wip) --- modules/Loading/R/load_decadal.R | 209 +++++++++++----------- recipes/atomic_recipes/recipe_decadal.yml | 6 +- 2 files changed, 110 insertions(+), 105 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 8f594d86..3e6750c2 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -11,7 +11,7 @@ source("modules/Loading/R/compare_exp_obs_grids.R") # recipe_file <- "recipes/atomic_recipes/recipe_decadal.yml" # recipe_file <- "recipes/atomic_recipes/recipe_decadal_daily.yml" -load_decadal <- function(recipe, retrieve = TRUE) { +load_decadal <- function(recipe, retrieve = retrieve) { ## archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] @@ -120,7 +120,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { return_vars = list(latitude = NULL, longitude = NULL, time = c('syear', 'chunk')), silent = !DEBUG, - retrieve = T) + retrieve = retrieve) if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, @@ -160,16 +160,21 @@ 
load_decadal <- function(recipe, retrieve = TRUE) { } tmp_time_attr <- attr(hcst, 'Variables')$common$time - + + ## TODO: for retrieve = FALSE, do this in pre-processing # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + if (retrieve) { + dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) + } if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, "hcst has problem in matching data and time attr dimension.") stop() } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) + # Get hcst dates + dates <- attr(hcst, 'Variables')$common$time #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 if (multi_path) { @@ -177,9 +182,9 @@ load_decadal <- function(recipe, retrieve = TRUE) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { hcst <- as.s2dv_cube(hcst) - ) + } #------------------------------------------- # Step 2: Load the fcst @@ -240,7 +245,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, + error(recipe$Run$logger, retrieve = retrieve, "fcst has problem in matching data and time attr dimension.") stop() } @@ -252,15 +257,14 @@ load_decadal <- function(recipe, retrieve = TRUE) { } # Change class from startR_array to s2dv_cube - suppressWarnings( + if (retrieve) { fcst <- as.s2dv_cube(fcst) - ) - - # Only syear could be different - if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { - error(recipe$Run$logger, - "hcst and fcst do not share the same dimension structure.") - stop() + # Only syear could be different + if (!identical(dim(hcst$data)[-5], dim(fcst$data)[-5])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst do not share the same dimension structure.") + stop() + } } } else { @@ -281,7 +285,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # Get from startR_cube # dates <- attr(hcst, 'Variables')$common$time # Get from s2dv_cube - dates <- hcst$attrs$Dates + # dates <- hcst$attrs$Dates dates_file <- sapply(dates, format, '%Y%m') dim(dates_file) <- dim(dates) @@ -319,7 +323,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { return_vars = list(latitude = NULL, longitude = NULL, time = 'file_date'), silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } else if (store.freq == "monthly_mean") { #//////////////// @@ -347,7 +351,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { time = 'file_date'), metadata_dims = 'var', silent = !DEBUG, - retrieve = TRUE) + retrieve = retrieve) } @@ -355,100 +359,101 @@ load_decadal <- function(recipe, retrieve = TRUE) { # sday sweek syear time # 1 1 2 14 - # Remove var_dir dimension - obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") - - # Only ensemble dim could be different - if (!identical(dim(obs), dim(hcst$data)[-9])) { - error(recipe$Run$logger, - "obs and hcst dimensions do not match.") - stop() - } - # Add ensemble dim to obs - dim(obs) <- c(dim(obs), ensemble = 1) - - # Change class from startR_array to s2dv_cube - suppressWarnings( + obs_dates <- attr(obs, 
'Variables')$common$time + + if (retrieve) { + # Remove var_dir dimension + obs <- Subset(obs, along = "var_dir", indices = 1, drop = "selected") + # Only ensemble dim could be different + if (!identical(dim(obs), dim(hcst$data)[-9])) { + error(recipe$Run$logger, retrieve = retrieve, + "obs and hcst dimensions do not match.") + stop() + } + # Add ensemble dim to obs + dim(obs) <- c(dim(obs), ensemble = 1) + # Transform to s2dv_cube obs <- as.s2dv_cube(obs) - ) + } #------------------------------------------- # Step 4. Verify the consistance between data #------------------------------------------- - # dimension - if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension names.") - stop() - } else { - ens_ind <- which(names(dim(obs$data)) == 'ensemble') - match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and obs don't share the same dimension length.") - stop() - } - } - - # time attribute - if (!identical(format(hcst$attrs$Dates, '%Y%m'), - format(obs$attrs$Dates, '%Y%m'))) { - error(recipe$Run$logger, - "hcst and obs don't share the same time.") - stop() - } - - # lat and lon attributes - if (!(recipe$Analysis$Regrid$type == 'none')) { - if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { - lat_error_msg <- paste("Latitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lat_error_msg) - hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], - "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) - info(recipe$Run$logger, hcst_lat_msg) - obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], - "; Last obs lat: ", obs$lat[length(obs$lat)]) - info(recipe$Run$logger, obs_lat_msg) - stop("hcst and obs don't share the same latitudes.") - } - - if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { - lon_error_msg <- paste("Longitude mismatch between hcst and obs.", - "Please check the original grids and the", - "regrid parameters in your recipe.") - error(recipe$Run$logger, lon_error_msg) - hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], - "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) - info(recipe$Run$logger, hcst_lon_msg) - obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], - "; Last obs lon: ", obs$lon[length(obs$lon)]) - info(recipe$Run$logger, obs_lon_msg) - stop("hcst and obs don't share the same longitudes.") - } - } - - # Check fcst - if (!is.null(fcst)) { + if (retrieve) { # dimension - if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension names.") + if (any(!names(dim(obs$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same dimension names.") stop() } else { - ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) - match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) - if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { - error(recipe$Run$logger, - "hcst and fcst don't share the same dimension length.") + ens_ind <- which(names(dim(obs$data)) == 'ensemble') + match_ind <- match(names(dim(obs$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(obs$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and 
obs don't share the same dimension length.") stop() } } + # time attribute + if (!identical(format(dates, '%Y%m'), format(obs_dates, '%Y%m'))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and obs don't share the same time.") + stop() + } + # lat and lon attributes if (!(recipe$Analysis$Regrid$type == 'none')) { - compare_exp_obs_grids(hcst, obs) + if (!isTRUE(all.equal(as.vector(hcst$lat), as.vector(obs$lat)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + + if (!isTRUE(all.equal(as.vector(hcst$lon), as.vector(obs$lon)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, retrieve = retrieve, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, retrieve = retrieve, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, retrieve = retrieve, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + } + + # Check fcst + if (!is.null(fcst)) { + # dimension + if (any(!names(dim(fcst$data)) %in% names(dim(hcst$data)))) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension names.") + stop() + } else { + ens_ind <- which(names(dim(fcst$data)) %in% c('ensemble', 'syear')) + match_ind <- match(names(dim(fcst$data))[-ens_ind], names(dim(hcst$data))) + if (!all(dim(hcst$data)[match_ind] == dim(fcst$data)[-ens_ind])) { + error(recipe$Run$logger, retrieve = retrieve, + "hcst and fcst don't share the same dimension length.") + stop() + } + } + + # lat and lon attributes + if (!(recipe$Analysis$Regrid$type == 'none')) { + compare_exp_obs_grids(hcst, obs) + } } } @@ -460,7 +465,7 @@ load_decadal <- function(recipe, retrieve = TRUE) { # for (var_idx in 1:length(variable)) { # var_name <- variable[var_idx] # if (dictionary$vars[[var_name]]$accum) { - # info(recipe$Run$logger, + # info(recipe$Run$logger, retrieve = retrieve, # paste0("Accumulated variable ", var_name, # ": setting negative values to zero.")) # # obs$data[, var_idx, , , , , , , ] <- pmax(Subset(obs$data, @@ -477,9 +482,9 @@ load_decadal <- function(recipe, retrieve = TRUE) { # Step 6. 
Print summary #------------------------------------------- - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = retrieve, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") + .log_memory_usage(recipe$Run$logger, retrieve = retrieve, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/recipes/atomic_recipes/recipe_decadal.yml b/recipes/atomic_recipes/recipe_decadal.yml index 26312b34..5ac1fa52 100644 --- a/recipes/atomic_recipes/recipe_decadal.yml +++ b/recipes/atomic_recipes/recipe_decadal.yml @@ -37,7 +37,7 @@ Analysis: method: bias save: 'all' Skill: - metric: RPSS Corr + metric: RPSS save: 'all' Probabilities: percentiles: [[1/3, 2/3]] @@ -52,6 +52,6 @@ Analysis: Run: Loglevel: INFO Terminal: yes - output_dir: /esarchive/scratch/aho/git/auto-s2s/out-logs/ - code_dir: /esarchive/scratch/aho/git/auto-s2s/ + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 90f5eaf0f323e9121a891f3a590fa5446e409d9d Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 26 Apr 2024 11:48:48 +0200 Subject: [PATCH 70/96] load decadal data with retrieve = FALSE --- modules/Loading/R/load_decadal.R | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 3e6750c2..65d69b93 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -16,9 +16,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { archive <- read_yaml(paste0("conf/archive_decadal.yml"))[[recipe$Run$filesystem]] # Print Start() info or not - DEBUG <- FALSE - - ## TODO: this should come from the main script + DEBUG <- TRUE # Create output folder and log: #------------------------- @@ -132,7 +130,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { if (!multi_path) { Start_hcst_arg_list <- Start_default_arg_list hcst <- do.call(Start, Start_hcst_arg_list) - + hcst_dims <- attr(hcst, "Dimensions") } else { Start_hcst_arg_list <- Start_default_arg_list Start_hcst_arg_list[['syear']] <- NULL @@ -141,7 +139,6 @@ load_decadal <- function(recipe, retrieve = retrieve) { Start_hcst_arg_list[['return_vars']][['time']] <- Start_hcst_arg_list[['return_vars']][['time']][-remove_ind] hcst <- do.call(Start, Start_hcst_arg_list) - # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) @@ -166,12 +163,14 @@ load_decadal <- function(recipe, retrieve = retrieve) { # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) - } - if (!identical(dim(tmp_time_attr), dim(hcst)[c('syear', 'time')])) { - error(recipe$Run$logger, retrieve = retrieve, - "hcst has problem in matching data and time attr dimension.") - stop() - } + } else { + + ## TODO: Why tf does this raise an error? 
+ # if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { + # error(recipe$Run$logger, retrieve = retrieve, + # "hcst has problem in matching data and time attr dimension.") + # stop() + # } dim(attr(hcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) # Get hcst dates dates <- attr(hcst, 'Variables')$common$time @@ -224,8 +223,11 @@ load_decadal <- function(recipe, retrieve = retrieve) { # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions ## dim(fcst) should be [dat, var, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) - fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) + fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', + 'latitude', 'longitude', 'ensemble')) + } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only @@ -243,11 +245,13 @@ load_decadal <- function(recipe, retrieve = retrieve) { # change syear to c(sday, sweek, syear) # dim(fcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] - dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) - if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { - error(recipe$Run$logger, retrieve = retrieve, - "fcst has problem in matching data and time attr dimension.") - stop() + if (retrieve) { + dim(fcst) <- c(dim(fcst)[1:2], sday = 1, sweek = 1, dim(fcst)[3:7]) + if (!identical(dim(tmp_time_attr), dim(fcst)[c('syear', 'time')])) { + error(recipe$Run$logger, retrieve = retrieve, + "fcst has problem in matching data and time attr dimension.") + stop() + } } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) -- GitLab From 3af9f8a7d78498824e36db257193c0d77fdb0eaf Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 26 Apr 2024 14:32:56 +0200 Subject: [PATCH 71/96] Fix dimensions and issues (WIP) --- modules/Loading/R/load_decadal.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 65d69b93..ddc778ef 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -164,6 +164,9 @@ load_decadal <- function(recipe, retrieve = retrieve) { if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) } else { + attr(hcst, "Dimensions") <- c(hcst_dims[1:2], sday = 1, sweek = 1, + hcst_dims[3:7]) + } ## TODO: Why tf does this raise an error? 
# if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { @@ -217,7 +220,8 @@ load_decadal <- function(recipe, retrieve = retrieve) { Start_fcst_arg_list[['syear']] <- NULL Start_fcst_arg_list[['chunk_depends']] <- NULL remove_ind <- which(Start_fcst_arg_list[['return_vars']][['time']] == 'syear') - Start_fcst_arg_list[['return_vars']][['time']] <- Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] + Start_fcst_arg_list[['return_vars']][['time']] <- + Start_fcst_arg_list[['return_vars']][['time']][-remove_ind] fcst <- do.call(Start, Start_fcst_arg_list) # Reshape and reorder dimensions @@ -252,7 +256,12 @@ load_decadal <- function(recipe, retrieve = retrieve) { "fcst has problem in matching data and time attr dimension.") stop() } + } else { + fcst_dims <- attr(fcst, "Dimensions") + attr(fcst, "Dimensions") <- c(fcst_dims[1:2], sday = 1, sweek = 1, + fcst_dims[3:7]) } + dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) #TODO: as.s2dv_cube() needs to be improved to recognize "variable" is under $dat1 @@ -269,8 +278,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { "hcst and fcst do not share the same dimension structure.") stop() } - } - + } } else { fcst <- NULL } -- GitLab From c600f469eb6553599e291291add28cf93c818a86 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 3 May 2024 15:40:26 +0200 Subject: [PATCH 72/96] Create recipe for testing --- .../recipe_test_compute_decadal.yml | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 recipes/atomic_recipes/recipe_test_compute_decadal.yml diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml new file mode 100644 index 00000000..e8154363 --- /dev/null +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -0,0 +1,66 @@ +Description: + Author: An-Chi Ho + '': split version +Analysis: + Horizon: Decadal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: HadGEM3-GC31-MM #EC-Earth3-i4 #CanESM5 + member: r1i1p1f2,r2i1p1f2,r3i1p1f2 #'all' + Multimodel: no + Reference: + name: ERA5 #JRA-55 + Time: + fcst_year: [2020,2021] + hcst_start: 1990 + hcst_end: 1993 +# season: 'Annual' + ftime_min: 2 + ftime_max: 24 + Region: + latmin: 10 #-90 + latmax: 20 #90 + lonmin: 0 + lonmax: 15 #359.9 + Regrid: + method: bilinear + type: to_system #to_reference + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ + startR_workflow: + modules: anomalies skill # Modules to run inside Compute(), in order + return: hcst, fcst, obs # Choose only if they will fit into memory + save: hcst, fcst, obs # Only if returned + chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + run_on: local # options: local, as_machine, nord3 + chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' + expid: a6cb # Only if run_on is not 'local'. 
leave empty to create a new one? O + hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local' + slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local' -- GitLab From 5c7e78a211e917adf7cdcb5fb9a2ce04151c35b8 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 6 May 2024 15:55:36 +0200 Subject: [PATCH 73/96] Fix issue with sday and sweek dimensions in decadal case --- example_scripts/test_compute.R | 2 +- modules/Loading/R/load_decadal.R | 13 +++---------- modules/Preprocessing/Preprocessing.R | 16 ++++++++++++---- .../recipe_test_compute_decadal.yml | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R index 8179ba73..5622c5ec 100644 --- a/example_scripts/test_compute.R +++ b/example_scripts/test_compute.R @@ -9,7 +9,7 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") source("build_compute_workflow.R") -recipe_file <- "recipes/atomic_recipes/recipe_test_compute.yml" +recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml" recipe <- prepare_outputs(recipe_file) # Load datasets diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index ddc778ef..10651836 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -119,7 +119,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { time = c('syear', 'chunk')), silent = !DEBUG, retrieve = retrieve) - + if (length(variable) > 1) { Start_default_arg_list <- c(Start_default_arg_list, list(table = table, grid = grid, version = version, @@ -163,12 +163,9 @@ load_decadal <- function(recipe, retrieve = retrieve) { # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { dim(hcst) <- c(dim(hcst)[1:2], sday = 1, sweek = 1, dim(hcst)[3:7]) - } else { - attr(hcst, "Dimensions") <- c(hcst_dims[1:2], sday = 1, sweek = 1, - hcst_dims[3:7]) } - ## TODO: Why tf does this raise an error? + ## TODO: Why does this raise an error? 
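## A possible explanation for the TODO above (an unverified hypothesis, not
## part of the patch): identical() distinguishes integer from double storage,
## and dim() returns integers while the "Dimensions" attribute may hold
## doubles, so dimension vectors that match in names and values can still
## fail the test. A looser sketch of the disabled check that follows:
# if (!all(dim(tmp_time_attr) == hcst_dims[c('syear', 'time')])) {
#   error(recipe$Run$logger, retrieve = retrieve,
#         "hcst has problem in matching data and time attr dimension.")
#   stop()
# }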
# if (!identical(dim(tmp_time_attr), hcst_dims[c('syear', 'time')])) { # error(recipe$Run$logger, retrieve = retrieve, # "hcst has problem in matching data and time attr dimension.") @@ -256,11 +253,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { "fcst has problem in matching data and time attr dimension.") stop() } - } else { - fcst_dims <- attr(fcst, "Dimensions") - attr(fcst, "Dimensions") <- c(fcst_dims[1:2], sday = 1, sweek = 1, - fcst_dims[3:7]) - } + } dim(attr(fcst, 'Variables')$common$time) <- c(sday = 1, sweek = 1, dim(tmp_time_attr)) diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index 6f118670..d6a287e8 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -1,10 +1,18 @@ preprocess_datasets <- function(recipe, data) { # Remove 'var_dir' dimension for (element in names(data)) { - data[[element]] <- Subset(x = data[[element]], - along = c('var_dir'), - indices = list(1), - drop = 'selected') + if ("var_dir" %in% names(dim(data[[element]]))) { + data[[element]] <- Subset(x = data[[element]], + along = c('var_dir'), + indices = list(1), + drop = 'selected') + } + if (!("sday" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sday = 1) + } + if (!("sweek" %in% names(dim(data[[element]])))) { + dim(data[[element]]) <- c(dim(data[[element]]), sweek = 1) + } } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index e8154363..985d5c0b 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -58,7 +58,7 @@ Run: modules: anomalies skill # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From c5440d0e1ca71567dc543bcd690f52a1470ab98e Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 10 May 2024 16:11:52 +0200 Subject: [PATCH 74/96] TEST: Add sweek and sday dimensions for decadal output --- build_compute_workflow.R | 6 ++++-- recipes/atomic_recipes/recipe_test_compute_decadal.yml | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index ec13e121..a4403cb5 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -122,7 +122,8 @@ run_compute_workflow <- function(recipe, data) { exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', 'latitude', 'longitude')], - spatial_output_dims) + spatial_output_dims, + c("sday", "sweek")) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -169,7 +170,7 @@ run_compute_workflow <- function(recipe, data) { ## TODO: Create function to generate call to Compute()? 
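## One possible shape for that helper, sketched from the calls below; the
## function name and the local/cluster split are illustrative, not part of
## the codebase:
# build_compute_call <- function(wf, recipe) {
#   if (recipe$Run$startR_workflow$run_on == 'local') {
#     Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along)
#   } else {
#     # assemble the cluster arguments here from the recipe fields
#     # (run_on, chunk_wallclock, expid, hpc_user, slurm_directives)
#   }
# }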
# Compute locally run_on <- recipe$Run$startR_workflow$run_on - + r_module_ver <- list(local = "R/4.1.2-foss-2015a-bare", as_machine = "R/4.1.2-foss-2015a-bare", nord3 = "R/4.1.2-foss-2019b") @@ -181,6 +182,7 @@ run_compute_workflow <- function(recipe, data) { # Compute locally, in serial res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along) + print("made it here") } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere ## TODO: Change code_dir to autosubmit dir when necessary diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index 985d5c0b..8ea48e9a 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -21,10 +21,10 @@ Analysis: ftime_min: 2 ftime_max: 24 Region: - latmin: 10 #-90 + latmin: -10 #-90 latmax: 20 #90 lonmin: 0 - lonmax: 15 #359.9 + lonmax: 30 #359.9 Regrid: method: bilinear type: to_system #to_reference @@ -55,10 +55,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: anomalies skill # Modules to run inside Compute(), in order + modules: skill # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? 
O -- GitLab From 2cb80e23bdcd646244ef856548daa09b0fa01e7f Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 13 May 2024 14:54:45 +0200 Subject: [PATCH 75/96] Remove absolute paths to temporary files and replace with local copy --- modules/Indices/Indices.R | 8 +- modules/Indices/R/tmp/EOF.R | 292 ++++ modules/Indices/R/tmp/NAO.R | 434 ++++++ modules/Indices/R/tmp/ProjectField.R | 272 ++++ modules/Indices/R/tmp/Utils.R | 1884 ++++++++++++++++++++++++++ 5 files changed, 2886 insertions(+), 4 deletions(-) create mode 100644 modules/Indices/R/tmp/EOF.R create mode 100644 modules/Indices/R/tmp/NAO.R create mode 100644 modules/Indices/R/tmp/ProjectField.R create mode 100644 modules/Indices/R/tmp/Utils.R diff --git a/modules/Indices/Indices.R b/modules/Indices/Indices.R index b767e983..a6ab15de 100644 --- a/modules/Indices/Indices.R +++ b/modules/Indices/Indices.R @@ -3,10 +3,10 @@ source("modules/Indices/R/compute_nino.R") source("modules/Indices/R/drop_indices_dims.R") source("modules/Saving/Saving.R") ## TODO: Remove later -source("/home/shared/Earth/vagudets/git/s2dv/R/NAO.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/Utils.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/EOF.R") -source("/home/shared/Earth/vagudets/git/s2dv/R/ProjectField.R") +source("modules/Indices/R/tmp/NAO.R") +source("modules/Indices/R/tmp/Utils.R") +source("modules/Indices/R/tmp/EOF.R") +source("modules/Indices/R/tmp/ProjectField.R") Indices <- function(recipe, data, retrieve = TRUE) { # Define parameters diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R new file mode 100644 index 00000000..38c3fae0 --- /dev/null +++ b/modules/Indices/R/tmp/EOF.R @@ -0,0 +1,292 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. 
+#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + 
stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. 
+  if (dim.dat[2] > dim.dat[1]) {
+    transpose <- TRUE
+  } else {
+    transpose <- FALSE
+  }
+  if (transpose) {
+    pca <- La.svd(t(ano))
+  } else {
+    pca <- La.svd(ano)
+  }
+
+  # La.svd conventions: decomposition X = U D t(V). La.svd$u
+  # returns U, La.svd$d returns diagonal values of D, La.svd$v
+  # returns t(V) !! The usual convention is PC=U and EOF=V.
+  # If La.svd is called for ano (transpose=FALSE case): EOFs:
+  # $v PCs: $u. If La.svd is called for t(ano) (transposed=TRUE
+  # case): EOFs: t($u) PCs: t($v)
+
+  if (transpose) {
+    pca.EOFs <- t(pca$u)
+    pca.PCs <- t(pca$v)
+  } else {
+    pca.EOFs <- pca$v
+    pca.PCs <- pca$u
+  }
+
+  # The number of modes kept is limited to neofs
+  PC <- pca.PCs[, 1:neofs]
+  EOF <- pca.EOFs[1:neofs, ]
+  dim(EOF) <- c(neofs, ny, nx)
+
+  # To avoid a crash when neofs = 1.
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs
+    # Divide patterns by area weights so that EOF * PC gives unweighted (original) data
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the explained variance,
+    # so that the reconstruction is only EOF * PC
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R
new file mode 100644
index 00000000..75731055
--- /dev/null
+++ b/modules/Indices/R/tmp/NAO.R
@@ -0,0 +1,434 @@
+#'Compute the North Atlantic Oscillation (NAO) Index
+#'
+#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF
+#'of the sea level pressure (SLP) anomalies over the north Atlantic region
+#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and
+#'observed anomalies onto the observed EOF pattern or the forecast
+#'anomalies onto the EOF pattern of the other years of the forecast.
+#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month
+#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns
+#'cross-validated PCs of the NAO index for forecast (exp) and observations
+#'(obs) based on the leading EOF pattern.
+#'
+#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only NAO of observational data needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only NAO of experimental data needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#' of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+#' must be 1.
The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +#' year you are evaluating out), and then projecting forecast anomalies onto +#' this EOF (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains: +#'\item{exp}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observed NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } else { + add_member_back <- FALSE + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs) && !is.null(ftime_avg)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## exp and obs (2) + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + 
stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } else { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } else { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + + if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) + if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + + for (tt in 1:ntime) { #sdate + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + # dim(obs_EOF$EOFs): [mode, lat, lon] + if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + obs_EOF$EOFs <- obs_EOF$EOFs * (-1) +# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + NAOO.ver[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + exp_EOF$EOFs <- exp_EOF$EOFs * (-1) +# exp_EOF$PCs <- exp_EOF$PCs * sign # not used + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). 
Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = exp_EOF$EOFs[1, , ], + wght = wght) # [sdate, memb] + NAOF.ver[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], + eof_mode = obs_EOF$EOFs[1, , ], + wght = wght) # [sdate] + NAOF.ver[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + # add_member_back + if (add_member_back) { + suppressWarnings({ + NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) + }) + } + + #NOTE: EOFs_obs is not returned because it's only the result of the last sdate + # (It is returned in s2dverification.) + if (!is.null(exp) & !is.null(obs)) { + return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) + } else if (!is.null(exp)) { + return(list(exp = NAOF.ver)) + } else if (!is.null(obs)) { + return(list(obs = NAOO.ver)) + } +} diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R new file mode 100644 index 00000000..4fdf1892 --- /dev/null +++ b/modules/Indices/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. 
+#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R new file mode 100644 index 00000000..c6e233c0 --- /dev/null +++ b/modules/Indices/R/tmp/Utils.R @@ -0,0 +1,1884 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. 
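# For example (illustrative keys and values):
#   .ConfigReplaceVariablesInString("$EXP_PATH$/monthly/$VAR$",
#                                   list(EXP_PATH = "/esarchive/exp", VAR = "tas"))
# returns "/esarchive/exp/monthly/tas". Replacement values are resolved
# recursively, so they may themselves contain $VARIABLE$ references.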
+  if (length(strsplit(string, "\\$")[[1]]) > 1) {
+    parts <- strsplit(string, "\\$")[[1]]
+    output <- ""
+    i <- 0
+    for (part in parts) {
+      if (i %% 2 == 0) {
+        output <- paste0(output, part)
+      } else {
+        if (part %in% names(replace_values)) {
+          output <- paste0(output,
+                           .ConfigReplaceVariablesInString(replace_values[[part]],
+                                                           replace_values,
+                                                           allow_undefined_key_vars))
+        } else if (allow_undefined_key_vars) {
+          output <- paste0(output, "$", part, "$")
+        } else {
+          stop('Error: The variable $', part,
+               '$ was not defined in the configuration file.')
+        }
+      }
+      i <- i + 1
+    }
+    output
+  } else {
+    string
+  }
+}
+
+.KnownLonNames <- function() {
+  known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon')
+  return(known_lon_names)
+}
+
+.KnownLatNames <- function() {
+  known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat')
+  return(known_lat_names)
+}
+
+.t2nlatlon <- function(t) {
+  ## As seen in cdo's griddes.c: ntr2nlat()
+  nlats <- (t * 3 + 1) / 2
+  if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) {
+    nlats <- ceiling(nlats)
+  } else {
+    nlats <- round(nlats)
+  }
+  if (nlats %% 2 > 0) {
+    nlats <- nlats + 1
+  }
+  ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF
+  nlons <- 2 * nlats
+  keep_going <- TRUE
+  while (keep_going) {
+    n <- nlons
+    if (n %% 8 == 0) n <- trunc(n / 8)
+    while (n %% 6 == 0) n <- trunc(n / 6)
+    while (n %% 5 == 0) n <- trunc(n / 5)
+    while (n %% 4 == 0) n <- trunc(n / 4)
+    while (n %% 3 == 0) n <- trunc(n / 3)
+    if (n %% 2 == 0) n <- trunc(n / 2)
+    if (n <= 8) {
+      keep_going <- FALSE
+    } else {
+      nlons <- nlons + 2
+      if (nlons > 9999) {
+        stop("Error: pick another gaussian grid truncation. ",
+             "It doesn't fulfill the standards to apply FFT.")
+      }
+    }
+  }
+  c(nlats, nlons)
+}
+
+.nlat2t <- function(nlats) {
+  trunc((nlats * 2 - 1) / 3)
+}
+
+.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) {
+  # The purpose, working modes, inputs and outputs of this function are
+  # explained in ?LoadDataFile
+  #suppressPackageStartupMessages({library(ncdf4)})
+  #suppressPackageStartupMessages({library(bigmemory)})
+  #suppressPackageStartupMessages({library(plyr)})
+  # Auxiliary function to convert array indices to linear indices
+  arrayIndex2VectorIndex <- function(indices, dims) {
+    if (length(indices) > length(dims)) {
+      stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.")
+    }
+    position <- 1
+    dims <- rev(dims)
+    indices <- rev(indices)
+    for (i in seq_along(indices)) {
+      position <- position + (indices[i] - 1) * prod(dims[-(1:i)])
+    }
+    position
+  }
+
+  found_file <- NULL
+  dims <- NULL
+  grid_name <- units <- var_long_name <- NULL
+  is_2d_var <- array_across_gw <- NULL
+  data_across_gw <- NULL
+
+  filename <- work_piece[['filename']]
+  namevar <- work_piece[['namevar']]
+  output <- work_piece[['output']]
+  # The names of all data files in the directory of the repository that match
+  # the pattern are obtained.
+  if (any(grep("^http", filename))) {
+    is_url <- TRUE
+    files <- filename
+    ## TODO: Check that the user is not using shell globbing expressions.
+  } else {
+    is_url <- FALSE
+    files <- Sys.glob(filename)
+  }
+
+  # If we don't find any, we leave the flag 'found_file' with a NULL value.
+  if (length(files) > 0) {
+    # The first file that matches the pattern is chosen and read.
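# For example (hypothetical pattern): filename = "/data/tas_*.nc" can glob to
# several paths in 'files'; the head() call below keeps only the first match,
# which is also recorded in 'found_file' for the caller.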
+    filename <- head(files, 1)
+    filein <- filename
+    found_file <- filename
+    mask <- work_piece[['mask']]
+
+    if (!silent && explore_dims) {
+      .message(paste("Exploring dimensions...", filename))
+      ##} else {
+      ##  cat(paste("* Reading & processing data...", filename, '\n'))
+      ##}
+    }
+
+    # We will fill in 'expected_dims' with the names of the expected dimensions of
+    # the data array we'll retrieve from the file.
+    expected_dims <- NULL
+    remap_needed <- FALSE
+    # But first we open the file and work out whether the requested variable is 2d
+    fnc <- nc_open(filein)
+    if (!(namevar %in% names(fnc$var))) {
+      stop("Error: The variable ", namevar, " is not defined in the file ", filename)
+    }
+    var_long_name <- fnc$var[[namevar]]$longname
+    units <- fnc$var[[namevar]]$units
+    file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name'))
+    # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of
+    # 'longitude'/'latitude'.
+    if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) &&
+        (work_piece[['dimnames']][['lon']] == 'longitude') &&
+        ('lon' %in% file_dimnames)) {
+      work_piece[['dimnames']][['lon']] <- 'lon'
+    }
+    if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) &&
+        (work_piece[['dimnames']][['lat']] == 'latitude') &&
+        ('lat' %in% file_dimnames)) {
+      work_piece[['dimnames']][['lat']] <- 'lat'
+    }
+    if (is.null(work_piece[['is_2d_var']])) {
+      is_2d_var <- all(c(work_piece[['dimnames']][['lon']],
+                         work_piece[['dimnames']][['lat']]) %in%
+                       unlist(lapply(fnc$var[[namevar]][['dim']],
+                                     '[[', 'name')))
+    } else {
+      is_2d_var <- work_piece[['is_2d_var']]
+    }
+    if ((is_2d_var || work_piece[['is_file_per_dataset']])) {
+      if (Sys.which("cdo")[[1]] == "") {
+        stop("Error: CDO libraries not available")
+      }
+
+      cdo_version <-
+        strsplit(suppressWarnings(
+          system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5]
+
+      cdo_version <-
+        as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]])
+
+    }
+    # If the variable to load is 2-d, we need to determine whether:
+    # - interpolation is needed
+    # - subsetting is requested
+    if (is_2d_var) {
+      ## We read the longitudes and latitudes from the file.
+      lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']])
+      lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']])
+      first_lon_in_original_file <- lon[1]
+      # If a common grid is requested or we are exploring the file dimensions
+      # we need to read the grid type and size of the file to finally work out the
+      # CDO grid name.
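+      # The parsing below assumes 'cdo griddes' output of this shape
+      # (illustrative):
+      #   # gridID 1
+      #   gridtype  = lonlat
+      #   xsize     = 360
+      #   ysize     = 180
+      # in which case the grid of this file would be named 'r360x180'.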
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
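+      # Size sketch for the two supported name forms (hypothetical grids):
+      # 't106grid' gives .t2nlatlon(106) = c(160, 320), i.e. 160 latitudes and
+      # 320 longitudes; 'r144x73' gives 144 longitudes and 73 latitudes, read
+      # directly from the name.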
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(1) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
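+        # For reference (illustrative values): with start = c(1, 1, 1, 1) and
+        # count = c(96, 72, 1, 7), the ncvar_get() call above reads a 96x72
+        # lon-lat slab for the first member and the first 7 lead times, in the
+        # dimension order of the file.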
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
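+            # mask_lons/mask_lats now describe the common grid; the dummy 2x2
+            # 'LSM' field written above was only a vehicle for CDO to generate
+            # those coordinates.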
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
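+            # (Illustrative: with work_piece[['var_limits']] = c(220, 320) for a
+            # temperature in K, values outside that range are set to NA below,
+            # before masking and averaging.)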
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag to its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+                     paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user to
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be ",
+                      "modified and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("no extensions specified in the filenames; defaulting to '.eps'")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...)
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else { + bar_titles <- paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + -- GitLab From 6d0432ca662901511236aae8c68fad5c044732d5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 13 May 2024 16:46:32 +0200 Subject: [PATCH 76/96] Fix output_dims in Compute() and add threads_load and threads_compute --- build_compute_workflow.R | 19 ++++++++++--------- .../recipe_test_compute_decadal.yml | 8 ++++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index a4403cb5..c416bb65 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,7 +7,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") - source("modules/Indices/Indices.R") + # source("modules/Indices/Indices.R") # Define appender with custom log layout so that it knows not to append # within Compute(). @@ -107,7 +107,7 @@ run_compute_workflow <- function(recipe, data) { # Add forecast if not empty if (!is.null(data$fcst)) { target_dims <- c(target_dims, - list(exp_target_dims)) + list(fcst = exp_target_dims)) inputs <- c(inputs, list(fcst = data$fcst)) input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) } @@ -119,11 +119,10 @@ run_compute_workflow <- function(recipe, data) { } else { spatial_output_dims <- c("latitude", "longitude") } - exp_output_dims <- c(exp_target_dims[!exp_target_dims %in% c('var_dir', - 'latitude', - 'longitude')], - spatial_output_dims, - c("sday", "sweek")) + ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim + exp_output_dims <- unique(c(exp_target_dims, spatial_output_dims, 'sday', 'sweek')) + exp_output_dims <- c(exp_output_dims[!exp_output_dims %in% + c('var_dir', names(recipe$Run$startR_workflow$chunk_along))]) # Default outputs: hindcast and observations output_dims <- list(hcst = exp_output_dims, obs = exp_output_dims) @@ -133,6 +132,7 @@ run_compute_workflow <- function(recipe, data) { list(fcst = exp_output_dims)) } # Add skill metrics if skill module is called + ## TODO: Fix potential bug if ("skill" %in% modules) { skill_dims <- c('metric', 'var', 'time', spatial_output_dims) skill_output_dims <- @@ -181,8 +181,9 @@ run_compute_workflow <- function(recipe, data) { if (run_on == 'local') { # Compute locally, in serial res <- Compute(wf$hcst, - chunks = recipe$Run$startR_workflow$chunk_along) - print("made it here") + chunks = recipe$Run$startR_workflow$chunk_along, + threads_compute = recipe$Analysis$ncores, + threads_load = recipe$Analysis$ncores/2) } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere ## TODO: Change code_dir to autosubmit dir when necessary diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml index 8ea48e9a..555eaec7 100644 --- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml +++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml @@ -18,7 
+18,7 @@ Analysis: hcst_start: 1990 hcst_end: 1993 # season: 'Annual' - ftime_min: 2 + ftime_min: 1 ftime_max: 24 Region: latmin: -10 #-90 @@ -46,7 +46,7 @@ Analysis: index: FALSE Visualization: plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles - ncores: # Optional, int: number of cores, defaults to 1 + ncores: 10 # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E Run: @@ -55,10 +55,10 @@ Run: output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/ code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ startR_workflow: - modules: skill # Modules to run inside Compute(), in order + modules: 'anomalies skill' # Modules to run inside Compute(), in order return: hcst, fcst, obs # Choose only if they will fit into memory save: hcst, fcst, obs # Only if returned - chunk_along: {latitude: 2, longitude: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} + chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...} run_on: local # options: local, as_machine, nord3 chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local' expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O -- GitLab From f269df8d69e37078bd9d8ecec1a8840343773a14 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 10 May 2024 13:22:13 +0200 Subject: [PATCH 77/96] Fix typo: 360-day to 360_day --- conf/archive_decadal.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index c7c6f3a4..0697fc3f 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -100,7 +100,7 @@ esarchive: daily_mean: grid: {"tasmin":"gn", "tasmax":"gn", "pr":"gn"} version: {"tasmin":"v20200417", "tasmax":"v20200417", "pr":"v20200417"} - calendar: "360-day" + calendar: "360_day" member: r1i1p1f2,r2i1p1f2,r3i1p1f2,r4i1p1f2,r5i1p1f2,r6i1p1f2,r7i1p1f2,r8i1p1f2,r9i1p1f2,r10i1p1f2 initial_month: 11 sdate_add: 0 -- GitLab From 616e8e3b4d1e16655f385fd8fa9348347302502c Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:40:45 +0200 Subject: [PATCH 78/96] Add retrieve = TRUE to logger in viz functions --- modules/Visualization/R/plot_ensemble_mean.R | 2 +- modules/Visualization/R/plot_most_likely_terciles_map.R | 2 +- modules/Visualization/Visualization.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 3cbfbd8e..a88e5184 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -223,6 +223,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c9ce70f2..33bbb103 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -233,6 +233,6 @@ plot_most_likely_terciles <- function(recipe, } } } - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") } diff --git a/modules/Visualization/Visualization.R 
b/modules/Visualization/Visualization.R index 14b4194e..6db108a8 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -244,7 +244,7 @@ Visualization <- function(recipe, system(system_command) } unlink(plot_files) - info(recipe$Run$logger, + info(recipe$Run$logger, retrieve = TRUE, paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) } } -- GitLab From 386618a8e8475a2189b73d17a82b4f260bfee058 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:41:19 +0200 Subject: [PATCH 79/96] Add check to remove singleton chunking dimensions from chunk list; comment code --- tools/check_recipe.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index efb9877e..47b5f08b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -866,7 +866,7 @@ check_recipe <- function(recipe) { paste(STARTR_MODULES, collapse = ", "), ".")) error_status <- T } - # Check chunking dims + # Check if chunking dims are among the allowed dimensions if (!(any(names(recipe$Run$startR_workflow$chunk_along) %in% CHUNK_DIMS))) { error(recipe$Run$logger, retrieve = TRUE, paste0("The dimensions in 'Run:startR_workflow:chunk_along' can ", @@ -874,6 +874,7 @@ check_recipe <- function(recipe) { error_status <- T } # Check compatibility between module selection and chunking dimensions + # spatial dimensions if (any(c("latitude", "longitude") %in% names(recipe$Run$startR_workflow$chunk_along)) && any(modules %in% MODULES_USING_LATLON)) { error(recipe$Run$logger, retrieve = TRUE, @@ -882,6 +883,7 @@ check_recipe <- function(recipe) { paste(MODULES_USING_LATLON, collapse = ", "), ".")) error_status <- T } + # 'time' if ("time" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_TIME)) { error(recipe$Run$logger, retrieve = TRUE, @@ -890,6 +892,7 @@ check_recipe <- function(recipe) { paste(MODULES_USING_TIME, collapse = ", "), ".")) error_status <- T } + # 'var' if ("var" %in% names(recipe$Run$startR_workflow$chunk_along) && any(modules %in% MODULES_USING_VAR)) { error(recipe$Run$logger, retrieve = TRUE, @@ -898,6 +901,14 @@ check_recipe <- function(recipe) { paste(MODULES_USING_VAR, collapse = ", "), ".")) error_status <- T } + # Remove chunks of length 1 + if (any(recipe$Run$startR_workflow$chunk_along) == 1) { + recipe$Run$startR_workflow$chunk_along <- + recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)] + warn(recipe$Run$logger, retrieve = TRUE, + paste0("Chunks of length 1 defined in startR_workflow:chunk_along", + " have been removed from the chunk list.")) + } } } -- GitLab From 0a865d7a50088c805b29a1fb793edf2ca7c74a98 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 14 May 2024 10:54:29 +0200 Subject: [PATCH 80/96] Build paths with file.path() --- build_compute_workflow.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index c416bb65..77b16bb9 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -235,7 +235,7 @@ run_compute_workflow <- function(recipe, data) { } ## TODO: Replicate for probabilities and other modules if (!is.null(res$skill)) { - tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Skill/") + tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill") metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds")) res$skill <- ClimProjDiags::ArrayToList(res$skill, dim = 'metric', @@ -250,7 
+250,7 @@ run_compute_workflow <- function(recipe, data) {
   # ---------------------------------------------------------------------------
   # Step 5: Remove temporary files
-  unlink(paste0(recipe$Run$output_dir, "/outputs/tmp/"), recursive = TRUE)
+  unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE)
   # ---------------------------------------------------------------------------
   # Step 6: Save data
-- GitLab
From ce2d5ab98505e58e5107212548790605f02eea76 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 14 May 2024 12:51:00 +0200
Subject: [PATCH 81/96] bugfix: fix any() condition for chunk_along in recipe checker
---
 tools/check_recipe.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 47b5f08b..a22f995a 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -902,7 +902,7 @@ check_recipe <- function(recipe) {
     error_status <- T
   }
   # Remove chunks of length 1
-  if (any(recipe$Run$startR_workflow$chunk_along) == 1) {
+  if (any(recipe$Run$startR_workflow$chunk_along == 1) {
     recipe$Run$startR_workflow$chunk_along <-
       recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)]
     warn(recipe$Run$logger, retrieve = TRUE,
-- GitLab
From f6b6ae1307d2ce4bc5e790a3f421a7e8b8f463ae Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 14 May 2024 17:13:53 +0200
Subject: [PATCH 82/96] Fix paths and checks for metadata retrieval
---
 build_compute_workflow.R  |  4 ++--
 tools/check_recipe.R      |  2 +-
 tools/retrieve_metadata.R | 30 +++++++++++++++++-------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 77b16bb9..19106957 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -236,7 +236,7 @@ run_compute_workflow <- function(recipe, data) {
   ## TODO: Replicate for probabilities and other modules
   if (!is.null(res$skill)) {
     tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill")
-    metric_list <- readRDS(paste0(tmp_dir, "metric_names.Rds"))
+    metric_list <- readRDS(file.path(tmp_dir, "metric_names.Rds"))
     res$skill <- ClimProjDiags::ArrayToList(res$skill,
                                             dim = 'metric',
                                             level = 'list',
@@ -247,7 +247,7 @@ run_compute_workflow <- function(recipe, data) {
                                  chunks = recipe$Run$startR_workflow$chunk_along,
                                  array_dims = dim(res$skill[[1]]))
   }
-
+
   # ---------------------------------------------------------------------------
   # Step 5: Remove temporary files
   unlink(file.path(recipe$Run$output_dir, "outputs", "tmp"), recursive = TRUE)
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index a22f995a..19d5082b 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -902,7 +902,7 @@ check_recipe <- function(recipe) {
     error_status <- T
   }
   # Remove chunks of length 1
-  if (any(recipe$Run$startR_workflow$chunk_along == 1) {
+  if (any(recipe$Run$startR_workflow$chunk_along == 1)) {
     recipe$Run$startR_workflow$chunk_along <-
       recipe$Run$startR_workflow$chunk_along[-which(recipe$Run$startR_workflow$chunk_along == 1)]
     warn(recipe$Run$logger, retrieve = TRUE,
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
index 3550b19a..66e381e6 100644
--- a/tools/retrieve_metadata.R
+++ b/tools/retrieve_metadata.R
@@ -16,14 +16,15 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) {
   }
   metadata_file_pattern <- paste0(metadata_file_pattern, ".Rds")
   metadata_files <- list.files(path = tmp_dir, pattern = glob2rx(metadata_file_pattern))
-  metadata <- readRDS(paste0(tmp_dir,
metadata_files[1])) + metadata <- readRDS(file.path(tmp_dir, metadata_files[1])) # Piece together variable info if ("var" %in% names(chunks)) { # $attrs for (i in 2:chunks[["var"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("var_1", paste0("var_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("var_1", + paste0("var_", i), + metadata_files[1])))) var_name <- metadata_chunk$attrs$Variable$varName metadata$attrs$Variable$varName <- c(metadata$attrs$Variable$varName, var_name) @@ -42,9 +43,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { if ("time" %in% names(chunks)) { dates_dims <- dim(metadata$attrs$Dates) for (i in 2:chunks[["time"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("time_1", paste0("time_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("time_1", + paste0("time_", i), + metadata_files[1])))) metadata$attrs$Dates <- c(metadata$attrs$Dates, metadata_chunk$attrs$Dates) } time_dim_length <- array_dims[["time"]] @@ -61,9 +63,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { # $attrs ## TODO: Preserve attributes for (i in 2:chunks[["latitude"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("latitude_1", paste0("latitude_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("latitude_1", + paste0("latitude_", i), + metadata_files[1])))) metadata$attrs$Variable$metadata$latitude <- c(metadata$attrs$Variable$metadata$latitude, metadata_chunk$attrs$Variable$metadata$latitude) } @@ -78,9 +81,10 @@ retrieve_metadata <- function(tmp_dir, chunks, array_dims) { # $attrs ## TODO: Preserve attributes for (i in 2:chunks[["longitude"]]) { - metadata_chunk <- readRDS(paste0(tmp_dir, - gsub("longitude_1", paste0("longitude_", i), - metadata_files[1]))) + metadata_chunk <- readRDS(file.path(tmp_dir, + paste0(gsub("longitude_1", + paste0("longitude_", i), + metadata_files[1])))) metadata$attrs$Variable$metadata$longitude <- c(metadata$attrs$Variable$metadata$longitude, metadata_chunk$attrs$Variable$metadata$longitude) } -- GitLab From 5bd4f5f79e8e3e5abd3cb5caa58c37f61c19a1f0 Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 24 May 2024 17:03:41 +0200 Subject: [PATCH 83/96] test time aggregation module (WIP) --- build_compute_workflow.R | 7 +++++-- modules/Aggregation/Aggregation.R | 4 ++-- .../atomic_recipes/recipe_test_compute_decadal.yml | 12 ++++++++++-- tools/check_recipe.R | 4 ++-- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index 19106957..efb0688b 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -7,7 +7,8 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Anomalies/Anomalies.R") source("modules/Downscaling/Downscaling.R") source("modules/Skill/Skill.R") - # source("modules/Indices/Indices.R") + source("modules/Indices/Indices.R") + source("modules/Aggregation/Aggregation.R") # Define appender with custom log layout so that it knows not to append # within Compute(). 
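
For reference, the Time_aggregation user_def entries added to the decadal test
recipe further down in this patch are plain R expressions (the YAML !expr tag);
over the 24-month forecast horizon they expand to the month indices of each
season. A quick check in an R session (values verified by evaluation):

  sort(c(seq(6, 24, 12), seq(7, 24, 12), seq(8, 24, 12)))
  # [1]  6  7  8 18 19 20    (JJA of both forecast years)
  sort(c(seq(12, 24, 12), seq(13, 24, 12), seq(2, 24, 12)))
  # [1]  2 12 13 14 24       (DJF, wrapping across the year boundary)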
@@ -41,7 +42,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   data <- Units(recipe, data, retrieve = F)
   # Loop over the modules
   for (module in modules) {
-    if (module == "calibration") {
+    if (module == "aggregation") {
+      data <- Aggregation(recipe, data, retrieve = F)
+    } else if (module == "calibration") {
       data <- Calibration(recipe, data, retrieve = F)
     } else if (module == "anomalies") {
       data <- Anomalies(recipe, data, retrieve = F)
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index d5c09fac..f12a570c 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -8,7 +8,7 @@
 ## ini = 2, end = 3 with monthly freq would be a 2 months agg
 source("modules/Aggregation/R/agg_ini_end.R")
-Aggregation <- function(recipe, data) {
+Aggregation <- function(recipe, data, retrieve = TRUE) {
   ## TODO: Move checks to recipe checker
   ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
@@ -58,6 +58,6 @@ Aggregation <- function(recipe, data) {
     }
   }
   return(res)
-  info(recipe$Run$logger,
+  info(recipe$Run$logger, retrieve = retrieve,
        "##### TIME AGGREGATION COMPLETE #####")
 }
diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index 555eaec7..44f77898 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 1993
+    hcst_end: 2020
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
@@ -33,6 +33,14 @@ Analysis:
       compute: no
       cross_validation:
       save:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12)))
+        MAM: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12)))
     Calibration:
       method: bias
       save: 'all'
@@ -55,7 +63,7 @@ Run:
   output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
-    modules: 'anomalies skill' # Modules to run inside Compute(), in order
+    modules: 'aggregation skill' # Modules to run inside Compute(), in order
     return: hcst, fcst, obs # Choose only if they will fit into memory
     save: hcst, fcst, obs # Only if returned
     chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 9396415d..8ca1890d 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -941,10 +941,10 @@ check_recipe <- function(recipe) {
   STARTR_PARAMS <- c("modules", "chunk_along")
   STARTR_MODULES <- c("calibration", "anomalies", "downscaling",
-                      "skill", "probabilities", "indices")
+                      "skill", "probabilities", "indices", "aggregation")
   CHUNK_DIMS <- c("var", "time", "latitude", "longitude")
   MODULES_USING_LATLON <- c("downscaling", "indices")
-  MODULES_USING_TIME <- c("indicators")
+  MODULES_USING_TIME <- c("indicators", "aggregation")
   MODULES_USING_VAR <- c("indicators")
   # Check that all required fields are present
   if ("startR_workflow" %in% names(recipe$Run)) {
-- GitLab
From 352d228fb1cfca1ebb0ecc790258182a5881eb3e Mon Sep 17 00:00:00 2001
From: vagudets
Date: Fri, 24 May 2024 17:15:38 +0200
Subject: [PATCH 84/96] Adapt loading when retrieve = FALSE for multi_path =
 FALSE case
---
 modules/Loading/R/load_decadal.R
| 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 10651836..09a5969b 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -141,12 +141,18 @@ load_decadal <- function(recipe, retrieve = retrieve) { hcst <- do.call(Start, Start_hcst_arg_list) # Reshape and reorder dimensions ## dat should be 1, syear should be length of dat; reorder dimensions - dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) - hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + if (retrieve) { + dim(hcst) <- c(dat = 1, syear = as.numeric(dim(hcst))[1], dim(hcst)[2:6]) + hcst <- s2dv::Reorder(hcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + hcst_dims <- dim(hcst) + } else { + hcst_dims <- c(dat = 1, syear = as.numeric(attr(hcst, "Dimensions"))[1], + attr(hcst, "Dimensions")[2:6]) + } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(hcst)[c('syear', 'time')])) + tmp <- array(dim = c(hcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_hcst - sdates_hcst[1])[-1] #diff(sdates_hcst) for (i_syear in 1:length(yr_diff)) { @@ -228,11 +234,15 @@ load_decadal <- function(recipe, retrieve = retrieve) { dim(fcst) <- c(dat = 1, syear = as.numeric(dim(fcst))[1], dim(fcst)[2:6]) fcst <- s2dv::Reorder(fcst, c('dat', 'var', 'syear', 'time', 'latitude', 'longitude', 'ensemble')) + fcst_dims <- dim(fcst) + } else { + fcst_dims <- c(dat = 1, syear = as.numeric(attr(fcst, "Dimensions"))[1], + attr(fcst, "Dimensions")[2:6]) } # Manipulate time attr because Start() cannot read it correctly wrong_time_attr <- attr(fcst, 'Variables')$common$time # dim: [time], the first syear only - tmp <- array(dim = c(dim(fcst)[c('syear', 'time')])) + tmp <- array(dim = c(fcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr yr_diff <- (sdates_fcst - sdates_fcst[1])[-1] #diff(sdates_fcst) for (i_syear in 1:length(yr_diff)) { -- GitLab From da547a611601fd39d2e2c14b537e7432d4fcbc29 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:46:58 +0200 Subject: [PATCH 85/96] Bugfix: change 'data' to 'x' --- modules/Aggregation/R/agg_ini_end.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index 17c6940a..551e4d49 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -1,4 +1,4 @@ -agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { +agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm, ncores) { # to make the code work with both options: if (!is.null(ini) && is.null(indices)) { # list of vectors for the indices from ini to end pairs @@ -36,7 +36,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { stop("Unknown method") } # Check in case only 1 aggregation only is requested to add time dim back: - if (!('time' %in% names(dim(data[[1]]$data)))) { + if (!('time' %in% names(dim(x[[1]]$data)))) { dim(x[[1]]$data) <- c(dim(x[[1]]$data), time = 1) } x[[1]]$data <- Reorder(x[[1]]$data, original_dims) -- GitLab From 92e671c53ae8440ea48343b5f44f7f630be15866 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:47:50 +0200 Subject: [PATCH 86/96] 
Adjustments to retrieve = FALSE case for decadal data --- modules/Loading/R/load_decadal.R | 1 + modules/Preprocessing/Preprocessing.R | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 09a5969b..724a3471 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -148,6 +148,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { } else { hcst_dims <- c(dat = 1, syear = as.numeric(attr(hcst, "Dimensions"))[1], attr(hcst, "Dimensions")[2:6]) + # attr(hcst, "Dimensions") <- hcst_dims } # Manipulate time attr because Start() cannot read it correctly diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R index d6a287e8..1344dfa0 100644 --- a/modules/Preprocessing/Preprocessing.R +++ b/modules/Preprocessing/Preprocessing.R @@ -13,6 +13,11 @@ preprocess_datasets <- function(recipe, data) { if (!("sweek" %in% names(dim(data[[element]])))) { dim(data[[element]]) <- c(dim(data[[element]]), sweek = 1) } + if (!("syear" %in% names(dim(data[[element]]))) && + (dim(data[[element]])[["dat"]] > 1)) { + names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'dat')] <- "syear" + dim(data[[element]]) <- c(dat = 1, dim(data[[element]])) + } } # Add 'ensemble' dimension to obs dim(data$obs) <- c(dim(data$obs), ensemble = 1) -- GitLab From 9e95c9f77b6bdce70e8dad5f7febf640d5b7ea58 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:48:08 +0200 Subject: [PATCH 87/96] Add modified version of as.s2dv_cube() (to be removed later) --- tools/libs.R | 1 + tools/tmp/as.s2dv_cube.R | 363 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 364 insertions(+) create mode 100644 tools/tmp/as.s2dv_cube.R diff --git a/tools/libs.R b/tools/libs.R index fe646cb6..43ae0170 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -44,6 +44,7 @@ source("tools/write_autosubmit_conf.R") source("tools/get_archive.R") source("tools/Utils.R") source("tools/restructure_recipe.R") +source("tools/tmp/as.s2dv_cube.R") # source("tools/add_dims.R") # Not sure if necessary yet # Settings diff --git a/tools/tmp/as.s2dv_cube.R b/tools/tmp/as.s2dv_cube.R new file mode 100644 index 00000000..1b4c4275 --- /dev/null +++ b/tools/tmp/as.s2dv_cube.R @@ -0,0 +1,363 @@ +#'Conversion of 'startR_array' or 'list' objects to 's2dv_cube' +#' +#'This function converts data loaded using Start function from startR package or +#'Load from s2dv into an 's2dv_cube' object. +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@author Nicolau Manubens, \email{nicolau.manubens@bsc.es} +#' +#'@param object An object of class 'startR_array' generated from function +#' \code{Start} from startR package or a list output from function \code{Load} +#' from s2dv package. Any other object class will not be accepted. +#'@param remove_attrs_coords A logical value indicating whether to remove the +#' attributes of the coordinates (TRUE) or not (FALSE). The default value is +#' FALSE. +#'@param remove_null Optional. A logical value indicating whether to remove the +#' elements that are NULL (TRUE) or not (FALSE) of the output object. It is +#' only used when the object is an output from function \code{Load}. The +#' default value is FALSE. +#' +#'@return The function returns an 's2dv_cube' object to be easily used with +#'functions with the prefix \code{CST} from CSTools and CSIndicators packages. 
+#'The object is mainly a list with the following elements:\cr +#'\itemize{ +#' \item{'data', array with named dimensions;} +#' \item{'dims', named vector of the data dimensions;} +#' \item{'coords', list of named vectors with the coordinates corresponding to +#' the dimensions of the data parameter;} +#' \item{'attrs', named list with elements: +#' \itemize{ +#' \item{'Dates', array with named temporal dimensions of class 'POSIXct' +#' from time values in the data;} +#' \item{'Variable', has the following components: +#' \itemize{ +#' \item{'varName', character vector of the short variable name. It is +#' usually specified in the parameter 'var' from the functions +#' Start and Load;} +#' \item{'metadata', named list of elements with variable metadata. +#' They can be from coordinates variables (e.g. longitude) or +#' main variables (e.g. 'var');} +#' } +#' } +#' \item{'Datasets', character strings indicating the names of the +#' datasets;} +#' \item{'source_files', a vector of character strings with complete paths +#' to all the found files involved in loading the data;} +#' \item{'when', a time stamp of the date issued by the Start() or Load() +#' call to obtain the data;} +#' \item{'load_parameters', it contains the components used in the +#' arguments to load the data from Start() or Load() functions.} +#' } +#' } +#'} +#' +#'@seealso \code{\link{s2dv_cube}}, \code{\link{CST_Start}}, +#'\code{\link[startR]{Start}} and \code{\link{CST_Load}} +#'@examples +#'\dontrun{ +#'# Example 1: convert an object from startR::Start function to 's2dv_cube' +#'library(startR) +#'repos <- '/esarchive/exp/ecmwf/system5_m1/monthly_mean/$var$_f6h/$var$_$sdate$.nc' +#'data <- Start(dat = repos, +#' var = 'tas', +#' sdate = c('20170101', '20180101'), +#' ensemble = indices(1:5), +#' time = 'all', +#' latitude = indices(1:5), +#' longitude = indices(1:5), +#' return_vars = list(latitude = 'dat', longitude = 'dat', time = 'sdate'), +#' retrieve = TRUE) +#'data <- as.s2dv_cube(data) +#'# Example 2: convert an object from s2dv::Load function to 's2dv_cube' +#'startDates <- c('20001101', '20011101', '20021101', +#' '20031101', '20041101', '20051101') +#'data <- Load(var = 'tas', exp = 'system5c3s', +#' nmember = 2, sdates = startDates, +#' leadtimemax = 3, latmin = 10, latmax = 30, +#' lonmin = -10, lonmax = 10, output = 'lonlat') +#'data <- as.s2dv_cube(data) +#'} +#'@export +as.s2dv_cube <- function(object, remove_attrs_coords = FALSE, + remove_null = FALSE) { + + if (is.list(object) & length(object) == 11) { + if (is.null(object) || (is.null(object$mod) && is.null(object$obs))) { + stop("The s2dv::Load call did not return any data.") + } + obs <- object + obs$mod <- NULL + object$obs <- NULL + names(object)[[1]] <- 'data' # exp + names(obs)[[1]] <- 'data' # obs + # obs + if (!is.null(obs$data)) { + obs_exist <- TRUE + obs$Datasets$exp <- NULL + obs$Datasets <- obs$Datasets$obs + } else { + obs_exist <- FALSE + } + # object + if (!is.null(object$data)) { + exp_exist <- TRUE + object$Datasets$obs <- NULL + object$Datasets <- object$Datasets$exp + } else { + exp_exist <- FALSE + } + result <- list() + # obs and exp + if (obs_exist & exp_exist) { + obs_exp = list(exp = object, obs = obs) + } else if (obs_exist & !exp_exist) { + obs_exp = list(obs = obs) + } else { + obs_exp = list(exp = object) + } + i <- 0 + for (obj_i in obs_exp) { + i <- i + 1 + # attrs + obj_i$attrs <- within(obj_i, rm(list = c('data'))) + obj_i <- within(obj_i, rm(list = names(obj_i$attrs))) + dates <- obj_i$attrs$Dates$start + attr(dates, 
'end') <- obj_i$attrs$Dates$end + if (!is.null(dates)) { + dim(dates) <- dim(obj_i$data)[c('ftime', 'sdate')] + obj_i$attrs$Dates <- dates + } + # Variable + varname <- obj_i$attrs$Variable$varName + varmetadata <- NULL + varmetadata[[varname]] <- attributes(obj_i$attrs$Variable)[-1] + obj_i$attrs$Variable <- list(varName = varname, metadata = varmetadata) + # dims + obj_i$dims <- dim(obj_i$data) + # coords + obj_i$coords <- sapply(names(dim(obj_i$data)), function(x) NULL) + # sdate + obj_i$coords$sdate <- obj_i$attrs$load_parameters$sdates + if (!remove_attrs_coords) attr(obj_i$coords$sdate, 'indices') <- FALSE + # lon + if (!is.null(obj_i$attrs$lon)) { + if (remove_attrs_coords) { + obj_i$coords$lon <- as.vector(obj_i$attrs$lon) + } else { + obj_i$coords$lon <- obj_i$attrs$lon + dim(obj_i$coords$lon) <- NULL + attr(obj_i$coords$lon, 'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lon <- obj_i$attrs$lon + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lon')) + } + # lat + if (!is.null(obj_i$attrs$lat)) { + if (remove_attrs_coords) { + obj_i$coords$lat <- as.vector(obj_i$attrs$lat) + } else { + obj_i$coords$lat <- obj_i$attrs$lat + dim(obj_i$coords$lat) <- NULL + attr(obj_i$coords$lat, 'indices') <- FALSE + } + obj_i$attrs$Variable$metadata$lat <- obj_i$attrs$lat + obj_i$attrs <- within(obj_i$attrs, rm(list = 'lat')) + } + # member + obj_i$coords$member <- 1:obj_i$dims['member'] + if (!remove_attrs_coords) attr(obj_i$coords$member, 'indices') <- TRUE + # dataset + if (!is.null(names(obj_i$attrs$Datasets))) { + obj_i$coords$dataset <- names(obj_i$attrs$Datasets) + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- FALSE + obj_i$attrs$Datasets <- names(obj_i$attrs$Datasets) + } else { + obj_i$coords$dataset <- 1:obj_i$dims['dataset'] + if (!remove_attrs_coords) attr(obj_i$coords$dataset, 'indices') <- TRUE + } + # ftime + obj_i$coords$ftime <- 1:obj_i$dims['ftime'] + if (!remove_attrs_coords) attr(obj_i$coords$ftime, 'indices') <- TRUE + # remove NULL values + if (isTRUE(remove_null)) { + obj_i$attrs$load_parameters <- .rmNullObs(obj_i$attrs$load_parameters) + } + obj_i <- obj_i[c('data', 'dims', 'coords', 'attrs')] + class(obj_i) <- 's2dv_cube' + if (names(obs_exp)[[i]] == 'exp') { + result$exp <- obj_i + } else { + result$obs <- obj_i + } + } + if (is.list(result)) { + if (is.null(result$exp)) { + result <- result$obs + } else if (is.null(result$obs)) { + result <- result$exp + } else { + warning("The output is a list of two 's2dv_cube' objects", + " corresponding to 'exp' and 'obs'.") + } + } + + } else if (inherits(object, 'startR_array')) { + # From Start: + result <- list() + result$data <- as.vector(object) + ## dims + dims <- dim(object) + dim(result$data) <- dims + result$dims <- dims + ## coords + result$coords <- sapply(names(dims), function(x) NULL) + # Find coordinates + FileSelector <- attributes(object)$FileSelectors + VariablesCommon <- names(attributes(object)$Variables$common) + dat <- names(FileSelector)[1] + VariablesDat <- names(attributes(object)$Variables[[dat]]) + varName <- NULL + for (i_coord in names(dims)) { + if (i_coord %in% names(FileSelector[[dat]])) { # coords in FileSelector + coord_in_fileselector <- FileSelector[[dat]][which(i_coord == names(FileSelector[[dat]]))] + if (length(coord_in_fileselector) == 1) { + if (length(coord_in_fileselector[[i_coord]][[1]]) == dims[i_coord]) { + # TO DO: add var_dim parameter + if (i_coord %in% c('var', 'vars')) { + varName <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } + if 
(remove_attrs_coords) { + result$coords[[i_coord]] <- as.vector(coord_in_fileselector[[i_coord]][[1]]) + } else { + result$coords[[i_coord]] <- coord_in_fileselector[[i_coord]][[1]] + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } + } else if (i_coord %in% VariablesCommon) { # coords in common + coord_in_common <- attributes(object)$Variables$common[[which(i_coord == VariablesCommon)]] + if (inherits(coord_in_common, "POSIXct")) { + result$attrs$Dates <- coord_in_common + } + if (length(coord_in_common) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_common, "POSIXct")) { + result$coords[[i_coord]] <- 1:dims[i_coord] + attr(result$coords[[i_coord]], 'indices') <- TRUE + } else { + result$coords[[i_coord]] <- as.vector(coord_in_common) + } + } else { + result$coords[[i_coord]] <- coord_in_common + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else if (!is.null(VariablesDat)) { # coords in dat + if (i_coord %in% VariablesDat) { + coord_in_dat <- attributes(object)$Variables[[dat]][[which(i_coord == VariablesDat)]] + if (inherits(coord_in_dat, "POSIXct")) { + result$attrs$Dates <- coord_in_dat + } + if (length(coord_in_dat) == dims[i_coord]) { + if (remove_attrs_coords) { + if (inherits(coord_in_dat, "POSIXct")) { + result$coords[[i_coord]] <- coord_in_dat + } else { + result$coords[[i_coord]] <- as.vector(coord_in_dat) + } + } else { + result$coords[[i_coord]] <- coord_in_dat + attr(result$coords[[i_coord]], 'indices') <- FALSE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + } else { # missing other dims + result$coords[[i_coord]] <- 1:dims[i_coord] + if (!remove_attrs_coords) attr(result$coords[[i_coord]], 'indices') <- TRUE + } + dim(result$coords[[i_coord]]) <- NULL + } + + # attrs + ## varName + if (!is.null(varName)) { + result$attrs$Variable$varName <- varName + } + ## Variables + for (var_type in names(attributes(object)$Variables)) { + if (!is.null(attributes(object)$Variables[[var_type]])) { + for (var in names(attributes(object)$Variables[[var_type]])) { + attr_variable <- attributes(object)$Variables[[var_type]][[var]] + if (is.null(result$attrs$Dates)) { + if (inherits(attr_variable, "POSIXct")) { + result$attrs$Dates <- attr_variable + } + } + if (is.null(result$attrs$Variable$metadata[[var]])) { + result$attrs$Variable$metadata[[var]] <- attr_variable + } + } + } + } + ## Datasets + if (length(names(FileSelector)) > 1) { + # lon name + known_lon_names <- .KnownLonNames() + lon_name_dat <- names(dims)[which(names(dims) %in% known_lon_names)] + # lat name + known_lat_names <- .KnownLatNames() + lat_name_dat <- names(dims)[which(names(dims) %in% known_lat_names)] + result$attrs$Datasets <- names(FileSelector) + # TO DO: add dat_dim parameter + if (any(names(dims) %in% c('dat', 'dataset'))) { + dat_dim <- names(dims)[which(names(dims) %in% c('dat', 'dataset'))] + result$coords[[dat_dim]] <- names(FileSelector) + if (!remove_attrs_coords) attr(result$coords[[dat_dim]], 'indices') <- FALSE + } + for (i in 
2:length(names(FileSelector))) { + if (!(length(lon_name_dat) == 0L)) { + if (any(result$coords[[lon_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lon_name_dat]]))) { + warning("'lon' values are different for different datasets. ", + "Only values from the first will be used.") + } + } + if (!(length(lat_name_dat) == 0L)) { + if (any(result$coords[[lat_name_dat]] != as.vector(attributes(object)$Variables[[names(FileSelector)[i]]][[lat_name_dat]]))) { + warning("'lat' values are different for different datasets. ", + "Only values from the first will be used.") + } + } + } + } else { + result$attrs$Datasets <- names(FileSelector) + } + ## when + result$attrs$when <- Sys.time() + ## source_files + result$attrs$source_files <- attributes(object)$Files + ## load_parameters + result$attrs$load_parameters <- attributes(object)$FileSelectors + class(result) <- 's2dv_cube' + } else if (inherits(object, 'startR_cube')) { + stop("Unsupported object class: 'startR_cube'. ", + "When using startR::Start() or CSTools::CST_Start(), set ", + "'retrieve = TRUE' to ensure the data is retrieved into ", + "memory and can be converted into a 's2dv_cube' object.") + } else { + stop("The class of parameter 'object' is not implemented", + " to be converted into 's2dv_cube' class yet.") + } + return(result) +} -- GitLab From 3b0d6d7c1098915a7f5810b619fcd672ca9c99e4 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 28 May 2024 10:48:27 +0200 Subject: [PATCH 88/96] Refactor code --- build_compute_workflow.R | 77 ++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index efb0688b..fb0e19c4 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -1,5 +1,6 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, - nchunks = chunk_indices) { + nchunks = chunk_indices, + expected_output_dims) { # Load modules source("modules/Preprocessing/Preprocessing.R") source("modules/Units/Units.R") @@ -9,6 +10,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, source("modules/Skill/Skill.R") source("modules/Indices/Indices.R") source("modules/Aggregation/Aggregation.R") + source("tools/tmp/as.s2dv_cube.R") # Define appender with custom log layout so that it knows not to append # within Compute(). 
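
The refactored input/output setup in the hunks below leans on a small R idiom:
sapply() over a character vector with a function that returns NULL cannot
simplify its result, so it yields a named list of NULL placeholders, one per
dataset, which the loop then fills in. A minimal sketch (the names mirror the
'data' list used in the hunk that follows):

  inputs <- sapply(c("hcst", "obs", "fcst"), function(x) NULL)
  str(inputs)
  # List of 3
  #  $ hcst: NULL
  #  $ obs : NULL
  #  $ fcst: NULL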
@@ -61,12 +63,17 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, } ## TODO: Define what to return depending the modules called + the recipe return_list <- list(hcst = data$hcst$data, - obs = data$obs$data, fcst = data$fcst$data, + obs = data$obs$data, skill = skill_metrics, probabilities = probabilities) # Eliminate NULL elements from the return list return_list <- return_list[!sapply(return_list, is.null)] + # Reorder dimensions as expected + for (element in names(return_list)) { + return_list[[element]] <- s2dv::Reorder(data = return_list[[element]], + order = expected_output_dims[[element]]) + } return(return_list) } @@ -90,31 +97,16 @@ run_compute_workflow <- function(recipe, data) { # --------------------------------------------------------------------------- # Step 2: Define the inputs and outputs for Compute() + + # Reorder data: hcst, obs, fcst + data <- data[c("hcst", "obs", "fcst")] + data <- data[!sapply(data, is.null)] # Retrieve original dimensions - exp_dims <- names(attr(data$hcst, "Dimensions")) - obs_dims <- names(attr(data$obs, "Dimensions")) - # Remove the chunking dimensions from the original dimensions in the arrays, - # to create the target dimensions - exp_target_dims <- - exp_dims[!exp_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - obs_target_dims <- - obs_dims[!obs_dims %in% names(recipe$Run$startR_workflow$chunk_along)] - # Default inputs: hindcast and observations - target_dims <- list(hcst = exp_target_dims, - obs = obs_target_dims) - inputs <- list(hcst = data$hcst, obs = data$obs) - # Define input attributes from Start() + chunking_dims <- names(recipe$Run$startR_workflow$chunk_along) + target_dims <- sapply(names(data), function(x) NULL) + inputs <- sapply(names(data), function(x) NULL) + input_attributes <- sapply(names(data), function(x) NULL) STARTR_CUBE_ATTRS <- c("Variables", "Dimensions", "FileSelectors", "Files") - input_attributes <- list(hcst = STARTR_CUBE_ATTRS, - obs = STARTR_CUBE_ATTRS) - # Add forecast if not empty - if (!is.null(data$fcst)) { - target_dims <- c(target_dims, - list(fcst = exp_target_dims)) - inputs <- c(inputs, list(fcst = data$fcst)) - input_attributes <- c(input_attributes, list(fcst = STARTR_CUBE_ATTRS)) - } - # Create output dimensions ## TODO: Improve this condition if ("indices" %in% modules) { @@ -123,23 +115,31 @@ run_compute_workflow <- function(recipe, data) { spatial_output_dims <- c("latitude", "longitude") } ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim - exp_output_dims <- unique(c(exp_target_dims, spatial_output_dims, 'sday', 'sweek')) - exp_output_dims <- c(exp_output_dims[!exp_output_dims %in% - c('var_dir', names(recipe$Run$startR_workflow$chunk_along))]) + FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble") + expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims) + expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims] # Default outputs: hindcast and observations - output_dims <- list(hcst = exp_output_dims, - obs = exp_output_dims) - # Add forecast if not empty - if (!is.null(data$fcst)) { - output_dims <- c(output_dims, - list(fcst = exp_output_dims)) + output_dims <- sapply(names(data), function(x) NULL) + # Iterate over datasets to define inputs and outputs + for (dataset in names(data)) { + # Retrieve original dimensions + dataset_dims <- names(attr(data[[dataset]], "Dimensions")) + # Remove the chunking dimensions from the original dimensions in the arrays, + # to 
create the target dimensions + target_dims[[dataset]] <- dataset_dims[!dataset_dims %in% chunking_dims] + # Define inputs + inputs[[dataset]] <- data[[dataset]] + # Define input attributes + input_attributes[[dataset]] <- STARTR_CUBE_ATTRS + # Define output dimensions + output_dims[[dataset]] <- expected_output_dims } + # Add skill metrics if skill module is called ## TODO: Fix potential bug if ("skill" %in% modules) { skill_dims <- c('metric', 'var', 'time', spatial_output_dims) - skill_output_dims <- - skill_dims[!skill_dims %in% names(recipe$Run$startR_workflow$chunk_along)] + skill_output_dims <- skill_dims[!skill_dims %in% chunking_dims] output_dims <- c(output_dims, list(skill = skill_output_dims)) } @@ -167,7 +167,8 @@ run_compute_workflow <- function(recipe, data) { wf <- AddStep(inputs = inputs, step = step, - recipe = recipe) + recipe = recipe, + expected_output_dims = output_dims) #--------------------------------------- ## TODO: Create function to generate call to Compute()? @@ -185,7 +186,7 @@ run_compute_workflow <- function(recipe, data) { # Compute locally, in serial res <- Compute(wf$hcst, chunks = recipe$Run$startR_workflow$chunk_along, - threads_compute = recipe$Analysis$ncores, + threads_compute = 1, # recipe$Analysis$ncores, threads_load = recipe$Analysis$ncores/2) } else if (run_on %in% c('nord3', 'as_machine')) { #NOTE: autosubmit_suite_dir can be anywhere -- GitLab From cbbdee4ad5f4616c1666f1ccb3a3cfe918d5a6cf Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 29 May 2024 14:32:11 +0200 Subject: [PATCH 89/96] Add notes --- build_compute_workflow.R | 4 ++++ modules/Loading/R/load_decadal.R | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/build_compute_workflow.R b/build_compute_workflow.R index fb0e19c4..7c2b7d47 100644 --- a/build_compute_workflow.R +++ b/build_compute_workflow.R @@ -19,9 +19,13 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL, modules <- tolower(strsplit(recipe$Run$startR_workflow$modules, ", | |,")[[1]]) # Create data list + print(dim(attr(data$hcst, "Variables")$common$time)) data <- list(hcst = hcst, obs = obs, fcst = fcst) + print(dim(attr(data$hcst, "Variables")$common$time)) + print(dim(attr(data$fcst, "Variables")$common$time)) + data <- data[!sapply(data, is.null)] var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]]) # Remove duplicated objects diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 724a3471..94c11fb0 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -152,6 +152,7 @@ load_decadal <- function(recipe, retrieve = retrieve) { } # Manipulate time attr because Start() cannot read it correctly + ## NOTE: Why can't Start() read it correctly?? 
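+  ## (A guess, based on the reconstruction below: 'time' is returned as a
+  ## common attribute, so only the first start date's dates are stored; the
+  ## loop rebuilds the full [syear, time] array by shifting them with yr_diff.)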
wrong_time_attr <- attr(hcst, 'Variables')$common$time # dim: [time], the first syear only tmp <- array(dim = c(hcst_dims[c('syear', 'time')])) tmp[1, ] <- wrong_time_attr @@ -165,7 +166,6 @@ load_decadal <- function(recipe, retrieve = retrieve) { tmp_time_attr <- attr(hcst, 'Variables')$common$time - ## TODO: for retrieve = FALSE, do this in pre-processing # change syear to c(sday, sweek, syear) # dim(hcst) should be [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble] if (retrieve) { -- GitLab From 8285e66c50d605fc7ef60051e43a40d5f7d75f40 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 10 Jun 2024 11:41:50 +0200 Subject: [PATCH 90/96] Seasonal daily with retrieve = FALSE --- modules/Loading/R/load_seasonal.R | 30 ++++++++++++++++----------- modules/Preprocessing/Preprocessing.R | 3 +++ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index c2ff36eb..7277766a 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -123,15 +123,19 @@ load_seasonal <- function(recipe, retrieve = TRUE) { split_multiselected_dims = split_multiselected_dims, retrieve = retrieve) + browser() + if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(hcst))] <- dim(hcst) - dim(hcst) <- default_dims + if (retrieve) { + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + } # Change time attribute dimensions default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) names(dim(attr(hcst, "Variables")$common$time))[which(names( @@ -190,12 +194,14 @@ load_seasonal <- function(recipe, retrieve = TRUE) { if (store.freq %in% c("daily_mean", "daily")) { # Adjusts dims for daily case, could be removed if startR allows # multidim split - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sday = 1, - sweek = 1, syear = 1, time = 1, - latitude = 1, longitude = 1, ensemble = 1) - default_dims[names(dim(fcst))] <- dim(fcst) - dim(fcst) <- default_dims + if (retrieve) { + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + } # Change time attribute dimensions ## TODO: Is this still necessary? 
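   ## (These lines only relabel 'file_date' as 'syear' in the time attribute
   ## and pad singleton sday/sweek dims; presumably needed until startR can
   ## split multi-selected dims on its own, as the surrounding TODOs suggest.)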
From 06edfd69f2f7c53c27fdff460093e0da6e3ad09c Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:42:08 +0200
Subject: [PATCH 91/96] Add recipe for testing

---
 .../recipe_test_compute_seasonal_daily.yml | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml

diff --git a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
new file mode 100644
index 00000000..c4d6ee26
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
@@ -0,0 +1,83 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: daily_mean
+    # units: {tas: C}
+  Datasets:
+    System:
+      name: ECMWF-SEAS5
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '0101'
+    fcst_year: # '2020'
+    hcst_start: '2000'
+    hcst_end: '2010'
+    ftime_min: 1
+    ftime_max: 2
+  Region:
+    latmin: 20
+    latmax: 80
+    lonmin: -80
+    lonmax: 40
+  Regrid:
+    method: bilinear
+    type: "r180x90" #to_reference #'r360x181'
+  Workflow:
+    Anomalies:
+      compute: yes # yes/no, default yes
+      cross_validation: yes # yes/no, default yes
+      save: 'all' # 'all'/'none'/'exp_only'/'fcst_only'
+    Calibration:
+      method: raw
+      save: 'none' # 'all'/'none'/'exp_only'/'fcst_only'
+    # Indices:
+    #   NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes}
+    # Downscaling:
+    #   # Assumption 1: leave-one-out cross-validation is always applied
+    #   # Assumption 2: for analogs, we select the best analog (minimum distance)
+    #   type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+    #   int_method: conservative # regridding method accepted by CDO.
+    #   bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+    #   lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn'
+    #   log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+    #   target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+    #   nanalogs: # If type analogs. Number of analogs to be searched
+    #   save: 'all' # 'all'/'none'/'exp_only'
+    Skill:
+      metric: RPS RPSS BSS10 # RPS RPSS CRPS CRPSS FRPSS BSS10 BSS90 EnsCorr Corr mean_bias mean_bias_SS
+      save: 'all' # 'all'/'none'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'percentiles_only' # 'all'/'none'/'bins_only'/'percentiles_only'
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean
+      multi_panel: no
+      projection: cylindrical_equidistant
+    Indicators:
+      index: no
+  ncores: 10
+  remove_NAs: yes
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: anomalies skill # Modules to run inside Compute(), in order
+    return: hcst, fcst, obs # Choose only if they will fit into memory
+    save: hcst, fcst, obs # Only if returned
+    chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    run_on: local # options: local, as_machine, nord3
+    chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
+    expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
+    hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local'
+    slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local'
+
+
--
GitLab

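In this recipe, chunk_along is the named list that run_compute_workflow() passes straight to the chunks argument of startR's Compute() (see the Compute() call in the first patch above). A small sketch of the parsed structure, with the recipe assumed to be read into a nested list, e.g. by yaml::read_yaml():

recipe <- list(Run = list(startR_workflow = list(chunk_along = list(time = 2))))
str(recipe$Run$startR_workflow$chunk_along)
# List of 1
#  $ time: num 2
# Inside run_compute_workflow(), this list becomes:
# res <- Compute(wf$hcst,
#                chunks = recipe$Run$startR_workflow$chunk_along,
#                ...)
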
From e72776615b73156fc082f4b8950a2e8e44e76e67 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:42:29 +0200
Subject: [PATCH 92/96] testing

---
 recipes/atomic_recipes/recipe_test_compute_decadal.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index 44f77898..df8d22aa 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 2020
+    hcst_end: 2018
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
--
GitLab


From ad3e5cbf325f9ef27468820c80a511718061516e Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 11:43:58 +0200
Subject: [PATCH 93/96] WIP

---
 build_compute_workflow.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 7c2b7d47..23cc3a42 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -19,12 +19,9 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   modules <- tolower(strsplit(recipe$Run$startR_workflow$modules,
                               ", | |,")[[1]])
   # Create data list
-  print(dim(attr(data$hcst, "Variables")$common$time))
   data <- list(hcst = hcst,
                obs = obs,
                fcst = fcst)
-  print(dim(attr(data$hcst, "Variables")$common$time))
-  print(dim(attr(data$fcst, "Variables")$common$time))
 
   data <- data[!sapply(data, is.null)]
   var_name <- as.vector(attributes(hcst)$FileSelectors[[1]]$var[[1]])
@@ -118,7 +115,7 @@ run_compute_workflow <- function(recipe, data) {
   } else {
     spatial_output_dims <- c("latitude", "longitude")
   }
-  ## TODO: There may be a bug here when the user requests only lat or lon as chunking dim
+
   FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble")
   expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims)
   expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims]
--
GitLab

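The expected_output_dims logic touched above simply drops the user's chunking dimensions from the fixed-plus-spatial dimension set. Reproduced standalone (the chunking_dims value is illustrative):

FIXED_OUTPUT_DIMS <- c("dat", "var", "syear", "sday", "sweek", "time", "ensemble")
spatial_output_dims <- c("latitude", "longitude")
chunking_dims <- c("time")  # e.g. from recipe$Run$startR_workflow$chunk_along
expected_output_dims <- c(FIXED_OUTPUT_DIMS, spatial_output_dims)
# Chunked dimensions are handled by Compute() itself, so exclude them here
expected_output_dims <- expected_output_dims[!expected_output_dims %in% chunking_dims]
expected_output_dims
# [1] "dat" "var" "syear" "sday" "sweek" "ensemble" "latitude" "longitude"
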
From a69cdd9744d88ab8b543f88c7801287c7761fbe8 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Mon, 10 Jun 2024 15:30:07 +0200
Subject: [PATCH 94/96] Workaround for daily seasonal metadata

---
 modules/Preprocessing/Preprocessing.R | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/modules/Preprocessing/Preprocessing.R b/modules/Preprocessing/Preprocessing.R
index aedd85bb..96196344 100644
--- a/modules/Preprocessing/Preprocessing.R
+++ b/modules/Preprocessing/Preprocessing.R
@@ -15,11 +15,14 @@ preprocess_datasets <- function(recipe, data) {
     }
     if (!("syear" %in% names(dim(data[[element]]))) &&
         (dim(data[[element]])[["dat"]] > 1)) {
-      names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'dat')] <- "syear"
+      names(dim(data[[element]]))[which(names(dim(data[[element]])) == "dat")] <- "syear"
       dim(data[[element]]) <- c(dat = 1, dim(data[[element]]))
     } else if (!("syear" %in% names(dim(data[[element]]))) &&
                ("file_date" %in% names(dim(data[[element]])))) {
-      names(dim(data[[element]]))[which(names(dim(data[[element]])) == 'file_date')] <- "syear"
+      names(dim(data[[element]]))[which(names(dim(data[[element]])) == "file_date")] <- "syear"
+      time_dim_names <- names(dim(attr(data[[element]], "Variables")$common$time))
+      time_dim_names[which(time_dim_names == "file_date")] <- "syear"
+      names(dim(attr(data[[element]], "Variables")$common$time)) <- time_dim_names
     }
   }
   # Add 'ensemble' dimension to obs
--
GitLab


From 84d5e9ee0fffcec898661433a3d20994786a3311 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 13 Jun 2024 09:47:23 +0200
Subject: [PATCH 95/96] Testing

---
 .../recipe_test_compute_decadal.yml        |  4 +-
 .../recipe_test_compute_decadal_daily.yml  | 74 +++++++++++++++++++
 .../recipe_test_compute_seasonal_daily.yml |  2 +-
 3 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml

diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal.yml b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
index df8d22aa..ef500b43 100644
--- a/recipes/atomic_recipes/recipe_test_compute_decadal.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal.yml
@@ -16,7 +16,7 @@ Analysis:
   Time:
     fcst_year: [2020,2021]
     hcst_start: 1990
-    hcst_end: 2018
+    hcst_end: 2000
 #    season: 'Annual'
     ftime_min: 1
     ftime_max: 24
@@ -60,7 +60,7 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
   code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
   startR_workflow:
     modules: 'aggregation skill' # Modules to run inside Compute(), in order
diff --git a/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml
new file mode 100644
index 00000000..dd78e5d2
--- /dev/null
+++ b/recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml
@@ -0,0 +1,74 @@
+Description:
+  Author: An-Chi Ho
+  '': split version
+Analysis:
+  Horizon: Decadal
+  Variables:
+    name: tas
+    freq: daily_mean
+  Datasets:
+    System:
+      name: EC-Earth3-i4 #CanESM5
+      member: r1i4p1f1,r2i4p1f1,r3i4p1f1 #'all'
+    Multimodel: no
+    Reference:
+      name: ERA5 #JRA-55
+  Time:
+    fcst_year: [2020,2021]
+    hcst_start: 1990
+    hcst_end: 1994
+#    season: 'Annual'
+    ftime_min: 1
+    ftime_max: 12
+  Region:
+    latmin: -10 #-90
+    latmax: 20 #90
+    lonmin: 0
+    lonmax: 30 #359.9
+  Regrid:
+    method: bilinear
+    type: to_system #to_reference
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation:
+      save:
+    Time_aggregation:
+      execute: yes
+      method: 'average'
+      user_def:
+        JJA: !expr sort(c(seq(6,24, 12), seq(7,24,12), seq(8, 24, 12)))
+        MMA: !expr sort(c(seq(3,24, 12), seq(4,24,12), seq(5, 24, 12)))
+        SON: !expr sort(c(seq(9,24, 12), seq(10,24,12), seq(11, 24, 12)))
+        DJF: !expr sort(c(seq(12,24, 12), seq(13,24,12), seq(2, 24, 12)))
+    Calibration:
+      method: bias
+      save: 'all'
+    Skill:
+      metric: RPSS
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'all'
+    Indicators:
+      index: FALSE
+    Visualization:
+      plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles
+  ncores: 10 # Optional, int: number of cores, defaults to 1
+  remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: S2S4E
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-ouputs/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
+  startR_workflow:
+    modules: 'aggregation skill' # Modules to run inside Compute(), in order
+    return: hcst, fcst, obs # Choose only if they will fit into memory
+    save: hcst, fcst, obs # Only if returned
+    chunk_along: {latitude: 2, longitude: 1} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    run_on: local # options: local, as_machine, nord3
+    chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
+    expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
+    hpc_user: bsc32762 # your hpc username. Only if run_on is not 'local'
+    slurm_directives: ['#SBATCH --constraint=medmem', '#SBATCH --exclusive'] # Only if run_on is not 'local'
diff --git a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
index c4d6ee26..2a34c36e 100644
--- a/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
+++ b/recipes/atomic_recipes/recipe_test_compute_seasonal_daily.yml
@@ -73,7 +73,7 @@ Run:
     modules: anomalies skill # Modules to run inside Compute(), in order
     return: hcst, fcst, obs # Choose only if they will fit into memory
     save: hcst, fcst, obs # Only if returned
-    chunk_along: {time: 2} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
+    chunk_along: {time: 6} # list: {dimension_1: # of chunks, dimension_2, # of chunks, ...}
     run_on: local # options: local, as_machine, nord3
     chunk_wallclock: '01:00:00' # Wallclock for each chunk, hh:mm:ss. Only if run_on is not 'local'
     expid: a6cb # Only if run_on is not 'local'. leave empty to create a new one? O
--
GitLab

From f0c01d29c8310bcccf8e51767b49dbaadf14c2df Mon Sep 17 00:00:00 2001
From: vagudets
Date: Thu, 13 Jun 2024 09:47:44 +0200
Subject: [PATCH 96/96] Add metadata saving and retrieval for aggregation
 module

---
 build_compute_workflow.R          | 36 +++++++++++++++++++++----------
 example_scripts/test_compute.R    |  3 ++-
 modules/Aggregation/Aggregation.R | 19 +++++++++++++++-
 tools/retrieve_metadata.R         |  5 ++---
 4 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/build_compute_workflow.R b/build_compute_workflow.R
index 23cc3a42..bbddc1d2 100644
--- a/build_compute_workflow.R
+++ b/build_compute_workflow.R
@@ -46,7 +46,7 @@ compute_workflow <- function(recipe, hcst, obs, fcst = NULL,
   # Loop over the modules
   for (module in modules) {
     if (module == "aggregation") {
-      data <- Aggregation(recipe, data, retrieve = F)
+      data <- Aggregation(recipe, data, retrieve = F, nchunks = nchunks)
     } else if (module == "calibration") {
       data <- Calibration(recipe, data, retrieve = F)
    } else if (module == "anomalies") {
@@ -88,7 +88,7 @@ convert_to_s2dv_cube <- function(new_cube, original_cube) {
   return(new_cube)
 }
 
-run_compute_workflow <- function(recipe, data) {
+run_compute_workflow <- function(recipe, data, last_module = NULL) {
 
   # ---------------------------------------------------------------------------
   # Step 1: Retrieve the modules that will be called inside the workflow
@@ -187,7 +187,7 @@ run_compute_workflow <- function(recipe, data) {
     # Compute locally, in serial
     res <- Compute(wf$hcst,
                    chunks = recipe$Run$startR_workflow$chunk_along,
-                   threads_compute = 1, # recipe$Analysis$ncores,
+                   threads_compute = 1, # recipe$Analysis$ncores, # 1
                    threads_load = recipe$Analysis$ncores/2)
   } else if (run_on %in% c('nord3', 'as_machine')) {
     #NOTE: autosubmit_suite_dir can be anywhere
@@ -230,14 +230,28 @@ run_compute_workflow <- function(recipe, data) {
 
   # ---------------------------------------------------------------------------
   # Step 4: Convert results to s2dv_cube/list format
+  source("tools/retrieve_metadata.R")
   for (cube in c("hcst", "obs", "fcst")) {
-    if (!is.null(res[[cube]])) {
-      res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]])
-    } else {
-      res$data[[cube]] <- NULL
-    }
-    res[[cube]] <- NULL
+    if (!is.null(res[[cube]])) {
+      if (!is.null(last_module)) {
+        tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp",
+                             str_to_title(last_module))
+        res$data[[cube]] <- retrieve_metadata(tmp_dir = tmp_dir,
+                                              chunks = recipe$Run$startR_workflow$chunk_along,
+                                              array_dims = dim(res[[cube]]),
+                                              metadata_file_pattern = paste0(last_module,
+                                                                             "_metadata_",
+                                                                             cube))
+        res$data[[cube]]$data <- res[[cube]]
+      } else {
+        res$data[[cube]] <- convert_to_s2dv_cube(res[[cube]], data[[cube]])
+      }
+    } else {
+      res$data[[cube]] <- NULL
+    }
+    res[[cube]] <- NULL
   }
+  ## TODO: Replicate for probabilities and other modules
   if (!is.null(res$skill)) {
     tmp_dir <- file.path(recipe$Run$output_dir, "outputs", "tmp", "Skill")
@@ -247,10 +261,10 @@ run_compute_workflow <- function(recipe, data) {
                        level = 'list',
                        names = metric_list)
     # Put chunked metadata back together
-    source("tools/retrieve_metadata.R")
     res$skill$metadata <- retrieve_metadata(tmp_dir = tmp_dir,
                                             chunks = recipe$Run$startR_workflow$chunk_along,
-                                            array_dims = dim(res$skill[[1]]))
+                                            array_dims = dim(res$skill[[1]]),
+                                            metadata_file_pattern = "skill_metadata")
   }
 
   # ---------------------------------------------------------------------------
diff --git a/example_scripts/test_compute.R b/example_scripts/test_compute.R
index 5622c5ec..9726ee71 100644
--- a/example_scripts/test_compute.R
+++ b/example_scripts/test_compute.R
@@ -9,7 +9,8 @@ source("modules/Saving/Saving.R")
 source("modules/Visualization/Visualization.R")
 source("build_compute_workflow.R")
 
-recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml"
+# recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal.yml"
+recipe_file <- "recipes/atomic_recipes/recipe_test_compute_decadal_daily.yml"
 recipe <- prepare_outputs(recipe_file)
 
 # Load datasets
diff --git a/modules/Aggregation/Aggregation.R b/modules/Aggregation/Aggregation.R
index f12a570c..baf42984 100644
--- a/modules/Aggregation/Aggregation.R
+++ b/modules/Aggregation/Aggregation.R
@@ -8,7 +8,7 @@
 ## ini = 2, end = 3 with monthly freq would be a 2 months agg
 source("modules/Aggregation/R/agg_ini_end.R")
 
-Aggregation <- function(recipe, data, retrieve = TRUE) {
+Aggregation <- function(recipe, data, retrieve = TRUE, nchunks = nchunks) {
   ## TODO: Move checks to recipe checker
   ncores <- recipe$Analysis$ncores # is it already checked? NULL or number
@@ -57,6 +57,23 @@ Aggregation <- function(recipe, data, retrieve = TRUE) {
       }
     }
   }
+  if (!retrieve) {
+    tmp_dir <- paste0(recipe$Run$output_dir, "/outputs/tmp/Aggregation/")
+    if (!dir.exists(tmp_dir)) {
+      dir.create(tmp_dir, recursive = TRUE)
+    }
+    # Save s2dv_cube metadata
+    for (chunk in names(nchunks)) {
+      for (element in names(data)) {
+        metadata_filename <- paste0("aggregation_metadata", "_", element, "_",
+                                    chunk, "_", nchunks[[chunk]], ".Rds")
+        cube_metadata <- list(dims = res[[element]]$dims,
+                              coords = res[[element]]$coords,
+                              attrs = res[[element]]$attrs)
+        saveRDS(cube_metadata, paste0(tmp_dir, metadata_filename))
+      }
+    }
+  }
   return(res)
   info(recipe$Run$logger, retrieve = retrieve,
        "##### TIME AGGREGATION COMPLETE #####")
diff --git a/tools/retrieve_metadata.R b/tools/retrieve_metadata.R
index 66e381e6..f1784425 100644
--- a/tools/retrieve_metadata.R
+++ b/tools/retrieve_metadata.R
@@ -6,11 +6,10 @@
 # chunks: named list of chunking dimensions with the amount of chunks for each.
 # array_dims: vector containing the dimensions of the final array.
 
-retrieve_metadata <- function(tmp_dir, chunks, array_dims) {
-  ## TODO: add array_dims parameter
+retrieve_metadata <- function(tmp_dir, chunks, array_dims,
+                              metadata_file_pattern) {
   ## For each chunking dimension
   # Build metadata file pattern:
-  metadata_file_pattern <- "skill_metadata"
   for (chunk in sort(names(chunks))) {
     metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
   }
--
GitLab
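
Taken together, the Aggregation.R and retrieve_metadata.R changes in this last patch implement a per-chunk metadata round trip: each chunk saves its s2dv_cube metadata to an .Rds file tagged with the chunking dimension and chunk index, and retrieve_metadata() rebuilds the same name pattern to collect the files afterwards. A rough sketch of the naming convention; the paths, dims, and the .Rds globbing detail are assumptions, and the real function also merges coordinate metadata across chunks:

tmp_dir <- tempdir()
chunks <- list(time = 2)  # recipe$Run$startR_workflow$chunk_along
# Inside one chunk of the Compute() call (here: chunk 1 of 2 along 'time'),
# mirroring the filename built in Aggregation():
metadata_filename <- paste0("aggregation_metadata", "_", "hcst", "_",
                            "time", "_", 1, ".Rds")
saveRDS(list(dims = c(time = 1)), file.path(tmp_dir, metadata_filename))
# Back on the main process, rebuild the glob pattern the same way
# retrieve_metadata() does, appending "_<dim>_*" per sorted chunking dim:
metadata_file_pattern <- "aggregation_metadata_hcst"
for (chunk in sort(names(chunks))) {
  metadata_file_pattern <- paste0(metadata_file_pattern, "_", chunk, "_*")
}
Sys.glob(file.path(tmp_dir, paste0(metadata_file_pattern, ".Rds")))
# [1] ".../aggregation_metadata_hcst_time_1.Rds"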