From 9a337e10ed40de38534ddb35ca812a393ea5e40f Mon Sep 17 00:00:00 2001 From: vagudets Date: Fri, 12 Jan 2024 15:51:33 +0100 Subject: [PATCH 1/6] Add use case for sample data --- .../ex0_1_sample_dataset/ex0_1-handson.md | 235 ++++++++++++++++++ .../ex0_1_sample_dataset/ex0_1-recipe.yml | 57 +++++ 2 files changed, 292 insertions(+) create mode 100644 use_cases/ex0_1_sample_dataset/ex0_1-handson.md create mode 100644 use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md new file mode 100644 index 00000000..ef9abbe1 --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md @@ -0,0 +1,235 @@ +# Use case 0.1: Loading a sample dataset + +## Goal +Create a SUNSET recipe and use the functions in the suite to reproduce the verification workflow from the previous hands-on exercises. + +## 0. Cloning the SUNSET repository + +The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` + +## 1. Modifying the recipe + +SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. In this example, we want to evaluate the temperature-at-surface (tas) monthly means, using MeteoFrance System 7 data as our experiment and ERA5 as our reference dataset, for the initialization month of November. + +There is a template file for this hands-on tutorial, which you can open with a text editor: + +```shell +# cd to the main SUNSET directory +# Open the recipe with a text editor such as vim or emacs +vim use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml +``` + +Once you have opened your recipe, it is time to edit the contents. Fill in the blank slots according to your preference, based on the options given in the description. + +```yaml +Description: + Author: <___> + Description: Exercise 1.1: Calibration and skill assessment of MeteoFrance System 7 surface temperature +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + units: <___> # Choose your units: C or K + Datasets: + System: + name: Meteo-France-System7 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2016' + ftime_min: <___> # Choose the first time step! A number from 1 to 6 + ftime_max: <___> # Choose the last time step! 
A number from 1 to 6 + Region: + name: "EU" + latmin: 20 + latmax: 80 + lonmin: -20 + lonmax: 40 + Regrid: + method: bilinear + type: 'r360x181' # options: to_system, to_reference, self-defined grid + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: <___> + save: 'none' + Skill: + metric: RPSS, BSS10, BSS90 + cross_validation: yes + save: 'none' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'none' + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + multi_panel: no + projection: cylindrical_equidistant + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: <______> # Path to the directory where you want your outputs to be saved + code_dir: <______> # Path to the directory where your code is +``` + +## 2. Load the required SUNSET modules and read the recipe + +First of all, source the MODULES file to load the environment modules needed to run SUNSET. + +```shell +source MODULES +``` + +Open an R session, by simply typing `R` on the terminal. + +To run SUNSET, we must run the R session from the directory where the code is. To check your working directory, you can run the shell command `pwd`. From the R session, you can use the commands `getwd()` and `setwd()` to see and change your working directory. + +```r +# Load required modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml" +recipe <- prepare_outputs(recipe_file) +``` + +The function `prepare_outputs()` creates a unique folder for the logs, data files and plots that result from the execution of your recipe, inside the directory you specified. It also runs a check over the recipe to detect any potential errors, misspellings or missing arguments. At the end of the check, a message is displayed indicating whether or not the recipe passed the check, along with the list of errors and warnings. + +**Questions** + +Read the logs! + +(1) Did your recipe pass the check? Did you get any warnings? + +(2) Where will your outputs be saved? Copy and paste this directory somewhere, so that you can check it later! + +*Tip*: The recipe is now stored as a `list` containing all the information of the original YAML file, plus some extra things! If you want to see any particular element of the recipe from the R session, you can simply access that element in the list. For example: + +```r +# Checking the variable name +recipe$Analysis$Variables$name +# Checking the output directory +recipe$Run$output_dir +``` + +## 3. Load the data and change the units + +The **Loading** module retrieves the information from the recipe to load the data that has been requested it in. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. + +For certain variables like temperature, precipitation or sea level pressure, the user can request for specific units to load the data in. The **Units** module will read the original units as stored in the netCDF files and perform any necessary unit converstions to match the request in the recipe. 
It also verifies that all of the loaded datasets share the same units, even if no specific unit has been requested. For this reason, users are strongly encouraged to run it even if they did not request any unit conversion. + +```r +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +``` + +**Questions** + +(1) What is the structure of `data`? What is the class of the objects in `data`? *Tip*: you can use functions like `class()`, `names()` or `str()` to gain information about the structure of the object and its contents. + +```r +class(data) +names(data) +str(data, max.level = 2) +# You can access any of the three objects with the `$` operator: +class(data$hcst) +``` + +(2) Pay attention to the log messages: Did your units get converted? Are the new units what you expect? You can check the metadata of any of the objects in data. SUNSET also provides the `data_summary()` function, which lets you have a quick look at your objects: + +```r +# Check the new units and data of hcst, fcst and/or obs. Are they all the same? +data$hcst$attrs$Variable$metadata$tas$units +data_summary(data$hcst, recipe) +data_summary(data$fcst, recipe) +data_summary(data$obs, recipe) +``` +(3) What are the dimensions of the datasets? Are they consistent with what is requested in the recipe? *Tip*: Check the data summary! + +## 4. Calibrate the data and compute the anomalies + +SUNSET has a few modules to perform post-processing on the experimental and the reference datasets. The **Calibration** module performs the bias correction method indicated in the recipe, using the `CSTools::CST_Calibration()` function. + +The **Anomalies** module removes the climatologies using functions like `CSTools::CST_Anomaly()` and `s2dv::Clim()`, and also returns the full fields in case they are needed for any future computations. + +```r +# Calibrate the data +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +``` +**Questions** + +(1) Verify that you now have anomaly values instead of the original full field. *Tip*: Use `data_summary()` like in the previous example and pay attention to the new values. + +## 5. Evaluate the model skill and compute the probability thresholds + +The **Skill** module returns a list of all the evaluation metrics requested in the recipe, in the shape of multi-dimensional arrays. In this case, we will compute three metrics: + +- **RPSS (Ranked Probability Skill Score)**: This skill score measures how well a forecast predicts the probability of the tercile categories (below normal, normal and above-normal), compared to the climatology. +- **BSS10 and BSS90 (Brier Skill Score):** This skill score measures how well a forecast predicts the probability of the 10th percentile and 90th percentile extreme events, compared to the climatology. + +The `Probabilities()` function returns the probability values for each requested category for the hindcast and forecast data, as well as the hindcast percentile values corresponding to each threshold. +``` +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +``` +**Questions** + +(1) What is the structure of `skill_metrics`? Which metrics were computed? What dimensions do they have? *Tip*: use `str()` and `names()`. + +(2) What is the structure of `probabilities`? Can you identify the probability categories and the percentiles? *Tip*: use `str()`. + +## 6. 
Plotting the results + +Now, let's visualize the information that was computed! + +The **Visualization** module will generate the three types of maps we requested in the recipe: +- Skill maps to visualize the skill distribution of the model, for each metric. +- The ensemble mean of the calibrated forecast anomalies. +- A map showing the most likely tercile category for each point in the grid. + +With the significance option in the `Visualization()` function, you can choose whether or not to shade the grid points that are statistically significant in each skill metric plot. + +```r +# Plot data +Visualization(recipe, data, + skill_metrics = skill_metrics, + probabilities = probabilities, + significance = TRUE) +``` + +Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are png files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. + +**Questions** + +(1) Let's take a look at the forecast ensemble mean. What is the sign of the anomalies over Spain? In what regions are the anomalous temperatures strongest? + +(2) Let's take a look at the skill metrics RPSS, BSS10 and BSS90. In what regions and for which metrics is the forecast most skillful? *Tip*: Positive values indicate that the model is a better predictor than the climatology, with 1 being the perfect score. + +(3) Let's take a look at the Most Likely Terciles plots. This plot indicates the probability of the temperature being below normal, near-normal or above normal. What is the most likely category for Spain? diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml new file mode 100644 index 00000000..06b1e572 --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -0,0 +1,57 @@ +Description: + Author: V. 
Agudetse + Description: Run SUNSET workflow with CSTools sample data +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: './' + filesystem: sample + -- GitLab From 62e5c83e6890dfc0b4bba5d18a070169ec97cac0 Mon Sep 17 00:00:00 2001 From: VICTORIA AGUDETSE ROURES Date: Mon, 8 Jan 2024 16:59:08 +0100 Subject: [PATCH 2/6] Add option recipe == 'sample' option with CSTools sample data --- conf/archive.yml | 23 ++++++++ modules/Loading/Loading.R | 3 + modules/Loading/R/load_sample.R | 49 ++++++++++++++++ .../recipe_seasonal_provenance.yml | 57 +++++++++++++++++++ tools/CST_ChangeDimName.R | 39 +++++++++++++ 5 files changed, 171 insertions(+) create mode 100644 modules/Loading/R/load_sample.R create mode 100644 recipes/atomic_recipes/recipe_seasonal_provenance.yml create mode 100644 tools/CST_ChangeDimName.R diff --git a/conf/archive.yml b/conf/archive.yml index dbacf0e7..61f62be2 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -254,3 +254,26 @@ mars: monthly_mean: {"tas":""} calendar: "standard" reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + +sample: + src: + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: + monthly_mean: {"tas":"", "prlr":""} + nmember: + fcst: 15 + hcst: 15 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_GRIB_system51_m1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":"", "prlr":""} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 43dea065..63fee97b 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -8,6 +8,9 @@ Loading <- function(recipe) { if (tolower(recipe$Run$filesystem) == "mars") { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) + } else if (tolower(recipe$Run$filesystem) == "sample") { + source("modules/Loading/R/load_sample.R") + data <- load_sample(recipe) } else { # Case: esarchive time_horizon <- tolower(recipe$Analysis$Horizon) diff --git a/modules/Loading/R/load_sample.R b/modules/Loading/R/load_sample.R new file mode 100644 index 00000000..e0d906d3 --- /dev/null +++ b/modules/Loading/R/load_sample.R @@ -0,0 +1,49 @@ +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("tools/CST_ChangeDimName.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_sample 
<- function(recipe) { + # Hindcast: + # Change hcst dimension names + hcst <- CST_ChangeDimName(lonlat_temp_st$exp, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon", "member"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude", + "ensemble")) + # Add sday and sweek dimensions + hcst <- CST_InsertDim(hcst, posdim = 3, lendim = 1, name = "sday", values = 1) + hcst <- CST_InsertDim(hcst, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(hcst$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(hcst$attrs$Dates)[['syear']], + time = dim(hcst$attrs$Dates)[['time']]) + # Observations: + # Change obs dimension names + obs <- CST_ChangeDimName(lonlat_temp_st$obs, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude")) + # Add sday and sweek dimensions + obs <- CST_InsertDim(obs, posdim = 3, lendim = 1, name = "sday", values = 1) + obs <- CST_InsertDim(obs, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(obs$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(obs$attrs$Dates)[['syear']], + time = dim(obs$attrs$Dates)[['time']]) + # Add ensemble dimension to obs + obs <- CST_InsertDim(obs, posdim = 7, lendim = 1, name = "ensemble", values = 1) + # Adjust name of 'load_parameters$date' attribute + obs$attrs$load_parameters$dat1$file_date <- obs$attrs$load_parameters$dat1$date + + # Sample fcst NULL + fcst <- NULL + # Check for consistency between hcst and obs grid + compare_exp_obs_grids(hcst, obs) + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + .log_memory_usage(recipe$Run$logger, when = "After loading (sample data)") + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/recipes/atomic_recipes/recipe_seasonal_provenance.yml b/recipes/atomic_recipes/recipe_seasonal_provenance.yml new file mode 100644 index 00000000..196b49c9 --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal_provenance.yml @@ -0,0 +1,57 @@ +Description: + Author: V. 
Agudetse + '': split version +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: /home/kinow/Development/r/workspace/sunset/ + filesystem: sample + diff --git a/tools/CST_ChangeDimName.R b/tools/CST_ChangeDimName.R new file mode 100644 index 00000000..1ccbbb3d --- /dev/null +++ b/tools/CST_ChangeDimName.R @@ -0,0 +1,39 @@ +## TODO: Documentation + +CST_ChangeDimName <- function(data, original_dimnames, final_dimnames) { + if (!inherits(data, "s2dv_cube")) { + stop("Parameter 'data' must be an object of class 's2dv_cube'") + } + if (!(length(original_dimnames) == length(final_dimnames))) { + stop("The number of dimension names in 'final_dimnames' must be the same + as in 'original_dimnames'") + } + ## TODO: Add check to verify that all original_dimnames are present in the array + for (index in 1:length(original_dimnames)) { + original_name <- original_dimnames[index] + final_name <- final_dimnames[index] + # Step 1: Change dims and data + names(data$dims)[which(names(data$dims) == original_name)] <- final_name + dim(data$data) <- data$dims + # Step 2: Change coords + names(data$coords)[which(names(data$coords) == original_name)] <- final_name + # Step 3: Change attrs + # 3.1 - Dates + if (original_name %in% names(dim(data$attrs$Dates))) { + names(dim(data$attrs$Dates))[which(names(dim(data$attrs$Dates)) + == original_name)] <- final_name + } + # 3.2 - Variable metadata + if (original_name %in% names(data$attrs$Variable$metadata)) { + names(data$attrs$Variable$metadata)[which(names(data$attrs$Variable$metadata) + == original_name)] <- final_name + } + # 3.3 - Source files + if (original_name %in% names(dim(data$attrs$source_files))) { + names(dim(data$attrs$source_files))[which(names(dim(data$attrs$source_files)) + == original_name)] <- final_name + } + } + return(data) +} + -- GitLab From ff0f3ce6739f193755d836d54c027bf009c22fb9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jan 2024 15:54:49 +0100 Subject: [PATCH 3/6] Update hands-on 0.1 --- .../ex0_1_sample_dataset/ex0_1-handson.md | 92 ++++++++++--------- .../ex0_1_sample_dataset/ex0_1-recipe.yml | 6 +- 2 files changed, 51 insertions(+), 47 deletions(-) diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md index ef9abbe1..7dc9927f 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md +++ b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md @@ -1,9 +1,9 @@ # Use case 0.1: Loading a sample dataset ## Goal -Create a SUNSET recipe and use the functions in the suite to reproduce the verification workflow from the previous hands-on exercises. 
+Load a sample dataset into SUNSET without the need configure an archive of netCDF or GRIB files. -## 0. Cloning the SUNSET repository +## 0. SUNSET software stack The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. @@ -12,24 +12,34 @@ The first step to use SUNSET is to create a copy of the code in your local envir git clone https://earth.bsc.es/gitlab/es/sunset.git ``` -## 1. Modifying the recipe +If you are using BSC infrastructure, all of the software dependencies are already installed within our common environment. However, if you are running SUNSET outside of the BSC facilities, you can install the dependencies by creating a conda environment. A script is provided in the SUNSET repository, and you can install the environment by running the following line from the main folder of the SUNSET repository: -SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. In this example, we want to evaluate the temperature-at-surface (tas) monthly means, using MeteoFrance System 7 data as our experiment and ERA5 as our reference dataset, for the initialization month of November. +```shell +bash conda_installation/load_sunset.bash +``` + +To run the line above, you should replace `` with the path where you want the conda environment to be installed. For example, `/home//conda-sunset`. + +## 1. The recipe + +SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. In this example, we are using a sample dataset which contains data of temperature-at-surface (tas) monthly means, from ECMWF SEAS5 as the experiment and ERA5 as the reference dataset, for the initialization month of November. There is a template file for this hands-on tutorial, which you can open with a text editor: ```shell # cd to the main SUNSET directory # Open the recipe with a text editor such as vim or emacs -vim use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml +vim use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml ``` Once you have opened your recipe, it is time to edit the contents. Fill in the blank slots according to your preference, based on the options given in the description. +NOTE: In this particular example, the sample data is limited, so the system and reference names, variable, forecast times and region cannot be changed. + ```yaml Description: Author: <___> - Description: Exercise 1.1: Calibration and skill assessment of MeteoFrance System 7 surface temperature + Description: Exercise 0.1: Skill assessment of a sample dataset Analysis: Horizon: Seasonal Variables: @@ -38,26 +48,25 @@ Analysis: units: <___> # Choose your units: C or K Datasets: System: - name: Meteo-France-System7 + name: ECMWF-SEAS5.1 Multimodel: no Reference: name: ERA5 Time: sdate: '1101' - fcst_year: '2020' - hcst_start: '1993' - hcst_end: '2016' - ftime_min: <___> # Choose the first time step! A number from 1 to 6 - ftime_max: <___> # Choose the last time step! 
A number from 1 to 6 + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 Region: - name: "EU" - latmin: 20 - latmax: 80 - lonmin: -20 - lonmax: 40 + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 Regrid: method: bilinear - type: 'r360x181' # options: to_system, to_reference, self-defined grid + type: 'to_system' Workflow: Anomalies: compute: yes @@ -67,34 +76,41 @@ Analysis: method: <___> save: 'none' Skill: - metric: RPSS, BSS10, BSS90 + metric: RPSS, BSS10, BSS90 cross_validation: yes save: 'none' Probabilities: - percentiles: [[1/3, 2/3], [1/10, 9/10]] + percentiles: [[1/3, 2/3]] save: 'none' Visualization: - plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + plots: skill_metrics multi_panel: no projection: cylindrical_equidistant ncores: 10 remove_NAs: yes Output_format: S2S4E Run: - Loglevel: INFO - Terminal: yes - output_dir: <______> # Path to the directory where you want your outputs to be saved - code_dir: <______> # Path to the directory where your code is + filesystem: sample # This parameter specifies that we want to load the sample data, rather than data from a filesystem + Loglevel: INFO + Terminal: yes + output_dir: <______> # Path to the directory where you want your outputs to be saved + code_dir: <______> # Path to the directory where your code is ``` ## 2. Load the required SUNSET modules and read the recipe -First of all, source the MODULES file to load the environment modules needed to run SUNSET. +If you are running SUNSET within BSC infrastructure, source the MODULES file to load the environment modules needed to run SUNSET. ```shell source MODULES ``` +Otherwise, activate the conda environment: + +```shell +conda activate +``` + Open an R session, by simply typing `R` on the terminal. To run SUNSET, we must run the R session from the directory where the code is. To check your working directory, you can run the shell command `pwd`. From the R session, you can use the commands `getwd()` and `setwd()` to see and change your working directory. @@ -110,7 +126,7 @@ source("modules/Saving/Saving.R") source("modules/Visualization/Visualization.R") # Read recipe -recipe_file <- "use_cases/ex1_1_single_analysis_terminal/ex1_1-recipe.yml" +recipe_file <- "use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml" recipe <- prepare_outputs(recipe_file) ``` @@ -135,7 +151,7 @@ recipe$Run$output_dir ## 3. Load the data and change the units -The **Loading** module retrieves the information from the recipe to load the data that has been requested it in. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. +The **Loading** module retrieves the information from the recipe to load the data that has been requested it in. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. In the case of the sample dataset, we have a hindcast and the corresponding observations, but no forecast. For certain variables like temperature, precipitation or sea level pressure, the user can request for specific units to load the data in. The **Units** module will read the original units as stored in the netCDF files and perform any necessary unit converstions to match the request in the recipe. It also verifies that all of the loaded datasets share the same units, even if no specific unit has been requested. 
For this reason, users are strongly encouraged to run it even if they did not request any unit conversion. @@ -161,10 +177,9 @@ class(data$hcst) (2) Pay attention to the log messages: Did your units get converted? Are the new units what you expect? You can check the metadata of any of the objects in data. SUNSET also provides the `data_summary()` function, which lets you have a quick look at your objects: ```r -# Check the new units and data of hcst, fcst and/or obs. Are they all the same? +# Check the new units and data of the hindcast (hcst) and/or observations (obs). Are they the same? data$hcst$attrs$Variable$metadata$tas$units data_summary(data$hcst, recipe) -data_summary(data$fcst, recipe) data_summary(data$obs, recipe) ``` (3) What are the dimensions of the datasets? Are they consistent with what is requested in the recipe? *Tip*: Check the data summary! @@ -192,7 +207,7 @@ The **Skill** module returns a list of all the evaluation metrics requested in t - **RPSS (Ranked Probability Skill Score)**: This skill score measures how well a forecast predicts the probability of the tercile categories (below normal, normal and above-normal), compared to the climatology. - **BSS10 and BSS90 (Brier Skill Score):** This skill score measures how well a forecast predicts the probability of the 10th percentile and 90th percentile extreme events, compared to the climatology. -The `Probabilities()` function returns the probability values for each requested category for the hindcast and forecast data, as well as the hindcast percentile values corresponding to each threshold. +The `Probabilities()` function returns the probability values for each requested category for the hindcast (and forecast) data, as well as the hindcast percentile values corresponding to each threshold. ``` # Compute skill metrics skill_metrics <- Skill(recipe, data) @@ -209,10 +224,7 @@ probabilities <- Probabilities(recipe, data) Now, let's visualize the information that was computed! -The **Visualization** module will generate the three types of maps we requested in the recipe: -- Skill maps to visualize the skill distribution of the model, for each metric. -- The ensemble mean of the calibrated forecast anomalies. -- A map showing the most likely tercile category for each point in the grid. +The **Visualization** module will generate the maps we requested in the recipe: Skill maps to visualize the skill distribution of the model, for each metric. With the significance option in the `Visualization()` function, you can choose whether or not to shade the grid points that are statistically significant in each skill metric plot. @@ -224,12 +236,4 @@ Visualization(recipe, data, significance = TRUE) ``` -Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are png files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. - -**Questions** - -(1) Let's take a look at the forecast ensemble mean. What is the sign of the anomalies over Spain? In what regions are the anomalous temperatures strongest? - -(2) Let's take a look at the skill metrics RPSS, BSS10 and BSS90. In what regions and for which metrics is the forecast most skillful? *Tip*: Positive values indicate that the model is a better predictor than the climatology, with 1 being the perfect score. - -(3) Let's take a look at the Most Likely Terciles plots. 
This plot indicates the probability of the temperature being below normal, near-normal or above normal. What is the most likely category for Spain? +Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are PNG files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index 06b1e572..bdd7a8d2 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -1,6 +1,6 @@ Description: Author: V. Agudetse - Description: Run SUNSET workflow with CSTools sample data + '': split version Analysis: Horizon: seasonal Variables: @@ -44,7 +44,7 @@ Analysis: Indicators: index: FALSE Visualization: - plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + plots: skill_metrics, most_likely_terciles ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E @@ -52,6 +52,6 @@ Run: Loglevel: INFO Terminal: yes output_dir: /tmp/out-logs/ - code_dir: './' + code_dir: filesystem: sample -- GitLab From a62fa1a0deab64135c5bff75634cd8637d20e1f9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jan 2024 16:22:38 +0100 Subject: [PATCH 4/6] add provisional conda environment YAML and script --- conda_installation/environment-sunset.yml | 2 ++ conda_installation/load_sunset.bash | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100755 conda_installation/load_sunset.bash diff --git a/conda_installation/environment-sunset.yml b/conda_installation/environment-sunset.yml index 0427f523..27dbd8b9 100644 --- a/conda_installation/environment-sunset.yml +++ b/conda_installation/environment-sunset.yml @@ -165,6 +165,7 @@ dependencies: - r-docopt=0.7.1=r42hc72bb7e_3 - r-doparallel=1.0.17=r42hc72bb7e_2 - r-dotcall64=1.0_2=r42h61816a4_2 + - r-dplyr=1.1.2=r42ha503ecb_1 - r-e1071=1.7_13=r42ha503ecb_1 - r-easyncdf=0.1.2=r42hc72bb7e_1 - r-easyverification=0.4.5=r42ha503ecb_0 @@ -187,6 +188,7 @@ dependencies: - r-globals=0.16.2=r42hc72bb7e_1 - r-glue=1.6.2=r42h57805ef_2 - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gridgraphics=0.5_1=r42hc72bb7e_2 - r-gtable=0.3.4=r42hc72bb7e_0 - r-highr=0.10=r42hc72bb7e_1 - r-htmltools=0.5.6=r42ha503ecb_0 diff --git a/conda_installation/load_sunset.bash b/conda_installation/load_sunset.bash new file mode 100755 index 00000000..add332b4 --- /dev/null +++ b/conda_installation/load_sunset.bash @@ -0,0 +1,9 @@ +#!/bin/bash + +prefix=$1 + +conda env create --file environment-sunset.yml --prefix $prefix + +conda activate $prefix + +R -e "options(timeout = 600) ; install.packages('CSTools', repos='https://ftp.cixug.es/CRAN/')" -- GitLab From b77e028cdbdcd2cf87becda821213ef94a7b3b52 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jan 2024 16:51:12 +0100 Subject: [PATCH 5/6] Correct recipe and hands-on --- use_cases/ex0_1_sample_dataset/ex0_1-handson.md | 5 ++--- use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml | 14 +++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md index 7dc9927f..c1dfe48a 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md +++ b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md @@ -76,11 +76,11 @@ Analysis: method: <___> save: 'none' 
Skill: - metric: RPSS, BSS10, BSS90 + metric: RPSS BSS10 BSS90 cross_validation: yes save: 'none' Probabilities: - percentiles: [[1/3, 2/3]] + percentiles: [[1/3, 2/3]] save: 'none' Visualization: plots: skill_metrics @@ -232,7 +232,6 @@ With the significance option in the `Visualization()` function, you can choose w # Plot data Visualization(recipe, data, skill_metrics = skill_metrics, - probabilities = probabilities, significance = TRUE) ``` diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml index bdd7a8d2..662e75cc 100644 --- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -29,14 +29,14 @@ Analysis: type: to_system Workflow: Anomalies: - compute: no - cross_validation: - save: + compute: yes + cross_validation: yes + save: 'all' Calibration: method: bias - save: 'all' + save: 'none' Skill: - metric: RPSS + metric: RPSS BSS10 BSS90 save: 'all' Probabilities: percentiles: [[1/3, 2/3]] @@ -44,7 +44,7 @@ Analysis: Indicators: index: FALSE Visualization: - plots: skill_metrics, most_likely_terciles + plots: skill_metrics ncores: # Optional, int: number of cores, defaults to 1 remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE Output_format: S2S4E @@ -52,6 +52,6 @@ Run: Loglevel: INFO Terminal: yes output_dir: /tmp/out-logs/ - code_dir: + code_dir: ./ filesystem: sample -- GitLab From e08f838019356982ab2f9da8561ec5db84cc8548 Mon Sep 17 00:00:00 2001 From: vagudets Date: Tue, 16 Jan 2024 16:51:30 +0100 Subject: [PATCH 6/6] Improve output_conf warning logic --- modules/Visualization/Visualization.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a6b6bd75..ae94ed3d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -46,7 +46,7 @@ Visualization <- function(recipe, "parameters of the requested plotting function, i.e. ", "PlotEquiMap, PlotRobinson or PlotLayout. There could be ", "plotting erros if the list is incomplete.")) - } else { + } else if (!is.null(output_conf)) { warning(paste("Parameter 'output_conf' should be a list.", "Using default configuration.")) output_conf <- NULL -- GitLab
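
Note: the hands-on document added in this patch series walks through the workflow step by step. As a quick reference, the full sample-data run can also be scripted in R roughly as follows. This is a minimal sketch based only on the calls shown in `ex0_1-handson.md`; it assumes the patches above are applied, the R session is started from the main SUNSET directory, and `output_dir` in `use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml` points to a writable location.

```r
# Minimal end-to-end sketch of the sample-data workflow (see ex0_1-handson.md).
# Assumes: working directory is the main SUNSET folder, the sample-data recipe
# (filesystem: sample) is used, and its output_dir is writable.
source("modules/Loading/Loading.R")
source("modules/Units/Units.R")
source("modules/Calibration/Calibration.R")
source("modules/Anomalies/Anomalies.R")
source("modules/Skill/Skill.R")
source("modules/Saving/Saving.R")
source("modules/Visualization/Visualization.R")

# Read and check the recipe; creates the output folder for logs, data and plots
recipe <- prepare_outputs("use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml")

# Load the CSTools sample hindcast and observations, then post-process
data <- Loading(recipe)            # sample hindcast + obs, no forecast
data <- Units(recipe, data)        # check/convert units
data <- Calibration(recipe, data)  # bias correction as requested in the recipe
data <- Anomalies(recipe, data)    # compute anomalies

# Verification and plotting
skill_metrics <- Skill(recipe, data)
probabilities <- Probabilities(recipe, data)
Visualization(recipe, data,
              skill_metrics = skill_metrics,
              significance = TRUE)
```

If the run completes, the skill maps should appear as PNG files under the `plots/` subdirectory of the unique output directory created by `prepare_outputs()`.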