diff --git a/conda_installation/environment-sunset.yml b/conda_installation/environment-sunset.yml index 0427f523a39363df46c5f9227066e4c36178e566..27dbd8b96bfb39955ba302c6712bc3a8cca0ef36 100644 --- a/conda_installation/environment-sunset.yml +++ b/conda_installation/environment-sunset.yml @@ -165,6 +165,7 @@ dependencies: - r-docopt=0.7.1=r42hc72bb7e_3 - r-doparallel=1.0.17=r42hc72bb7e_2 - r-dotcall64=1.0_2=r42h61816a4_2 + - r-dplyr=1.1.2=r42ha503ecb_1 - r-e1071=1.7_13=r42ha503ecb_1 - r-easyncdf=0.1.2=r42hc72bb7e_1 - r-easyverification=0.4.5=r42ha503ecb_0 @@ -187,6 +188,7 @@ dependencies: - r-globals=0.16.2=r42hc72bb7e_1 - r-glue=1.6.2=r42h57805ef_2 - r-gridextra=2.3=r42hc72bb7e_1005 + - r-gridgraphics=0.5_1=r42hc72bb7e_2 - r-gtable=0.3.4=r42hc72bb7e_0 - r-highr=0.10=r42hc72bb7e_1 - r-htmltools=0.5.6=r42ha503ecb_0 diff --git a/conda_installation/load_sunset.bash b/conda_installation/load_sunset.bash new file mode 100755 index 0000000000000000000000000000000000000000..add332b4a99210f3a890b14da77ee466f8b2ba8d --- /dev/null +++ b/conda_installation/load_sunset.bash @@ -0,0 +1,9 @@ +#!/bin/bash + +prefix=$1 + +conda env create --file environment-sunset.yml --prefix $prefix + +conda activate $prefix + +R -e "options(timeout = 600) ; install.packages('CSTools', repos='https://ftp.cixug.es/CRAN/')" diff --git a/conf/archive.yml b/conf/archive.yml index dbacf0e724ef6fb85420638cc4d5f4f92317ad5f..61f62be230b4ff05021a60a17e38e5e4d446cff9 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -254,3 +254,26 @@ mars: monthly_mean: {"tas":""} calendar: "standard" reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" + +sample: + src: + System: + ECMWF-SEAS5.1: + name: "ECMWF SEAS5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: + monthly_mean: {"tas":"", "prlr":""} + nmember: + fcst: 15 + hcst: 15 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" + reference_grid: "conf/grid_description/griddes_GRIB_system51_m1.txt" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "GRIB_era5_tas/" + monthly_mean: {"tas":"", "prlr":""} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 43dea0651fc7eb02cb8cb69a1d44d580903a4666..63fee97bede51b72128b917af9b5171d83061d4f 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -8,6 +8,9 @@ Loading <- function(recipe) { if (tolower(recipe$Run$filesystem) == "mars") { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) + } else if (tolower(recipe$Run$filesystem) == "sample") { + source("modules/Loading/R/load_sample.R") + data <- load_sample(recipe) } else { # Case: esarchive time_horizon <- tolower(recipe$Analysis$Horizon) diff --git a/modules/Loading/R/load_sample.R b/modules/Loading/R/load_sample.R new file mode 100644 index 0000000000000000000000000000000000000000..e0d906d3dbc6beb4f59a3fe0eece2f6bd5fea8a4 --- /dev/null +++ b/modules/Loading/R/load_sample.R @@ -0,0 +1,49 @@ +## TODO: remove paths to personal scratchs +source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") +# Load required libraries/funs +source("tools/CST_ChangeDimName.R") +source("modules/Loading/R/compare_exp_obs_grids.R") + +load_sample <- function(recipe) { + # Hindcast: + # Change hcst dimension names + hcst <- CST_ChangeDimName(lonlat_temp_st$exp, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon", "member"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude", + "ensemble")) + # Add sday and sweek dimensions + hcst <- CST_InsertDim(hcst, posdim = 3, lendim = 1, name = "sday", values = 1) + hcst <- CST_InsertDim(hcst, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(hcst$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(hcst$attrs$Dates)[['syear']], + time = dim(hcst$attrs$Dates)[['time']]) + # Observations: + # Change obs dimension names + obs <- CST_ChangeDimName(lonlat_temp_st$obs, + original_dimnames = c("dataset", "sdate", "ftime", + "lat", "lon"), + final_dimnames = c("dat", "syear", "time", + "latitude", "longitude")) + # Add sday and sweek dimensions + obs <- CST_InsertDim(obs, posdim = 3, lendim = 1, name = "sday", values = 1) + obs <- CST_InsertDim(obs, posdim = 4, lendim = 1, name = "sweek", values = 1) + dim(obs$attrs$Dates) <- c(sday = 1, sweek = 1, + syear = dim(obs$attrs$Dates)[['syear']], + time = dim(obs$attrs$Dates)[['time']]) + # Add ensemble dimension to obs + obs <- CST_InsertDim(obs, posdim = 7, lendim = 1, name = "ensemble", values = 1) + # Adjust name of 'load_parameters$date' attribute + obs$attrs$load_parameters$dat1$file_date <- obs$attrs$load_parameters$dat1$date + + # Sample fcst NULL + fcst <- NULL + # Check for consistency between hcst and obs grid + compare_exp_obs_grids(hcst, obs) + + info(recipe$Run$logger, + "##### DATA LOADING COMPLETED SUCCESSFULLY #####") + .log_memory_usage(recipe$Run$logger, when = "After loading (sample data)") + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index a6b6bd75ebb6864e686c7101eb713a5f82f5a773..ae94ed3d01526f8cf98932a2adf6f6bce6d7359d 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -46,7 +46,7 @@ Visualization <- function(recipe, "parameters of the requested plotting function, i.e. ", "PlotEquiMap, PlotRobinson or PlotLayout. There could be ", "plotting erros if the list is incomplete.")) - } else { + } else if (!is.null(output_conf)) { warning(paste("Parameter 'output_conf' should be a list.", "Using default configuration.")) output_conf <- NULL diff --git a/recipes/atomic_recipes/recipe_seasonal_provenance.yml b/recipes/atomic_recipes/recipe_seasonal_provenance.yml new file mode 100644 index 0000000000000000000000000000000000000000..196b49c92bcbd52582bbf4573ae51591e3096ebc --- /dev/null +++ b/recipes/atomic_recipes/recipe_seasonal_provenance.yml @@ -0,0 +1,57 @@ +Description: + Author: V. Agudetse + '': split version +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: + save: + Calibration: + method: bias + save: 'all' + Skill: + metric: RPSS + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics, forecast_ensemble_mean, most_likely_terciles + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: /home/kinow/Development/r/workspace/sunset/ + filesystem: sample + diff --git a/tools/CST_ChangeDimName.R b/tools/CST_ChangeDimName.R new file mode 100644 index 0000000000000000000000000000000000000000..1ccbbb3d327df21f6582326e2e54fdf58ea68f09 --- /dev/null +++ b/tools/CST_ChangeDimName.R @@ -0,0 +1,39 @@ +## TODO: Documentation + +CST_ChangeDimName <- function(data, original_dimnames, final_dimnames) { + if (!inherits(data, "s2dv_cube")) { + stop("Parameter 'data' must be an object of class 's2dv_cube'") + } + if (!(length(original_dimnames) == length(final_dimnames))) { + stop("The number of dimension names in 'final_dimnames' must be the same + as in 'original_dimnames'") + } + ## TODO: Add check to verify that all original_dimnames are present in the array + for (index in 1:length(original_dimnames)) { + original_name <- original_dimnames[index] + final_name <- final_dimnames[index] + # Step 1: Change dims and data + names(data$dims)[which(names(data$dims) == original_name)] <- final_name + dim(data$data) <- data$dims + # Step 2: Change coords + names(data$coords)[which(names(data$coords) == original_name)] <- final_name + # Step 3: Change attrs + # 3.1 - Dates + if (original_name %in% names(dim(data$attrs$Dates))) { + names(dim(data$attrs$Dates))[which(names(dim(data$attrs$Dates)) + == original_name)] <- final_name + } + # 3.2 - Variable metadata + if (original_name %in% names(data$attrs$Variable$metadata)) { + names(data$attrs$Variable$metadata)[which(names(data$attrs$Variable$metadata) + == original_name)] <- final_name + } + # 3.3 - Source files + if (original_name %in% names(dim(data$attrs$source_files))) { + names(dim(data$attrs$source_files))[which(names(dim(data$attrs$source_files)) + == original_name)] <- final_name + } + } + return(data) +} + diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-handson.md b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md new file mode 100644 index 0000000000000000000000000000000000000000..c1dfe48a1d9dcf613db094a1c265331d4abf4a2e --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-handson.md @@ -0,0 +1,238 @@ +# Use case 0.1: Loading a sample dataset + +## Goal +Load a sample dataset into SUNSET without the need configure an archive of netCDF or GRIB files. + +## 0. SUNSET software stack + +The first step to use SUNSET is to create a copy of the code in your local environment. Open a terminal and `cd` to the directory where you would like to store your local copy of SUNSET. For example: `/esarchive/scratch//git/`. If a directory does not exist yet, you can create it with the `mkdir` shell command. + +```shell +# Clone the GitLab repository to create a local copy of the code +git clone https://earth.bsc.es/gitlab/es/sunset.git +``` + +If you are using BSC infrastructure, all of the software dependencies are already installed within our common environment. However, if you are running SUNSET outside of the BSC facilities, you can install the dependencies by creating a conda environment. A script is provided in the SUNSET repository, and you can install the environment by running the following line from the main folder of the SUNSET repository: + +```shell +bash conda_installation/load_sunset.bash +``` + +To run the line above, you should replace `` with the path where you want the conda environment to be installed. For example, `/home//conda-sunset`. + +## 1. The recipe + +SUNSET uses YAML configuration files called 'recipes' to specify which data you want to load and the details of the different steps of the workflow. In this example, we are using a sample dataset which contains data of temperature-at-surface (tas) monthly means, from ECMWF SEAS5 as the experiment and ERA5 as the reference dataset, for the initialization month of November. + +There is a template file for this hands-on tutorial, which you can open with a text editor: + +```shell +# cd to the main SUNSET directory +# Open the recipe with a text editor such as vim or emacs +vim use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml +``` + +Once you have opened your recipe, it is time to edit the contents. Fill in the blank slots according to your preference, based on the options given in the description. + +NOTE: In this particular example, the sample data is limited, so the system and reference names, variable, forecast times and region cannot be changed. + +```yaml +Description: + Author: <___> + Description: Exercise 0.1: Skill assessment of a sample dataset +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + units: <___> # Choose your units: C or K + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: 'to_system' + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: <___> + save: 'none' + Skill: + metric: RPSS BSS10 BSS90 + cross_validation: yes + save: 'none' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Visualization: + plots: skill_metrics + multi_panel: no + projection: cylindrical_equidistant + ncores: 10 + remove_NAs: yes + Output_format: S2S4E +Run: + filesystem: sample # This parameter specifies that we want to load the sample data, rather than data from a filesystem + Loglevel: INFO + Terminal: yes + output_dir: <______> # Path to the directory where you want your outputs to be saved + code_dir: <______> # Path to the directory where your code is +``` + +## 2. Load the required SUNSET modules and read the recipe + +If you are running SUNSET within BSC infrastructure, source the MODULES file to load the environment modules needed to run SUNSET. + +```shell +source MODULES +``` + +Otherwise, activate the conda environment: + +```shell +conda activate +``` + +Open an R session, by simply typing `R` on the terminal. + +To run SUNSET, we must run the R session from the directory where the code is. To check your working directory, you can run the shell command `pwd`. From the R session, you can use the commands `getwd()` and `setwd()` to see and change your working directory. + +```r +# Load required modules +source("modules/Loading/Loading.R") +source("modules/Units/Units.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +# Read recipe +recipe_file <- "use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml" +recipe <- prepare_outputs(recipe_file) +``` + +The function `prepare_outputs()` creates a unique folder for the logs, data files and plots that result from the execution of your recipe, inside the directory you specified. It also runs a check over the recipe to detect any potential errors, misspellings or missing arguments. At the end of the check, a message is displayed indicating whether or not the recipe passed the check, along with the list of errors and warnings. + +**Questions** + +Read the logs! + +(1) Did your recipe pass the check? Did you get any warnings? + +(2) Where will your outputs be saved? Copy and paste this directory somewhere, so that you can check it later! + +*Tip*: The recipe is now stored as a `list` containing all the information of the original YAML file, plus some extra things! If you want to see any particular element of the recipe from the R session, you can simply access that element in the list. For example: + +```r +# Checking the variable name +recipe$Analysis$Variables$name +# Checking the output directory +recipe$Run$output_dir +``` + +## 3. Load the data and change the units + +The **Loading** module retrieves the information from the recipe to load the data that has been requested it in. It loads the experiment data for the hindcast period, the reference data for the corresponding period, and the experiment forecast if a forecast year has been requested. In the case of the sample dataset, we have a hindcast and the corresponding observations, but no forecast. + +For certain variables like temperature, precipitation or sea level pressure, the user can request for specific units to load the data in. The **Units** module will read the original units as stored in the netCDF files and perform any necessary unit converstions to match the request in the recipe. It also verifies that all of the loaded datasets share the same units, even if no specific unit has been requested. For this reason, users are strongly encouraged to run it even if they did not request any unit conversion. + +```r +# Load datasets +data <- Loading(recipe) +# Change units +data <- Units(recipe, data) +``` + +**Questions** + +(1) What is the structure of `data`? What is the class of the objects in `data`? *Tip*: you can use functions like `class()`, `names()` or `str()` to gain information about the structure of the object and its contents. + +```r +class(data) +names(data) +str(data, max.level = 2) +# You can access any of the three objects with the `$` operator: +class(data$hcst) +``` + +(2) Pay attention to the log messages: Did your units get converted? Are the new units what you expect? You can check the metadata of any of the objects in data. SUNSET also provides the `data_summary()` function, which lets you have a quick look at your objects: + +```r +# Check the new units and data of the hindcast (hcst) and/or observations (obs). Are they the same? +data$hcst$attrs$Variable$metadata$tas$units +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +``` +(3) What are the dimensions of the datasets? Are they consistent with what is requested in the recipe? *Tip*: Check the data summary! + +## 4. Calibrate the data and compute the anomalies + +SUNSET has a few modules to perform post-processing on the experimental and the reference datasets. The **Calibration** module performs the bias correction method indicated in the recipe, using the `CSTools::CST_Calibration()` function. + +The **Anomalies** module removes the climatologies using functions like `CSTools::CST_Anomaly()` and `s2dv::Clim()`, and also returns the full fields in case they are needed for any future computations. + +```r +# Calibrate the data +data <- Calibration(recipe, data) +# Compute anomalies +data <- Anomalies(recipe, data) +``` +**Questions** + +(1) Verify that you now have anomaly values instead of the original full field. *Tip*: Use `data_summary()` like in the previous example and pay attention to the new values. + +## 5. Evaluate the model skill and compute the probability thresholds + +The **Skill** module returns a list of all the evaluation metrics requested in the recipe, in the shape of multi-dimensional arrays. In this case, we will compute three metrics: + +- **RPSS (Ranked Probability Skill Score)**: This skill score measures how well a forecast predicts the probability of the tercile categories (below normal, normal and above-normal), compared to the climatology. +- **BSS10 and BSS90 (Brier Skill Score):** This skill score measures how well a forecast predicts the probability of the 10th percentile and 90th percentile extreme events, compared to the climatology. + +The `Probabilities()` function returns the probability values for each requested category for the hindcast (and forecast) data, as well as the hindcast percentile values corresponding to each threshold. +``` +# Compute skill metrics +skill_metrics <- Skill(recipe, data) +# Compute percentiles and probability bins +probabilities <- Probabilities(recipe, data) +``` +**Questions** + +(1) What is the structure of `skill_metrics`? Which metrics were computed? What dimensions do they have? *Tip*: use `str()` and `names()`. + +(2) What is the structure of `probabilities`? Can you identify the probability categories and the percentiles? *Tip*: use `str()`. + +## 6. Plotting the results + +Now, let's visualize the information that was computed! + +The **Visualization** module will generate the maps we requested in the recipe: Skill maps to visualize the skill distribution of the model, for each metric. + +With the significance option in the `Visualization()` function, you can choose whether or not to shade the grid points that are statistically significant in each skill metric plot. + +```r +# Plot data +Visualization(recipe, data, + skill_metrics = skill_metrics, + significance = TRUE) +``` + +Now, you can `cd` to the the output directory and inspect the contents of the `plots/` subdirectory. The plots are PNG files that can be visualized with the `display` command. They have a descriptive name including the content of the plot, the date and the forecast time. diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml new file mode 100644 index 0000000000000000000000000000000000000000..662e75cc77691b7e6670cf68f31af885c48225c5 --- /dev/null +++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml @@ -0,0 +1,57 @@ +Description: + Author: V. Agudetse + '': split version +Analysis: + Horizon: seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: ECMWF-SEAS5.1 + Multimodel: no + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: + hcst_start: '2000' + hcst_end: '2006' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 27 + latmax: 48 + lonmin: 0 + lonmax: 359 + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'all' + Calibration: + method: bias + save: 'none' + Skill: + metric: RPSS BSS10 BSS90 + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'all' + Indicators: + index: FALSE + Visualization: + plots: skill_metrics + ncores: # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /tmp/out-logs/ + code_dir: ./ + filesystem: sample +