#'Plotting two probability density Gaussian functions and the optimal linear
#'estimation (OLE) as a result of combining them.
#'
#'@author Eroteida Sanchez-Garcia - AEMET, \email{esanchezg@aemet.es}
#'
#'@description This function plots two probability density Gaussian functions
#'and the optimal linear estimation (OLE) as a result of combining them.
@@ -29,21 +29,22 @@
#'@param ncores The number of cores to use in parallel computation.
#'
#'@return A list of length 2:
#'\itemize{
#' \item{'pred.dim', a list of two lists 'qdim' and 'pos.d'. The 'qdim' list
#'       contains values of local dimension 'dim' divided by terciles:
#'       d1: lower tercile (high predictability),
#'       d2: middle tercile,
#'       d3: higher tercile (low predictability).
#'       The 'pos.d' list contains the position of each tercile in parameter
#'       'dim'.}
#' \item{'pred.theta', a list of two lists 'qtheta' and 'pos.t'.
#'       The 'qtheta' list contains values of the inverse of persistence 'theta'
#'       divided by terciles:
#'       th1: lower tercile (high predictability),
#'       th2: middle tercile,
#'       th3: higher tercile (low predictability).
#'       The 'pos.t' list contains the position of each tercile in parameter
#'       'theta'.}
#'}
#'@return dyn_scores values from 0 to 1. A dyn_score of 1 indicates the highest
#'predictability.
@@ -51,7 +51,7 @@
#' }
#'}
#'
#'@seealso \code{\link{s2dv_cube}}, \code{\link{CST_Start}},
#'\code{\link[startR]{Start}} and \code{\link{CST_Load}}
#'@examples
#'\dontrun{
@@ -13,9 +13,11 @@
#'information about the 's2dv_cube', see \code{s2dv_cube()} and
#'\code{as.s2dv_cube()} functions.
#'
#'@param x An 's2dv_cube' object.
#'@param ... Additional arguments passed to the print function.
#'
#'@export
print.s2dv_cube <- function(x, ...) {
  if (is.atomic(x)) {
    cat(x, "\n")
  } else {
@@ -45,7 +47,7 @@ print.s2dv_cube <- function(x) {
      }
    } else {
      cat(" ", attr_name, " : ")
      .print_beginning(x = x$attrs[[attr_name]], name = attr_name)
    }
  }
}
@@ -53,13 +55,23 @@ print.s2dv_cube <- function(x) {
}
## Auxiliary function for the print method
.print_beginning <- function(x, name, n = 5, j = 1) {
  if (inherits(x, 'numeric') | inherits(x, 'POSIXct') | inherits(x, 'Date')) {
    if (length(x) <= n) {
      cat(as.character(x), "\n")
    } else {
      cat(paste0(as.character(x[seq_len(n)])), "...", "\n")
    }
  } else if (name == "time_bounds") {
    cat("\n")
    for (param in names(x)) {
      cat(" ", "(", param, ")", " : ")
      if (length(x[[param]]) <= n) {
        cat(as.character(x[[param]]), "\n")
      } else {
        cat(paste0(as.character(x[[param]][seq_len(n)])), "...", "\n")
      }
    }
  } else if (inherits(x, 'list')) {
    cat("\n")
    k <- 1
@@ -2,7 +2,7 @@
#'
#'@description This function allows creating an 's2dv_cube' object by passing
#'information through its parameters. This function is needed if the data
#'hasn't been loaded using CST_Start or has been transformed with other methods.
#'An 's2dv_cube' object has many different components, including metadata. This
#'function allows creating 's2dv_cube' objects even if not all elements
#'are defined, and for each expected missing parameter a warning message will be
@@ -11,7 +11,7 @@
#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es}
#'
#'@param data A multidimensional array with named dimensions, typically with
#'  dimensions: dataset, member, sdate, time, lat and lon.
#'@param coords A list of named vectors with the coordinates corresponding to
#'  the dimensions of the data parameter. If any coordinate has dimensions, they
#'  will be set as NULL. If any coordinate is not provided, it is set as an
@@ -65,7 +65,7 @@
#' }
#'}
#'
#'@seealso \code{\link[s2dv]{Load}} and \code{\link{CST_Start}}
#'@examples
#'exp_original <- 1:100
#'dim(exp_original) <- c(lat = 2, time = 10, lon = 5)
@@ -23,15 +23,15 @@
#' lonmin <- -12
#' latmax <- 48
#' latmin <- 27
#' lonlat_temp_st$exp <- CST_Start(dataset = repos_exp,
#'                                 var = 'tas',
#'                                 member = startR::indices(1:15),
#'                                 sdate = sdates,
#'                                 ftime = startR::indices(1:3),
#'                                 lat = startR::values(list(latmin, latmax)),
#'                                 lat_reorder = startR::Sort(decreasing = TRUE),
#'                                 lon = startR::values(list(lonmin, lonmax)),
#'                                 lon_reorder = startR::CircularSort(0, 360),
#'                                 synonims = list(lon = c('lon', 'longitude'),
#'                                                 lat = c('lat', 'latitude'),
#'                                                 member = c('member', 'ensemble'),
@@ -39,30 +39,34 @@
#'                                 return_vars = list(lat = NULL,
#'                                                    lon = NULL, ftime = 'sdate'),
#'                                 retrieve = TRUE)
#'
#' dates <- c(paste0(2000, c(11, 12)), paste0(2001, c('01', 11, 12)),
#'            paste0(2002, c('01', 11, 12)), paste0(2003, c('01', 11, 12)),
#'            paste0(2004, c('01', 11, 12)), paste0(2005, c('01', 11, 12)), 200601)
#' dates <- sapply(dates, function(x) {paste0(x, '01')})
#' dates <- as.POSIXct(dates, format = '%Y%m%d', 'UTC')
#' dim(dates) <- c(ftime = 3, sdate = 6)
#'
#' dates <- t(dates)
#' names(dim(dates)) <- c('sdate', 'ftime')
#'
#' path.obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$date$.nc'
#' lonlat_temp_st$obs <- CST_Start(dataset = path.obs,
#'                                 var = 'tas',
#'                                 date = unique(format(dates, '%Y%m')),
#'                                 ftime = startR::values(dates),
#'                                 ftime_across = 'date',
#'                                 ftime_var = 'ftime',
#'                                 merge_across_dims = TRUE,
#'                                 split_multiselected_dims = TRUE,
#'                                 lat = startR::values(list(latmin, latmax)),
#'                                 lat_reorder = startR::Sort(decreasing = TRUE),
#'                                 lon = startR::values(list(lonmin, lonmax)),
#'                                 lon_reorder = startR::CircularSort(0, 360),
#'                                 synonims = list(lon = c('lon', 'longitude'),
#'                                                 lat = c('lat', 'latitude'),
#'                                                 ftime = c('ftime', 'time')),
#'                                 transform = startR::CDORemapper,
#'                                 transform_extra_cells = 2,
#'                                 transform_params = list(grid = 'r360x181',
#'                                                         method = 'conservative'),
@@ -71,6 +75,16 @@
#'                                                    lat = NULL,
#'                                                    ftime = 'date'),
#'                                 retrieve = TRUE)
#'
#' library(lubridate)
#' dates_exp <- lonlat_temp_st$exp$attrs$Dates
#' lonlat_temp_st$exp$attrs$Dates <- floor_date(ymd_hms(dates_exp), unit = "months")
#' dim(lonlat_temp_st$exp$attrs$Dates) <- dim(dates_exp)
#'
#' dates_obs <- lonlat_temp_st$obs$attrs$Dates
#' lonlat_temp_st$obs$attrs$Dates <- floor_date(ymd_hms(dates_obs), unit = "months")
#' dim(lonlat_temp_st$obs$attrs$Dates) <- dim(dates_obs)
#'
#'}
#'
#'@name lonlat_temp_st
@@ -103,15 +117,15 @@ NULL
#' lonmin <- 6
#' lonmax <- 9
#'
#' lonlat_prec_st <- CST_Start(dataset = path,
#'                             var = 'prlr',
#'                             member = startR::indices(1:6),
#'                             sdate = sdates,
#'                             ftime = 121:151,
#'                             lat = startR::values(list(latmin, latmax)),
#'                             lat_reorder = startR::Sort(decreasing = TRUE),
#'                             lon = startR::values(list(lonmin, lonmax)),
#'                             lon_reorder = startR::CircularSort(0, 360),
#'                             synonims = list(lon = c('lon', 'longitude'),
#'                                             lat = c('lat', 'latitude'),
#'                                             ftime = c('time', 'ftime'),
@@ -19,6 +19,7 @@ Apart from this GitLab project, that allows you to monitor CSTools progress, to
- The CRAN repository [https://CRAN.R-project.org/package=CSTools](https://CRAN.R-project.org/package=CSTools), which includes the user manual and vignettes.
- Video tutorials [https://www.medscope-project.eu/products/tool-box/cstools-video-tutorials/](https://www.medscope-project.eu/products/tool-box/cstools-video-tutorials/).
- Other resources are under development, such as [training material](https://earth.bsc.es/gitlab/external/cstools/-/tree/MEDCOF2022/inst/doc/MEDCOF2022) and a [full reproducible use case for forecast calibration](https://earth.bsc.es/gitlab/external/cstools/-/tree/develop-CalibrationVignette/FOCUS_7_2).
- See and run package [**use cases**](https://earth.bsc.es/gitlab/external/cstools/-/blob/master/inst/doc/usecase.md).
Installation
------------
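In short, the released version can be installed from CRAN and loaded as usual:

```r
# Install the latest release from CRAN and load it
install.packages("CSTools")
library(CSTools)
```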
@@ -46,43 +47,53 @@ Overview
The CSTools package functions can be grouped into the following categories:

- **Data retrieval and formatting:** CST_Start, CST_SaveExp, CST_MergeDims, CST_SplitDim, CST_Subset, CST_InsertDim, CST_ChangeDimNames, as.s2dv_cube and s2dv_cube.
- **Classification:** CST_MultiEOF, CST_WeatherRegimes, CST_RegimesAssign, CST_CategoricalEnsCombination, CST_EnsClustering.
- **Downscaling:** CST_Analogs, CST_RainFARM, CST_RFTemp, CST_AdamontAnalog, CST_AnalogsPredictors.
- **Correction and transformation:** CST_BiasCorrection, CST_Calibration, CST_QuantileMapping, CST_Anomaly, CST_BEI_Weighting, CST_DynBiasCorrection.
- **Assessment:** CST_MultiMetric, CST_MultivarRMSE.
- **Visualization:** PlotCombinedMap, PlotForecastPDF, PlotMostLikelyQuantileMap, PlotPDFsOLE, PlotTriangles4Categories, PlotWeeklyClim.
Functions with the prefix **CST_** take a common object called `s2dv_cube` as input. This object can also be created directly from Load (s2dv) and from Start (startR). Multiple functions from different packages can operate on this common data structure to easily define a complete post-processing workflow. An `s2dv_cube` is an object that stores an ordered multidimensional array with named dimensions, specific coordinates and metadata (an in-memory representation of a NetCDF file). Its "methods" are the **CST_** prefix functions. The basic structure of the class `s2dv_cube` is a list of lists; the first-level elements are `data`, `dims`, `coords` and `attrs`, and any specific element can be accessed with the `$` operator.

As an example, this is how an `s2dv_cube` looks (see `lonlat_temp_st$exp`):
```r
's2dv_cube'
Data          [ 279.99, 280.34, 279.45, 281.99, 280.92, ... ]
Dimensions    ( dataset = 1, var = 1, member = 15, sdate = 6, ftime = 3, lat = 22, lon = 53 )
Coordinates
 * dataset : dat1
 * var : tas
   member : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 * sdate : 20001101, 20011101, 20021101, 20031101, 20041101, 20051101
   ftime : 1, 2, 3
 * lat : 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, ...
 * lon : 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...
Attributes
   Dates : 2000-11-01 2001-11-01 2002-11-01 2003-11-01 2004-11-01 ...
   varName : tas
   metadata :
      lat
        units : degrees_north
        long name : latitude
      lon
        units : degrees_east
        long name : longitude
      ftime
        units : hours since 2000-11-01 00:00:00
      tas
        units : K
        long name : 2 metre temperature
   Datasets : dat1
   when : 2023-10-02 10:11:06
   source_files : "/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20001101.nc" ...
   load_parameters :
       ( dat1 ) : dataset = dat1, var = tas, sdate = 20001101 ...
```
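The first-level elements are accessed with the `$` operator, for example (using the sample dataset printed above):

```r
dim(lonlat_temp_st$exp$data)     # named dimensions of the data array
lonlat_temp_st$exp$coords$lat    # latitude coordinate vector
lonlat_temp_st$exp$attrs$Dates   # dates stored in the metadata
```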
This package is designed to be compatible with other R packages such as [s2dv](https://CRAN.R-project.org/package=s2dv), [startR](https://CRAN.R-project.org/package=startR), [CSIndicators](https://CRAN.R-project.org/package=CSIndicators) and [CSDownscale](https://earth.bsc.es/gitlab/es/csdownscale).

> **Note:** The current `s2dv_cube` object (CSTools version > 5.0.0) differs from the object used in previous versions of the package. If you have doubts on this change, you can follow some of the issues: [New s2dv_cube object discussion](https://earth.bsc.es/gitlab/external/cstools/-/issues/94), [How to deal with the compatibility break](https://earth.bsc.es/gitlab/external/cstools/-/issues/112) and [Testing issue and specifications](https://earth.bsc.es/gitlab/external/cstools/-/issues/110). More information can be found in this document: [About the new 's2dv_cube'](https://docs.google.com/document/d/1ko37JFl_h6mOjDKM5QSQGikfLBKZq1naL11RkJIwtMM/edit?usp=sharing).
Contribute
----------
citHeader("To cite package 'CSTools' in publications use:")
# Extract the publication year from the package Date field; fall back to today
yr <- sub('.*(2[[:digit:]]{3})-.*', '\\1', meta$Date, perl = TRUE)
if (length(yr) == 0) yr <- format(Sys.Date(), '%Y')
bibentry(
bibtype = 'Manual',
title = paste0(meta$Package, ': ', meta$Title),
author = Filter(function(p) 'aut' %in% p$role, as.person(meta$Author)),
year = yr,
note = paste('R package version', meta$Version),
url = meta$URL
)
bibentry(
bibtype = "Article",
author = c(person("Núria", "Pérez-Zanón", email = "nuria.perez@bsc.es"), person("", "et al.")),
title = "Climate Services Toolbox (CSTools) v4.0: from climate forecasts to climate forecast information",
doi = "10.5194/gmd-15-6115-2022",
url = "https://gmd.copernicus.org/articles/15/6115/2022/",
journal = "Geoscientific Model Development",
publisher = "European Geosciences Union",
year = "2022"
)
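# Once the package is installed, both entries above are printed by:
#   citation("CSTools")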
# Hands-on 2: Data assessment
**Goal:** Use CSTools and s2dv to perform a quality assessment of a climate model.
**Load packages**
```r
library(CSTools)
library(s2dv)
```
## 1. Load the data
In this section we will use the function **Start** to load the data. Then, we will transform the output **startR_array** into an **s2dv_cube** object so that the data is easy to use within **CSTools** functions.
The **s2dv_cube** object is a structured list that contains the information needed to work with multidimensional arrays of data. Coordinates, dimensions, and metadata are neatly combined to allow for a more intuitive, concise, and less error-prone experience.
> **Note:** If you have already loaded the data with **Start**, go directly to section **b)**.
### a) Load the data
The following section is taken from the [PATC 2023 startR tutorial](https://earth.bsc.es/gitlab/es/startR/-/blob/doc-bsc_training_2023/inst/doc/tutorial/PATC2023/handson_1-data-loading.md?ref_type=heads). The experiment data are Meteo-France System 7, and the observational data are ERA5 from ECMWF, for near-surface temperature (short name: tas). We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016.
```r
# Use this one if on workstation or nord3 (have access to /esarchive)
path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc"
#----------------------------------------------------------------------
# Run these two lines if you're on Marenostrum4 and log in with training account
prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
path_exp <- paste0(prefix, path_exp)
#----------------------------------------------------------------------
sdate_hcst <- paste0(1993:2016, '1101')
hcst <- CST_Start(dat = path_exp,
var = 'tas',
syear = sdate_hcst,
ensemble = 'all',
time = 1:2,
latitude = startR::values(list(20, 80)),
latitude_reorder = startR::Sort(),
longitude = startR::values(list(-20, 40)),
longitude_reorder = startR::CircularSort(-180, 180),
transform = startR::CDORemapper,
transform_params = list(grid = 'r360x181', method = 'bilinear'),
transform_vars = c('latitude', 'longitude'),
synonims = list(latitude = c('lat', 'latitude'),
longitude = c('lon', 'longitude')),
return_vars = list(time = 'syear',
longitude = NULL, latitude = NULL),
retrieve = TRUE)
```
```r
# Save Dates dimensions
dates_dim <- dim(hcst$attrs$Dates)
# Adjust the day to the correct month
hcst$attrs$Dates <- hcst$attrs$Dates - lubridate::days(1)
# Add again Dates dimensions
dim(hcst$attrs$Dates) <- dates_dim
date_string <- format(hcst$attrs$Dates, '%Y%m')
sdate_obs <- array(date_string, dim = c(syear = 24, time = 2))
```
```r
path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc'
#----------------------------------------------------------------------
# Run these two lines if you're on Marenostrum4 and log in with training account
prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
path_obs <- paste0(prefix, path_obs)
#----------------------------------------------------------------------
obs <- CST_Start(dat = path_obs,
var = 'tas',
syear = sdate_obs,
split_multiselected_dims = TRUE,
latitude = startR::values(list(20, 80)),
latitude_reorder = startR::Sort(),
longitude = startR::values(list(-20, 40)),
longitude_reorder = startR::CircularSort(-180, 180),
transform = startR::CDORemapper,
transform_params = list(grid = 'r360x181', method = 'bilinear'),
transform_vars = c('latitude', 'longitude'),
synonims = list(syear = c('syear', 'sdate'),
latitude = c('lat', 'latitude'),
longitude = c('lon', 'longitude')),
return_vars = list(time = 'syear',
longitude = NULL, latitude = NULL),
retrieve = TRUE)
```
### b) Create `s2dv_cube`:
Now we convert the **hindcast and observations data** (**startR_array**) into **s2dv_cube** objects with the function **as.s2dv_cube** from the **CSTools** package:
```r
hcst <- as.s2dv_cube(hcst)
obs <- as.s2dv_cube(obs)
```
By printing the object, we can see its structure. The first-level elements are:
- **Data**: A multidimensional array containing the data.
- **Dimensions**: A vector with the data dimensions.
- **Coordinates**: A list with vector coordinates.
- **Attributes**: A list containing the metadata.
```r
> hcst
's2dv_cube'
Data [ 294.975204467773, 295.99658203125, 296.999153137207, 296.874618530273, 297.662521362305, 297.113525390625, 296.145011901855, 295.981201171875 ... ]
Dimensions ( dat = 1, var = 1, syear = 24, ensemble = 25, time = 2, latitude = 61, longitude = 61 )
Coordinates
* dat : dat1
* var : tas
* syear : 19931101, 19941101, 19951101, 19961101, 19971101, 19981101, 19991101, 20001101, 20011101, 20021101, 20031101, 20041101, 20051101, 20061101, 20071101, 20081101, 20091101, 20101101, 20111101, 20121101, 20131101, 20141101, 20151101, 20161101
ensemble : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
time : 1, 2
* latitude : 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80
* longitude : -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
Attributes
Dates : 1993-11-30 1994-11-30 1995-11-30 1996-11-30 1997-11-30 ...
varName : tas
metadata :
time
other : class, tzone
longitude
other : dim
latitude
other : dim
tas
units : K
long name : 2 metre temperature
other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table
Datasets : dat1
when : 2023-10-26 14:43:30
source_files : /esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc ...
load_parameters :
( dat1 ) : dat = dat1, var = tas, syear = 19931101 ...
...
```
> **Note:** An **s2dv_cube** object is a structured list in base R. To access the elements, you need to use the `$` operator (e.g. `hcst$data`, `hcst$dims`, `hcst$coords`, `hcst$attrs`, ...).
#### Exercise 1
**Goal:** Find the **s2dv_cube** information of the **hindcast** data.
1. What type of object is an **s2dv_cube** in base R? Use the functions **class()** and **typeof()**:
2. What type of object is the element `hcst$data` (in common language)? Use the functions **dim()** and **typeof()** to check `hcst$data`:
3. What are the **time dimensions** of the **hindcast** data? The Dates of an **s2dv_cube** can be found in element: `hcst$attrs$Dates`.
4. What are the coordinate names in the **hindcast**? Use the function **names()** to check. The coordinates in the **s2dv_cube** are stored in element `hcst$coords`.
5. At which **latitudes** and **longitudes** has the data been loaded?
6. What is the **start date** dimension name of the `hcst`? What is the **ensemble member** dimension name?
7. How many **ensemble members** do the **hindcast** and **observations** datasets have?
8. What is the full variable name of the loaded data? Find out the information in `hcst$attrs$Variable$metadata` with the function **str()**.
9. From which season is the data loaded? You can use the function **months()**.
10. What are the **units** of the data?
## 2. Calibrate the data
The first step to perform a quality assessment is to correct biases as well as dispersion errors of the model. The function **Calibration** from **CSTools** allows us to choose among different member-by-member calibration techniques.
In this case, we are going to choose a method called **"evmos"**, which applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011).
> **Note:** The functions of **CSTools** whose name starts with the prefix **CST** work directly with **s2dv_cube** objects. If we are not using **s2dv_cube** we can use the standard versions without the prefix that work with arrays (e.g. use **Calibration**, **Anomaly** instead of **CST_Calibration**, **CST_Anomaly**, ...).
To calibrate the hindcast, we also need to use the observations and to specify some parameters, such as the adjustment specifications and the dimension names.
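If in doubt, the dimension names can be read directly from the loaded objects before filling in the call below (a quick check using the `hcst` object from section 1):

```r
# The named dimensions of the data array give the exact names to use
names(dim(hcst$data))
# or, equivalently, from the 's2dv_cube' itself
hcst$dims
```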
#### Exercise 2:
**Goal:** Calibrate the hindcast by completing the ensemble member and start date dimension names of the data.
```r
hcst_cal <- CST_Calibration(exp = hcst, obs = obs,
cal.method = "evmos",
eval.method = "leave-one-out",
multi.model = FALSE,
na.fill = TRUE,
na.rm = TRUE,
apply_to = NULL,
alpha = NULL,
memb_dim = ____,
sdate_dim = ____,
ncores = 10)
```
## 3. Compute Anomalies
In this section we will compute the hindcast anomalies from the calibrated data in the previous step.
Anomalies are deviations from the average weather conditions over a long period. A positive anomaly indicates that the conditions are higher than the average, while a negative one indicates that they are lower. Calculating anomalies is an important step in the model quality assessment for several reasons, such as removing seasonal variations, aiding visualization, and informing policy and decision-making, among others.
We are going to use the function **CST_Anomaly** from **CSTools**. This function computes the anomalies relative to a climatology computed along the selected dimension (in our case starting dates). The computation is carried out independently for experimental and observational datasets.
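As a minimal illustration of the concept (toy numbers, not the loaded data), an anomaly is just the value minus the climatological mean:

```r
# Hypothetical yearly means (K) for a 5-year period
x <- c(280.1, 281.3, 279.8, 280.9, 280.4)
clim <- mean(x)   # climatology over the period
x - clim          # positive = above average, negative = below average
```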
#### Exercise 3:
**Goal:** Calculate the hindcast anomalies from the calibrated hindcast and the observations dataset. You can take a look at the [CSTools package documentation](https://cran.r-project.org/web/packages/CSTools/CSTools.pdf) on page 40 to find the missing parameters.
```r
hcst_anom <- CST_Anomaly(exp = ____,
obs = ____,
cross = TRUE,
memb = TRUE,
memb_dim = ____,
dim_anom = 'syear',
dat_dim = c('dat', 'ensemble'),
ftime_dim = ____,
ncores = 10)
```
## 4. Compute skill: RPSS
To trust the climate models we need to evaluate their skill. To do so, we are going to use the **Ranked Probability Skill Score** (RPSS; Wilks, 2011). It is a skill score based on the **Ranked Probability Score** (RPS; Wilks, 2011) and can be used to assess whether a forecast presents an improvement or worsening with respect to a reference forecast.
The **RPSS** ranges between minus infinity and 1. If the **RPSS is positive**, it indicates that the **forecast has higher skill than the reference forecast**, while a **negative** value means that it has **lower skill**. It is computed as `RPSS = 1 - RPS_exp / RPS_ref`. The statistical significance is obtained based on a Random Walk test at the specified confidence level (DelSole and Tippett, 2016).
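As a quick illustration of the formula with made-up RPS values (not computed from our data):

```r
rps_exp <- 0.15          # hypothetical mean RPS of the forecast
rps_ref <- 0.20          # hypothetical mean RPS of the reference forecast
1 - rps_exp / rps_ref    # RPSS = 0.25 > 0: the forecast beats the reference
```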
Next, we compute the RPSS for anomalies:
```r
skill <- RPSS(exp = hcst_anom$exp$data,
obs = hcst_anom$obs$data,
time_dim = 'syear',
memb_dim = 'ensemble',
Fair = FALSE,
cross.val = TRUE,
ncores = 10)
```
The output of the **RPSS** function is a list of two elements. The first element, `rpss`, is the RPSS; the second element, `sign`, is a logical array indicating the statistical significance of the RPSS, with the same dimensions as `rpss`.
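We can check this structure directly on the object returned above:

```r
names(skill)      # "rpss" "sign"
dim(skill$rpss)   # both elements share the same dimensions
dim(skill$sign)
```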
#### Exercise 4:
**Goal:** Compare the RPSS results with **calibrated** and **raw anomalies**.
```r
hcst_anom <- CST_Anomaly(exp = ____,
obs = ____,
cross = ____,
memb = ____,
memb_dim = ____,
dim_anom = ____,
dat_dim = ____,
ftime_dim = ____,
ncores = ____)
skill_raw <- RPSS(exp = ____,
obs = ____,
time_dim = ____,
memb_dim = ____,
Fair = ____,
cross.val = ____,
ncores = ____)
summary(____)
```
## 5. Additional Exercises: Visualization
#### Exercise 5
**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the raw and calibrated data.
We are going to plot the **last year** of the hindcast period (**2016**) for the last timestep (**December**). Also, we are going to use the **last ensemble member** (an arbitrary choice).
```r
lat <- hcst$coords$lat
lon <- hcst$coords$lon
PlotEquiMap(var = hcst_anom$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon,
filled.continents = FALSE,
fileout = ____)
PlotEquiMap(var = ____, lat = ____, lon = ____,
filled.continents = ____,
fileout = ____)
```
#### Exercise 6
**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the RPSS results with calibrated and raw data.
```r
PlotEquiMap(var = ____, lat = ____, lon = ____,
brks = seq(-1, 1, by = 0.1),
filled.continents = ____,
fileout = ____)
```
# Hands-on 2: Data assessment
**Goal:** Use CSTools and s2dv to perform a quality assessment of a climate model.
**Load packages**
```r
library(CSTools)
library(s2dv)
```
## 1. Load the data
In this section we will use the function **Start** to load the data. Then, we will transform the output **startR_array** into an **s2dv_cube** object so that the data is easy to use within **CSTools** functions.
The **s2dv_cube** object is a structured list that contains the information needed to work with multidimensional arrays of data. Coordinates, dimensions, and metadata are neatly combined to allow for a more intuitive, concise, and less error-prone experience.
> **Note:** If you have already loaded the data with **Start**, go directly to section **b)**.
### a) Load the data
The following section is taken from the [PATC 2023 startR tutorial](https://earth.bsc.es/gitlab/es/startR/-/blob/doc-bsc_training_2023/inst/doc/tutorial/PATC2023/handson_1-data-loading.md?ref_type=heads). The experiment data are Meteo-France System 7, and the observational data are ERA5 from ECMWF, for near-surface temperature (short name: tas). We will focus on the Europe region (roughly 20W-40E, 20N-80N). The hindcast years are 1993 to 2016.
```r
# Use this one if on workstation or nord3 (have access to /esarchive)
path_exp <- "/esarchive/exp/meteofrance/system7c3s/monthly_mean/$var$_f6h/$var$_$syear$.nc"
#----------------------------------------------------------------------
# Run these two lines if you're on Marenostrum4 and log in with training account
prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
path_exp <- paste0(prefix, path_exp)
#----------------------------------------------------------------------
sdate_hcst <- paste0(1993:2016, '1101')
hcst <- CST_Start(dat = path_exp,
var = 'tas',
syear = sdate_hcst,
ensemble = 'all',
time = 1:2,
latitude = startR::values(list(20, 80)),
latitude_reorder = startR::Sort(),
longitude = startR::values(list(-20, 40)),
longitude_reorder = startR::CircularSort(-180, 180),
transform = startR::CDORemapper,
transform_params = list(grid = 'r360x181', method = 'bilinear'),
transform_vars = c('latitude', 'longitude'),
synonims = list(latitude = c('lat', 'latitude'),
longitude = c('lon', 'longitude')),
return_vars = list(time = 'syear',
longitude = NULL, latitude = NULL),
retrieve = TRUE)
```
```r
# Save Dates dimensions
dates_dim <- dim(hcst$attrs$Dates)
# Adjust the day to the correct month
hcst$attrs$Dates <- hcst$attrs$Dates - lubridate::days(1)
# Add again Dates dimensions
dim(hcst$attrs$Dates) <- dates_dim
date_string <- format(hcst$attrs$Dates, '%Y%m')
sdate_obs <- array(date_string, dim = c(syear = 24, time = 2))
```
```r
path_obs <- '/esarchive/recon/ecmwf/era5/monthly_mean/$var$_f1h-r1440x721cds/$var$_$syear$.nc'
#----------------------------------------------------------------------
# Run these two lines if you're on Marenostrum4 and log in with training account
prefix <- '/gpfs/scratch/nct01/nct01001/d2_handson_R/'
path_obs <- paste0(prefix, path_obs)
#----------------------------------------------------------------------
obs <- CST_Start(dat = path_obs,
var = 'tas',
syear = sdate_obs,
split_multiselected_dims = TRUE,
latitude = startR::values(list(20, 80)),
latitude_reorder = startR::Sort(),
longitude = startR::values(list(-20, 40)),
longitude_reorder = startR::CircularSort(-180, 180),
transform = startR::CDORemapper,
transform_params = list(grid = 'r360x181', method = 'bilinear'),
transform_vars = c('latitude', 'longitude'),
synonims = list(syear = c('syear', 'sdate'),
latitude = c('lat', 'latitude'),
longitude = c('lon', 'longitude')),
return_vars = list(time = 'syear',
longitude = NULL, latitude = NULL),
retrieve = TRUE)
```
### b) Create `s2dv_cube`:
Now we convert the **hindcast and observations data** (**startR_array**) into **s2dv_cube** objects with the function **as.s2dv_cube** from the **CSTools** package:
```r
hcst <- as.s2dv_cube(hcst)
obs <- as.s2dv_cube(obs)
```
By printing the object, we can see its structure. The first-level elements are:
- **Data**: A multidimensional array containing the data.
- **Dimensions**: A vector with the data dimensions.
- **Coordinates**: A list with vector coordinates.
- **Attributes**: A list containing the metadata.
```r
> hcst
's2dv_cube'
Data [ 294.975204467773, 295.99658203125, 296.999153137207, 296.874618530273, 297.662521362305, 297.113525390625, 296.145011901855, 295.981201171875 ... ]
Dimensions ( dat = 1, var = 1, syear = 24, ensemble = 25, time = 2, latitude = 61, longitude = 61 )
Coordinates
* dat : dat1
* var : tas
* syear : 19931101, 19941101, 19951101, 19961101, 19971101, 19981101, 19991101, 20001101, 20011101, 20021101, 20031101, 20041101, 20051101, 20061101, 20071101, 20081101, 20091101, 20101101, 20111101, 20121101, 20131101, 20141101, 20151101, 20161101
ensemble : 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
time : 1, 2
* latitude : 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80
* longitude : -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40
Attributes
Dates : 1993-11-30 1994-11-30 1995-11-30 1996-11-30 1997-11-30 ...
varName : tas
metadata :
time
other : class, tzone
longitude
other : dim
latitude
other : dim
tas
units : K
long name : 2 metre temperature
other : prec, dim, unlim, make_missing_value, missval, hasAddOffset, hasScaleFact, code, table
Datasets : dat1
when : 2023-10-26 14:43:30
source_files : /esarchive/exp/meteofrance/system7c3s/monthly_mean/tas_f6h/tas_19931101.nc ...
load_parameters :
( dat1 ) : dat = dat1, var = tas, syear = 19931101 ...
...
```
> **Note:** An **s2dv_cube** object is a structured list in base R. To access the elements, you need to use the `$` operator (e.g. `hcst$data`, `hcst$dims`, `hcst$coords`, `hcst$attrs`, ...).
#### Exercise 1
**Goal:** Find the **s2dv_cube** information of the **hindcast** data.
1. What type of object is an **s2dv_cube** in base R?
```r
class(hcst)
# "s2dv_cube"
typeof(hcst)
# "list"
```
2. What type of object is the element `hcst$data` (in common language)? Use the functions **dim()** and **typeof()** to check `hcst$data`:
```r
typeof(hcst$data) # base type
# [1] "double"
dim(hcst$data) # dimensions
# dat var syear ensemble time latitude longitude
# 1 1 24 25 2 61 61
# Answer: Multi-dimensional array / N-dimensional array / Tensor.
```
3. What are the **time dimensions** of the **hindcast** data? The Dates of an **s2dv_cube** can be found in element: `hcst$attrs$Dates`.
```r
dim(hcst$attrs$Dates)
# syear time
# 24 2
```
4. What are the coordinate names in the **hindcast**? Use the function **names()** to check. The coordinates in the **s2dv_cube** are stored in element `hcst$coords`.
```r
names(hcst$coords)
# [1] "dat" "var" "syear" "ensemble" "time" "latitude"
# [7] "longitude"
```
5. At which **latitudes** and **longitudes** has the data been loaded?
```r
hcst$coords$lat
# latitude: 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
# [26] 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
# [51] 70 71 72 73 74 75 76 77 78 79 80
hcst$coords$lon
# [1] -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2
# [20] -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# [39] 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
# [58] 37 38 39 40
```
6. What is the **start date** dimension name of the **hindcast**? What is the **ensemble member** dimension name?
```r
hcst$dims
# syear
# ensemble
```
7. How many **ensemble members** do the **hindcast** and **observations** datasets have?
```r
hcst$dims[['ensemble']]
# [1] 25
obs$dims
# No ensemble member dimension in obs
```
8. What is the full variable name of the loaded data? Find out the information in `hcst$attrs$Variable$metadata` with the function **str()**.
```r
str(hcst$attrs$Variable)
str(hcst$attrs$Variable$metadata$tas$long_name)
# chr "2 metre temperature"
```
9. From which season is the data loaded? You can use the function **months()**.
```r
dim(hcst$attrs$Dates)
hcst$attrs$Dates[1,]
months(hcst$attrs$Dates[1,])
# "December" "January"
```
10. What are the **units** of the data?
```r
hcst$attrs$Variable$metadata$tas$units
# K
```
## 2. Calibrate the data
The first step to perform a quality assessment is to correct biases as well as dispersion errors of the model. The function **Calibration** from **CSTools** allows us to choose among different member-by-member calibration techniques.
In this case, we are going to choose a method called **"evmos"**, which applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011).
> **Note:** The functions of **CSTools** whose name starts with the prefix **CST** work directly with **s2dv_cube** objects. If we are not using **s2dv_cube** we can use the standard versions without the prefix that work with arrays (e.g. use **Calibration**, **Anomaly** instead of **CST_Calibration**, **CST_Anomaly**, ...).
To calibrate the hindcast, we also need to use the observations and to specify some parameters, such as the adjustment specifications and the dimension names.
#### Exercise 2:
**Goal:** Calibrate the hindcast by completing the ensemble member and start date dimension names of the data.
```r
hcst_cal <- CST_Calibration(exp = hcst, obs = obs,
cal.method = "evmos",
eval.method = "leave-one-out",
multi.model = FALSE,
na.fill = TRUE,
na.rm = TRUE,
apply_to = NULL,
alpha = NULL,
memb_dim = "ensemble",
sdate_dim = "syear",
ncores = 10)
```
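The same correction can be obtained with the array-based version, which returns a plain array instead of an 's2dv_cube' (a sketch under the same settings):

```r
cal_arr <- Calibration(exp = hcst$data, obs = obs$data,
                       cal.method = "evmos", eval.method = "leave-one-out",
                       memb_dim = "ensemble", sdate_dim = "syear",
                       ncores = 10)
```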
## 3. Compute Anomalies
In this section we will compute the hindcast anomalies from the calibrated data in the previous step.
Anomalies are deviations from the average weather conditions over a long period. A positive anomaly indicates that the conditions are higher than the average, while a negative one indicates that they are lower. Calculating anomalies is an important step in the model quality assessment for several reasons, such as removing seasonal variations, aiding visualization, and informing policy and decision-making, among others.
We are going to use the function **CST_Anomaly** from **CSTools**. This function computes the anomalies relative to a climatology computed along the selected dimension (in our case starting dates). The computation is carried out independently for experimental and observational datasets.
#### Exercise 3:
**Goal:** Calculate the hindcast anomalies from the calibrated hindcast and the observations dataset. You can take a look at the [CSTools package documentation](https://cran.r-project.org/web/packages/CSTools/CSTools.pdf) on page 40 to find the missing parameters.
```r
hcst_anom <- CST_Anomaly(exp = hcst_cal,
obs = obs,
cross = TRUE,
memb = TRUE,
memb_dim = 'ensemble',
dim_anom = 'syear',
dat_dim = c('dat', 'ensemble'),
ftime_dim = 'time',
ncores = 10)
```
## 4. Compute skill: RPSS
To trust the climate models we need to evaluate their skill. To do so, we are going to use the **Ranked Probability Skill Score** (RPSS; Wilks, 2011). It is a skill score based on the **Ranked Probability Score** (RPS; Wilks, 2011) and can be used to assess whether a forecast presents an improvement or worsening with respect to a reference forecast.
The **RPSS** ranges between minus infinity and 1. If the **RPSS is positive**, it indicates that the **forecast has higher skill than the reference forecast**, while a **negative** value means that it has **lower skill**. It is computed as `RPSS = 1 - RPS_exp / RPS_ref`. The statistical significance is obtained based on a Random Walk test at the specified confidence level (DelSole and Tippett, 2016).
Next, we compute the RPSS for anomalies:
```r
skill <- RPSS(exp = hcst_anom$exp$data,
obs = hcst_anom$obs$data,
time_dim = 'syear',
memb_dim = 'ensemble',
Fair = FALSE,
cross.val = TRUE,
ncores = 10)
```
The output of the **RPSS** function is a list of two elements. The first element, `rpss`, is the RPSS; the second element, `sign`, is a logical array indicating the statistical significance of the RPSS, with the same dimensions as `rpss`.
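One handy use of the `sign` element is to mask out the non-significant points before mapping them (a sketch using the objects above):

```r
# Keep only the statistically significant RPSS values
rpss_sig <- skill$rpss
rpss_sig[!skill$sign] <- NA   # non-significant grid points become NA
```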
#### Exercise 4:
**Goal:** Compare the RPSS results with **calibrated** and **raw anomalies**.
```r
hcst_anom_raw <- CST_Anomaly(exp = hcst,
obs = obs,
cross = TRUE,
memb = TRUE,
memb_dim = 'ensemble',
dim_anom = 'syear',
dat_dim = c('dat', 'ensemble'),
ftime_dim = 'time',
ncores = 10)
skill_raw <- RPSS(exp = hcst_anom_raw$exp$data,
obs = hcst_anom_raw$obs$data,
time_dim = 'syear',
memb_dim = 'ensemble',
Fair = FALSE,
cross.val = TRUE,
ncores = 10)
> summary(skill$rpss)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.54005 -0.11225 -0.03170 -0.03722 0.04480 0.44376
> summary(skill$sign)
Mode FALSE TRUE
logical 6798 402
> summary(skill_raw$rpss)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.59113 -0.11045 -0.03069 -0.03585 0.04637 0.44305
> summary(skill_raw$sign)
Mode FALSE TRUE
logical 7055 387
```
## 5. Additional Exercises: Visualization
#### Exercise 5
**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the raw and calibrated data.
We are going to plot the **last year** of the hindcast period (**2016**) for the last timestep (**December**). Also, we are going to use the **last ensemble member** (an arbitrary choice).
```r
lat <- hcst$coords$lat
lon <- hcst$coords$lon
PlotEquiMap(hcst_anom$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon,
filled.continents = FALSE,
toptitle = "Calibrated Hindcast Anomalies")
PlotEquiMap(hcst_anom_raw$exp$data[24, , 25, , 2, , ], lat = lat, lon = lon,
filled.continents = FALSE,
toptitle = "Raw Hindcast Anomalies")
```
![](./Figures/hcst_anom_cal.png)
![](./Figures/hcst_anom_raw.png)
#### Exercise 6
**Goal:** Use the function **PlotEquiMap** from **s2dv** to compare the RPSS results with calibrated and raw data.
```r
PlotEquiMap(skill$rpss[ , , 2, , ], lat = lat, lon = lon,
brks = seq(-1, 1, by = 0.1),
filled.continents = FALSE,
toptitle = "RPSS from Calibrated Hindcast Anomalies")
PlotEquiMap(skill_raw$rpss[ , , 2, , ], lat = lat, lon = lon,
brks = seq(-1, 1, by = 0.1),
filled.continents = FALSE,
toptitle = "RPSS from Raw Hindcast Anomalies")
```
![](./Figures/skill_cal.png)
![](./Figures/skill_raw.png)
# Use case and example scripts
In this document, you will find example scripts of the package. The first ones are use cases of climate data assessment. The second ones are example scripts on the use of the 's2dv_cube' object.
1. **Use cases of climate data assessment and downscaling**
1. [Bias adjustment for assessment of an extreme event](inst/doc/usecase/UseCase1_WindEvent_March2018.R)
2. [Precipitation Downscaling with RainFARM RF 4](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF4.R)
3. [Precipitation Downscaling with RainFARM RF 100](inst/doc/usecase/UseCase2_PrecipitationDownscaling_RainFARM_RF100.R)
4. [Seasonal forecasts for a river flow](inst/doc/usecase/UseCase3_data_preparation_SCHEME_model.R)
2. **Examples on how to use 's2dv_cube'**
1. [Create an 's2dv_cube'](inst/doc/usecase/ex1_create.R)
2. [Save 's2dv_cube'](inst/doc/usecase/ex2_save.R)
3. [Modify any 's2dv_cube' dimension](inst/doc/usecase/ex3_modify_dims.R)
4. [Subset any 's2dv_cube' dimension](inst/doc/usecase/ex4_subset.R)