diff --git a/modules/Saving/R/get_dir.R b/modules/Saving/R/get_dir.R index b8463114039f146bfbaf85c9b66a0e856abc31c2..b81450e746c94b14dc5b90496a74417b6bd528e1 100644 --- a/modules/Saving/R/get_dir.R +++ b/modules/Saving/R/get_dir.R @@ -10,12 +10,14 @@ get_dir <- function(recipe, variable, agg = "global") { # variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] outdir <- recipe$Run$output_dir system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) + reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (tolower(recipe$Analysis$Output_format) == 'scorecards') { # Define output dir name according to Scorecards format dict <- read_yaml("conf/output_dictionaries/scorecards.yml") # system <- dict$System[[recipe$Analysis$Datasets$System$name]]$short_name - dir <- paste0(outdir, "/", system, "/", variable, "/") + dir <- paste0(outdir, system, "/", reference, "/", calib.method, "/", variable, "/") } else { # Default generic output format based on FOCUS # Get startdate or hindcast period @@ -37,12 +39,6 @@ get_dir <- function(recipe, variable, agg = "global") { fcst.sdate <- paste0("hcst-", recipe$Analysis$Time$sdate) } } - ## TODO: Remove calibration method from output directory? - if (!is.null(recipe$Analysis$Workflow$Calibration$method)) { - calib.method <- paste0(tolower(recipe$Analysis$Workflow$Calibration$method), "-") - } else { - calib.method <- "" - } store.freq <- recipe$Analysis$Variables$freq if (!is.null(recipe$Analysis$Region$name)) { outdir <- paste0(outdir, "/", recipe$Analysis$Region$name) diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index a179379975080ea566dfd6f3237b42d4152d62a1..2d2ac6d3e72ddfdd1eff3a5c672d8cce15bc8ac3 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -47,11 +47,19 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { "obs" = {type_info <- paste0("-obs_", date, "_")}, "percentiles" = {type_info <- "-percentiles_"}, "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}) + "bias" = {type_info <- paste0("-bias_", date, "_")}, + # New metric-specific file types for the Scorecards output format + "rps_syear" = {type_info <- "rps_syear"}, + "rps_clim_syear" = {type_info <- "rps_clim_syear"}, + "crps_syear" = {type_info <- "crps_syear"}, + "crps_clim_syear" = {type_info <- "crps_clim_syear"}, + "crps" = {type_info <- "crps"}, + "mean_bias" = {type_info <- "mean_bias"}, + {type_info <- file.type}) # Build file name file <- paste0("scorecards_", system, "_", reference, "_", - var, type_info, hcst_start, "-", hcst_end, "_s", shortdate) + var, "_", type_info, "_", hcst_start, "-", hcst_end, "_s", shortdate) } else { if (tolower(recipe$Analysis$Horizon) == "decadal") { diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 2a59f2c2acdc4845fa20c702bc0585d04a7f43ae..9cd1fb0ddcfd6eba2fe71e046c717f367d180323 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,9 +1,10 @@ save_metrics <- function(recipe, - skill, + metrics, dictionary = NULL, data_cube, agg = "global", - outdir = NULL) { + outdir = NULL, + module = "skill") { - # This function adds metadata to the skill metrics in 'skill' + # This function adds metadata to the metrics in 'metrics' # and exports them to a netCDF file inside 'outdir'.
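+ # Example call (a sketch; it mirrors the updated call in Saving.R further
+ # down in this diff, with 'module' left at its default "skill"):
+ # save_metrics(recipe = recipe, metrics = skill_metrics,
+ #              data_cube = data$hcst, agg = "global", outdir = outdir[var])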
# Define grid dimensions and names @@ -79,8 +80,8 @@ save_metrics <- function(recipe, # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { # Subset skill arrays - subset_skill <- lapply(skill, function(x) { - ClimProjDiags::Subset(x, along = 'var', + subset_metric <- lapply(metrics, function(x) { + ClimProjDiags::Subset(x, along = 'var', indices = var, drop = 'selected')}) # Generate name of output file @@ -89,59 +90,86 @@ save_metrics <- function(recipe, if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) } - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, "skill") - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - subset_skill <- lapply(subset_skill, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - attr(subset_skill[[1]], 'global_attrs') <- global_attributes + if (recipe$Analysis$Output_format == "scorecards") { + for (i in 1:length(subset_metric)) { + if (any('syear' %in% names(dim(subset_metric[[i]])))) { + sdate_dim_save = 'syear' + dates <- data_cube$attrs$Dates + } else { + sdate_dim_save = NULL + dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) + } + ## TODO: Maybe 'scorecards' condition could go here to further simplify + ## the code + extra_string <- get_filename(NULL, recipe, variable, + fcst.sdate, agg, names(subset_metric)[[i]]) + SaveExp(data = subset_metric[[i]], destination = outdir, + Dates = dates, + coords = c(data_cube$coords['longitude'], + data_cube$coords['latitude']), + varname = names(subset_metric)[[i]], + metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, + startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, + ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, + drop_dims = NULL, single_file = TRUE, + extra_string = extra_string) + } + } else { + outfile <- get_filename(outdir, recipe, variable, + fcst.sdate, agg, module) + # Remove singleton dimensions and rearrange lon, lat and time dims + if (tolower(agg) == "global") { + subset_metric <- lapply(subset_metric, function(x) { + Reorder(x, c(lalo, 'time'))}) + } + attr(subset_metric[[1]], 'global_attrs') <- global_attributes - for (i in 1:length(subset_skill)) { - metric <- names(subset_skill[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - subset_skill[[i]][is.na(subset_skill[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') + for (i in 1:length(subset_metric)) { + metric <- names(subset_metric[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('Country', 'time') + } else if (tolower(agg) == "region") { + sdname <- paste0(metric, " region-aggregated metric") + dims <- c('region', 'time') + } else { + sdname <- paste0(metric) + dims <- c(lalo, 'time') + } + metadata <- list(metric = list(name = metric, + standard_name = sdname, + long_name = long_name, + missing_value = missing_val)) + attr(subset_metric[[i]], 'variables') <- metadata + names(dim(subset_metric[[i]])) <- dims + } + # Get grid data and metadata and export to netCDF + if (tolower(agg) == "country") { + country <- get_countries(grid) + ArrayToNc(append(country, time, subset_metric), outfile) } else if (tolower(agg) == "region") { - sdname <- paste0(metric, " region-aggregated 
metric") - dims <- c('region', 'time') + region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) + # TODO: check metadata when more than 1 region is store in the data array + metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) + attr(region, 'variables') <- metadata + vars <- list(region, time) + vars <- c(vars, subset_metric) + ArrayToNc(vars, outfile) } else { - sdname <- paste0(metric) - dims <- c(lalo, 'time') + latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] + longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] + latlon <- .get_latlon(latitude, longitude) + # Compile variables into a list and export to netCDF + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, subset_metric) + ArrayToNc(vars, outfile) } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(subset_skill[[i]], 'variables') <- metadata - names(dim(subset_skill[[i]])) <- dims - } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, subset_skill), outfile) - } else if (tolower(agg) == "region") { - region <- array(1:dim(skill[[1]])['region'], c(dim(skill[[1]])['region'])) - # TODO: check metadata when more than 1 region is store in the data array - metadata <- list(region = list(long_name = data_cube$attrs$Variable$metadata$region$name)) - attr(region, 'variables') <- metadata - vars <- list(region, time) - vars <- c(vars, subset_skill) - ArrayToNc(vars, outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, subset_skill) - ArrayToNc(vars, outfile) } } - info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") + info(recipe$Run$logger, + paste("#####", toupper(module), " METRICS SAVED TO NETCDF FILE #####")) } diff --git a/modules/Saving/R/tmp/CST_SaveExp.R b/modules/Saving/R/tmp/CST_SaveExp.R new file mode 100644 index 0000000000000000000000000000000000000000..2ffd8fa8cd6babc1ac5b31c18c5a50dea5a3c420 --- /dev/null +++ b/modules/Saving/R/tmp/CST_SaveExp.R @@ -0,0 +1,915 @@ +#'Save objects of class 's2dv_cube' to data in NetCDF format +#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@description This function allows to divide and save a object of class +#''s2dv_cube' into a NetCDF file, allowing to reload the saved data using +#'\code{CST_Start} or \code{CST_Load} functions. It also allows to save any +#''s2dv_cube' object that follows the NetCDF attributes conventions. +#' +#'@param data An object of class \code{s2dv_cube}. +#'@param destination A character string containing the directory name in which +#' to save the data. NetCDF file for each starting date are saved into the +#' folder tree: 'destination/Dataset/variable/'. By default the function +#' saves the data into the working directory. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no +#' start date dimension. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. If 'Dates' are used, it can't be NULL. 
If there is no forecast +#' time dimension, 'Dates' will be set to NULL and will not be used. By +#' default, it is set to 'time'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' It can be NULL if there is no dataset dimension. By default, it is set to +#' 'dataset'. +#'@param var_dim A character string indicating the name of variable dimension. +#' It can be NULL if there is no variable dimension. By default, it is set to +#' 'var'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It can be NULL if there is no member dimension. By default, it is +#' set to 'member'. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lengths +#' between 1 and 10. If it is NULL, the coordinate corresponding to the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. +#'@param single_file A logical value indicating if the whole object is saved in a +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is split by dataset, variable and start date. When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '<var_name>_<first_sdate>_<last_sdate>.nc'; when it is FALSE, +#' it is '<var_name>_<sdate>.nc'. It is FALSE by default. +#'@param drop_dims (Optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped so that they don't +#' appear in the netCDF file. Only dimensions that are not used in the +#' computation can be dropped. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will replace the default +#' file name entirely; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: '<var_name>_<extra_string>_<sdate>.nc'. It is NULL by +#' default. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: 'Dates' have forecast time and start date dimension, 'single_file' is +#' TRUE and 'time_bounds' are not used. When it is TRUE, it saves the forecast +#' time with units of 'hours since'; if it is FALSE, the time units will be a +#' number of time steps with its corresponding frequency (e.g. n days, n months +#' or n hours). It is FALSE by default. +#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. +#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file is TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name (by default): +#' '<var_name>_<first_sdate>_<last_sdate>.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since <first_date>'.
If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be 'hours +#' since'. When the time units are 'hours since' the time steps are assumed to +#' be equally spaced. +#'} +#'\item{\code{single_file is FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. Files +#' with different variables and datasets are stored in separate directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '<var_name>_<sdate>.nc'. +#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since <start_date>'. +#'} +#' +#'@seealso \code{\link[startR]{Start}}, \code{\link{as.s2dv_cube}} and +#'\code{\link{s2dv_cube}} +#' +#'@examples +#'data <- lonlat_temp_st$exp +#'CST_SaveExp(data = data, ftime_dim = 'ftime', var_dim = 'var', +#' dat_dim = 'dataset', sdate_dim = 'sdate') +#' +#'@export +CST_SaveExp <- function(data, destination = "./", startdates = NULL, + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', + var_dim = 'var', drop_dims = NULL, + single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = FALSE) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + # Check object structure + if (!all(c('data', 'attrs') %in% names(data))) { + stop("Parameter 'data' must have at least 'data' and 'attrs' elements ", + "within the 's2dv_cube' structure.") + } + if (!inherits(data$attrs, 'list')) { + stop("Level 'attrs' must be a list with at least 'Dates' element.") + } + # metadata + if (!is.null(data$attrs$Variable$metadata)) { + if (!inherits(data$attrs$Variable$metadata, 'list')) { + stop("Element metadata from Variable element in attrs must be a list.") + } + } + # Dates + if (is.null(data$attrs$Dates)) { + stop("Element 'Dates' from 'attrs' level cannot be NULL.") + } + if (is.null(dim(data$attrs$Dates))) { + stop("Element 'Dates' from 'attrs' level must have time dimensions.") + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + } + # startdates + if (is.null(startdates)) { + if (is.character(data$coords[[sdate_dim]])) { + startdates <- data$coords[[sdate_dim]] + } + } + + SaveExp(data = data$data, + destination = destination, + coords = data$coords, + Dates = data$attrs$Dates, + time_bounds = data$attrs$time_bounds, + startdates = startdates, + varname = data$attrs$Variable$varName, + metadata = data$attrs$Variable$metadata, + Datasets = data$attrs$Datasets, + sdate_dim = sdate_dim, ftime_dim = ftime_dim, + memb_dim = memb_dim, + dat_dim = dat_dim, var_dim = var_dim, + drop_dims = drop_dims, + single_file = single_file, + extra_string = extra_string, + global_attrs = global_attrs, + units_hours_since = units_hours_since) +} +#'Save a multidimensional array with metadata to data in NetCDF format +#'@description This function allows saving a data array with metadata into a +#'NetCDF file, so that the saved data can be reloaded using the \code{Start} +#'function from the startR package. If the original 's2dv_cube' object has been created from +#'\code{CST_Load()}, then it can be reloaded with \code{Load()}.
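+#' As a quick sketch of a round trip ('dat' and 'dates' are hypothetical
+#' inputs, and the file name shown assumes the default single-file naming
+#' '<var_name>_<first_sdate>_<last_sdate>.nc'):
+#' \preformatted{
+#' SaveExp(data = dat, Dates = dates, varname = 'tas', destination = './out',
+#'         single_file = TRUE, ftime_dim = 'time', sdate_dim = 'sdate')
+#' arr <- easyNCDF::NcToArray('./out/tas_19931101_20161101.nc',
+#'                            vars_to_read = 'tas')
+#' }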
+#' +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#' +#'@param data A multi-dimensional array with named dimensions. +#'@param destination A character string indicating the path where to store the +#' NetCDF files. +#'@param coords A named list with elements of the coordinates corresponding to +#' the dimensions of the data parameter. The names and length of each element +#' must correspond to the names of the dimensions. If any coordinate is not +#' provided, it is set as an index vector with the values from 1 to the length +#' of the corresponding dimension. +#'@param Dates A named array of dates with the corresponding sdate and forecast +#' time dimension. If there is no sdate_dim, you can set it to NULL. +#' It must have ftime_dim dimension. +#'@param time_bounds (Optional) A list of two arrays of dates containing +#' the lower (first array) and the upper (second array) time bounds +#' corresponding to Dates. Each array must have the same dimensions as Dates. +#' If 'Dates' parameter is NULL, 'time_bounds' are not used. It is NULL by +#' default. +#'@param startdates A vector of dates that will be used for the filenames +#' when saving the data in multiple files (single_file = FALSE). It must be a +#' vector of the same length as the start date dimension of data. It must be a +#' vector of class \code{Dates}, \code{'POSIXct'} or character with lengths +#' between 1 and 10. If it is NULL, the coordinate corresponding to the start +#' date dimension or the first Date of each time step will be used as the name +#' of the files. It is NULL by default. +#'@param varname A character string indicating the name of the variable to be +#' saved. +#'@param metadata A named list where each element is a variable containing the +#' corresponding information. The information must be contained in a list of +#' lists for each variable. +#'@param Datasets A vector of character strings indicating the names of the +#' datasets. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. It can be NULL if there is no +#' start date dimension. +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension. By default, it is set to 'time'. It can be NULL if there is no +#' forecast time dimension. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' By default, it is set to 'dataset'. It can be NULL if there is no dataset +#' dimension. +#'@param var_dim A character string indicating the name of variable dimension. +#' By default, it is set to 'var'. It can be NULL if there is no variable +#' dimension. +#'@param memb_dim A character string indicating the name of the member +#' dimension. By default, it is set to 'member'. It can be NULL if there is no +#' member dimension. +#'@param drop_dims (Optional) A vector of character strings indicating the +#' dimension names of length 1 that need to be dropped so that they don't +#' appear in the netCDF file. Only dimensions that are not used in the +#' computation can be dropped. The dimensions used in the computation are the ones +#' specified in: sdate_dim, ftime_dim, dat_dim, var_dim and memb_dim. It is +#' NULL by default. +#'@param single_file A logical value indicating if the whole object is saved in a +#' single file (TRUE) or in multiple files (FALSE). When it is FALSE, +#' the array is split by dataset, variable and start date.
When there are +#' no specified time dimensions, the data will be saved in a single file by +#' default. The output file name when 'single_file' is TRUE is a character +#' string containing: '<var_name>_<first_sdate>_<last_sdate>.nc'; when it is FALSE, +#' it is '<var_name>_<sdate>.nc'. It is FALSE by default. +#'@param extra_string (Optional) A character string to be included as part of +#' the file name, for instance, to identify member or realization. When +#' single_file is TRUE, the 'extra_string' will replace the default +#' file name entirely; when single_file is FALSE, the 'extra_string' will be added +#' in the file name as: '<var_name>_<extra_string>_<sdate>.nc'. It is NULL by +#' default. +#'@param global_attrs (Optional) A list with elements containing the global +#' attributes to be saved in the NetCDF. +#'@param units_hours_since (Optional) A logical value only available for the +#' case: Dates have forecast time and start date dimension, single_file is +#' TRUE and 'time_bounds' is NULL. When it is TRUE, it saves the forecast time +#' with units of 'hours since'; if it is FALSE, the time units will be a number +#' of time steps with its corresponding frequency (e.g. n days, n months or n +#' hours). It is FALSE by default. +#' +#'@return Multiple or single NetCDF files containing the data array.\cr +#'\item{\code{single_file is TRUE}}{ +#' All data is saved in a single file located in the specified destination +#' path with the following name (by default): +#' '<var_name>_<first_sdate>_<last_sdate>.nc'. Multiple variables +#' are saved separately in the same file. The forecast time units +#' are calculated from each start date (if sdate_dim is not NULL) or from +#' the time step. If 'units_hours_since' is TRUE, the forecast time units +#' will be 'hours since <first_date>'. If 'units_hours_since' is FALSE, +#' the forecast time units are extracted from the frequency of the time steps +#' (hours, days, months); if no frequency is found, the units will be 'hours +#' since'. When the time units are 'hours since' the time steps are assumed to +#' be equally spaced. +#'} +#'\item{\code{single_file is FALSE}}{ +#' The data array is subset and stored into multiple files. Each file +#' contains the data subset for each start date, variable and dataset. Files +#' with different variables and datasets are stored in separate directories +#' within the following directory tree: 'destination/Dataset/variable/'. +#' The name of each file will be by default: '<var_name>_<sdate>.nc'. +#' The forecast time units are calculated from each start date (if sdate_dim +#' is not NULL) or from the time step. The forecast time units will be 'hours +#' since <start_date>'.
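+#' For example, with the default naming, the file for variable 'tas' and
+#' start date '19931101' would be 'tas_19931101.nc'.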
+#'} +#' +#'@examples +#'data <- lonlat_temp_st$exp$data +#'lon <- lonlat_temp_st$exp$coords$lon +#'lat <- lonlat_temp_st$exp$coords$lat +#'coords <- list(lon = lon, lat = lat) +#'Datasets <- lonlat_temp_st$exp$attrs$Datasets +#'varname <- 'tas' +#'Dates <- lonlat_temp_st$exp$attrs$Dates +#'metadata <- lonlat_temp_st$exp$attrs$Variable$metadata +#'SaveExp(data = data, coords = coords, Datasets = Datasets, varname = varname, +#' Dates = Dates, metadata = metadata, single_file = TRUE, +#' ftime_dim = 'ftime', var_dim = 'var', dat_dim = 'dataset') +#' +#'@import easyNCDF +#'@importFrom s2dv Reorder +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +SaveExp <- function(data, destination = "./", coords = NULL, + Dates = NULL, time_bounds = NULL, startdates = NULL, + varname = NULL, metadata = NULL, Datasets = NULL, + sdate_dim = 'sdate', ftime_dim = 'time', + memb_dim = 'member', dat_dim = 'dataset', var_dim = 'var', + drop_dims = NULL, single_file = FALSE, extra_string = NULL, + global_attrs = NULL, units_hours_since = FALSE) { + ## Initial checks + # data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + dimnames <- names(dim(data)) + if (is.null(dimnames)) { + stop("Parameter 'data' must be an array with named dimensions.") + } + if (!is.null(attributes(data)$dimensions)) { + attributes(data)$dimensions <- NULL + } + # destination + if (!is.character(destination) | length(destination) > 1) { + stop("Parameter 'destination' must be a character string of one element ", + "indicating the name of the file (including the folder if needed) ", + "where the data will be saved.") + } + # drop_dims + if (!is.null(drop_dims)) { + if (!is.character(drop_dims) | any(!drop_dims %in% names(dim(data)))) { + warning("Parameter 'drop_dims' must be character string containing ", + "the data dimension names to be dropped. It will not be used.") + } else if (!all(dim(data)[drop_dims] %in% 1)) { + warning("Parameter 'drop_dims' can only contain dimension names ", + "that are of length 1. It will not be used.") + } else if (any(drop_dims %in% c(ftime_dim, sdate_dim, dat_dim, memb_dim, var_dim))) { + warning("Parameter 'drop_dims' contains dimensions used in the computation. ", + "It will not be used.") + drop_dims <- NULL + } else { + data <- Subset(x = data, along = drop_dims, + indices = lapply(1:length(drop_dims), function(x) 1), + drop = 'selected') + dimnames <- names(dim(data)) + } + } + # coords + if (!is.null(coords)) { + if (!inherits(coords, 'list')) { + stop("Parameter 'coords' must be a named list of coordinates.") + } + if (is.null(names(coords))) { + stop("Parameter 'coords' must have names corresponding to coordinates.") + } + } else { + coords <- sapply(dimnames, function(x) 1:dim(data)[x]) + } + # varname + if (is.null(varname)) { + varname <- 'X' + } else if (length(varname) > 1) { + multiple_vars <- TRUE + } else { + multiple_vars <- FALSE + } + if (!all(sapply(varname, is.character))) { + stop("Parameter 'varname' must be a character string with the ", + "variable names.") + } + # single_file + if (!inherits(single_file, 'logical')) { + warning("Parameter 'single_file' must be a logical value. 
It will be ", + "set as FALSE.") + single_file <- FALSE + } + # extra_string + if (!is.null(extra_string)) { + if (!is.character(extra_string)) { + stop("Parameter 'extra_string' must be a character string.") + } + } + # global_attrs + if (!is.null(global_attrs)) { + if (!inherits(global_attrs, 'list')) { + stop("Parameter 'global_attrs' must be a list.") + } + } + + ## Dimensions checks + # Spatial coordinates + if (!any(dimnames %in% .KnownLonNames()) | + !any(dimnames %in% .KnownLatNames())) { + lon_dim <- NULL + lat_dim <- NULL + } else { + lon_dim <- dimnames[which(dimnames %in% .KnownLonNames())] + lat_dim <- dimnames[which(dimnames %in% .KnownLatNames())] + } + # ftime_dim + if (!is.null(ftime_dim)) { + if (!is.character(ftime_dim)) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!all(ftime_dim %in% dimnames)) { + stop("Parameter 'ftime_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no forecast time dimension.") + } + } + # sdate_dim + if (!is.null(sdate_dim)) { + if (!is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!all(sdate_dim %in% dimnames)) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + } + # memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim)) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!all(memb_dim %in% dimnames)) { + stop("Parameter 'memb_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no member dimension.") + } + } + # dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!all(dat_dim %in% dimnames)) { + stop("Parameter 'dat_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no Datasets dimension.") + } + n_datasets <- dim(data)[dat_dim] + } else { + n_datasets <- 1 + } + # var_dim + if (!is.null(var_dim)) { + if (!is.character(var_dim)) { + stop("Parameter 'var_dim' must be a character string.") + } + if (!all(var_dim %in% dimnames)) { + stop("Parameter 'var_dim' is not found in 'data' dimension. Set it ", + "as NULL if there is no variable dimension.") + } + n_vars <- dim(data)[var_dim] + } else { + n_vars <- 1 + } + # minimum dimensions + if (all(dimnames %in% c(var_dim, dat_dim))) { + if (!single_file) { + warning("Parameter data has only ", + paste(c(var_dim, dat_dim), collapse = ' and '), " dimensions ", + "and it cannot be splitted in multiple files. All data will ", + "be saved in a single file.") + single_file <- TRUE + } + } + # Dates (1): initial checks + if (!is.null(Dates)) { + if (!any(inherits(Dates, "POSIXct"), inherits(Dates, "Date"))) { + stop("Parameter 'Dates' must be of 'POSIXct' or 'Dates' class.") + } + if (is.null(dim(Dates))) { + stop("Parameter 'Dates' must have dimension names.") + } + if (all(is.null(ftime_dim), is.null(sdate_dim))) { + warning("Parameters 'ftime_dim' and 'sdate_dim' can't both be NULL ", + "if 'Dates' are used. 'Dates' will not be used.") + Dates <- NULL + } + # sdate_dim in Dates + if (!is.null(sdate_dim)) { + if (!sdate_dim %in% names(dim(Dates))) { + warning("Parameter 'sdate_dim' is not found in 'Dates' dimension. ", + "Dates will not be used.") + Dates <- NULL + } + } + # ftime_dim in Dates + if (!is.null(ftime_dim)) { + if (!ftime_dim %in% names(dim(Dates))) { + warning("Parameter 'ftime_dim' is not found in 'Dates' dimension. 
", + "Dates will not be used.") + Dates <- NULL + } + } + } + # time_bounds + if (!is.null(time_bounds)) { + if (!inherits(time_bounds, 'list')) { + stop("Parameter 'time_bounds' must be a list with two dates arrays.") + } + time_bounds_dims <- lapply(time_bounds, function(x) dim(x)) + if (!identical(time_bounds_dims[[1]], time_bounds_dims[[2]])) { + stop("Parameter 'time_bounds' must have 2 arrays with same dimensions.") + } + if (is.null(Dates)) { + time_bounds <- NULL + } else { + name_tb <- sort(names(time_bounds_dims[[1]])) + name_dt <- sort(names(dim(Dates))) + if (!identical(dim(Dates)[name_dt], time_bounds_dims[[1]][name_tb])) { + stop(paste0("Parameter 'Dates' and 'time_bounds' must have same length ", + "of all dimensions.")) + } + } + } + # Dates (2): Check dimensions + if (!is.null(Dates)) { + if (any(dim(Dates)[!names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] != 1)) { + stop("Parameter 'Dates' can have only 'sdate_dim' and 'ftime_dim' ", + "dimensions of length greater than 1.") + } + # drop dimensions of length 1 different from sdate_dim and ftime_dim + dim(Dates) <- dim(Dates)[names(dim(Dates)) %in% c(ftime_dim, sdate_dim)] + + # add ftime if needed + if (is.null(ftime_dim)) { + warning("A 'time' dimension of length 1 will be added to 'Dates'.") + dim(Dates) <- c(time = 1, dim(Dates)) + dim(data) <- c(time = 1, dim(data)) + dimnames <- names(dim(data)) + ftime_dim <- 'time' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(time = 1, dim(x)) + return(x) + }) + } + units_hours_since <- TRUE + } + # add sdate if needed + if (is.null(sdate_dim)) { + if (!single_file) { + dim(Dates) <- c(dim(Dates), sdate = 1) + dim(data) <- c(dim(data), sdate = 1) + dimnames <- names(dim(data)) + sdate_dim <- 'sdate' + if (!is.null(time_bounds)) { + time_bounds <- lapply(time_bounds, function(x) { + dim(x) <- c(dim(x), sdate = 1) + return(x) + }) + } + if (!is.null(startdates)) { + if (length(startdates) != 1) { + warning("Parameter 'startdates' must be of length 1 if 'sdate_dim' is NULL.", + "They won't be used.") + startdates <- NULL + } + } + } + units_hours_since <- TRUE + } + } + # startdates + if (!is.null(Dates)) { + # check startdates + if (is.null(startdates)) { + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } else if (any(nchar(startdates) > 10, nchar(startdates) < 1)) { + warning("Parameter 'startdates' should be a character string containing ", + "the start dates in the format 'yyyy-mm-dd', 'yyyymmdd', 'yyyymm', ", + "'POSIXct' or 'Dates' class. Files will be named with Dates instead.") + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + } + } else if (!single_file) { + warning("Dates must be provided if 'data' must be saved in separated files. 
", + "All data will be saved in a single file.") + single_file <- TRUE + } + # startdates + if (is.null(startdates)) { + if (is.null(sdate_dim)) { + startdates <- 'XXX' + } else { + startdates <- rep('XXX', dim(data)[sdate_dim]) + } + } else { + if (any(inherits(startdates, "POSIXct"), inherits(startdates, "Date"))) { + startdates <- format(startdates, "%Y%m%d") + } + if (!is.null(sdate_dim)) { + if (dim(data)[sdate_dim] != length(startdates)) { + warning(paste0("Parameter 'startdates' doesn't have the same length ", + "as dimension '", sdate_dim,"', it will not be used.")) + startdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + startdates <- format(startdates, "%Y%m%d") + } + } + } + + # Datasets + if (is.null(Datasets)) { + Datasets <- rep('XXX', n_datasets ) + } + if (inherits(Datasets, 'list')) { + Datasets <- names(Datasets) + } + if (n_datasets > length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is greater than those listed in ", + "element 'Datasets' and the first element will be reused.") + Datasets <- c(Datasets, rep(Datasets[1], n_datasets - length(Datasets))) + } else if (n_datasets < length(Datasets)) { + warning("Dimension 'Datasets' in 'data' is smaller than those listed in ", + "element 'Datasets' and only the firsts elements will be used.") + Datasets <- Datasets[1:n_datasets] + } + + ## NetCDF dimensions definition + excluded_dims <- var_dim + if (!is.null(Dates)) { + excluded_dims <- c(excluded_dims, sdate_dim, ftime_dim) + } + if (!single_file) { + excluded_dims <- c(excluded_dims, dat_dim) + } + + ## Unknown dimensions check + alldims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) + if (!all(dimnames %in% alldims)) { + unknown_dims <- dimnames[which(!dimnames %in% alldims)] + memb_dim <- c(memb_dim, unknown_dims) + } + + filedims <- c(dat_dim, var_dim, sdate_dim, lon_dim, lat_dim, ftime_dim, memb_dim) + filedims <- filedims[which(!filedims %in% excluded_dims)] + + # Delete unneded coords + coords[c(names(coords)[!names(coords) %in% filedims])] <- NULL + out_coords <- NULL + for (i_coord in filedims) { + # vals + if (i_coord %in% names(coords)) { + if (length(coords[[i_coord]]) != dim(data)[i_coord]) { + warning(paste0("Coordinate '", i_coord, "' has different lenght as ", + "its dimension and it will not be used.")) + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } else if (is.numeric(coords[[i_coord]])) { + out_coords[[i_coord]] <- as.vector(coords[[i_coord]]) + } else { + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } + } else { + out_coords[[i_coord]] <- 1:dim(data)[i_coord] + } + dim(out_coords[[i_coord]]) <- dim(data)[i_coord] + + ## metadata + if (i_coord %in% names(metadata)) { + if ('variables' %in% names(attributes(metadata[[i_coord]]))) { + # from Start: 'lon' or 'lat' + attrs <- attributes(metadata[[i_coord]])[['variables']] + attrs[[i_coord]]$dim <- NULL + attr(out_coords[[i_coord]], 'variables') <- attrs + } else if (inherits(metadata[[i_coord]], 'list')) { + # from Start and Load: main var + attr(out_coords[[i_coord]], 'variables') <- list(metadata[[i_coord]]) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord + } else if (!is.null(attributes(metadata[[i_coord]]))) { + # from Load + attrs <- attributes(metadata[[i_coord]]) + # We remove because some attributes can't be saved + attrs <- NULL + attr(out_coords[[i_coord]], 'variables') <- list(attrs) + names(attributes(out_coords[[i_coord]])$variables) <- i_coord + } + } + } + + if (!single_file) { + for (i in 1:n_datasets) { + path <- 
file.path(destination, Datasets[i], varname) + for (j in 1:n_vars) { + if (!dir.exists(path[j])) { + dir.create(path[j], recursive = TRUE) + } + startdates <- gsub("-", "", startdates) + dim(startdates) <- c(length(startdates)) + names(dim(startdates)) <- sdate_dim + if (is.null(dat_dim) & is.null(var_dim)) { + data_subset <- data + } else if (is.null(dat_dim)) { + data_subset <- Subset(data, c(var_dim), list(j), drop = 'selected') + } else if (is.null(var_dim)) { + data_subset <- Subset(data, along = c(dat_dim), list(i), drop = 'selected') + } else { + data_subset <- Subset(data, c(dat_dim, var_dim), list(i, j), drop = 'selected') + } + target <- names(dim(data_subset))[which(!names(dim(data_subset)) %in% c(sdate_dim, ftime_dim))] + target_dims_data <- c(target, ftime_dim) + if (is.null(Dates)) { + input_data <- list(data_subset, startdates) + target_dims <- list(target_dims_data, NULL) + } else if (!is.null(time_bounds)) { + input_data <- list(data_subset, startdates, Dates, + time_bounds[[1]], time_bounds[[2]]) + target_dims = list(target_dims_data, NULL, + ftime_dim, ftime_dim, ftime_dim) + } else { + input_data <- list(data_subset, startdates, Dates) + target_dims = list(target_dims_data, NULL, ftime_dim) + } + Apply(data = input_data, + target_dims = target_dims, + fun = .saveexp, + destination = path[j], + coords = out_coords, + ftime_dim = ftime_dim, + varname = varname[j], + metadata_var = metadata[[varname[j]]], + extra_string = extra_string, + global_attrs = global_attrs) + } + } + } else { + # time_bnds + if (!is.null(time_bounds)) { + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + } + # Dates + remove_metadata_dim <- TRUE + if (!is.null(Dates)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + # ftime definition + leadtimes <- as.numeric(difftime(Dates, sdates, units = "hours")) + } else { + # sdate definition + sdates <- Subset(Dates, along = ftime_dim, 1, drop = 'selected') + differ <- as.numeric(difftime(sdates, sdates[1], units = "hours")) + dim(differ) <- dim(data)[sdate_dim] + differ <- list(differ) + names(differ) <- sdate_dim + out_coords <- c(differ, out_coords) + attrs <- list(units = paste('hours since', sdates[1]), + calendar = 'proleptic_gregorian', longname = sdate_dim) + attr(out_coords[[sdate_dim]], 'variables')[[sdate_dim]] <- attrs + # ftime definition + Dates <- Reorder(Dates, c(ftime_dim, sdate_dim)) + differ_ftime <- array(dim = dim(Dates)) + for (i in 1:length(sdates)) { + differ_ftime[, i] <- as.numeric(difftime(Dates[, i], Dates[1, i], + units = "hours")) + } + dim(differ_ftime) <- dim(Dates) + leadtimes <- Subset(differ_ftime, along = sdate_dim, 1, drop = 'selected') + if (!all(apply(differ_ftime, 1, function(x){length(unique(x)) == 1}))) { + warning("Time steps are not equal for all start dates. 
Only ", + "forecast time values for the first start date will be saved ", + "correctly.") + } + } + if (all(!units_hours_since, is.null(time_bounds))) { + if (all(diff(leadtimes/24) == 1)) { + # daily values + units <- 'days' + leadtimes_vals <- round(leadtimes/24) + 1 + } else if (all(diff(leadtimes/24) %in% c(28, 29, 30, 31))) { + # monthly values + units <- 'months' + leadtimes_vals <- round(leadtimes/(30.437*24)) + 1 + } else { + # other frequency + units <- 'hours' + leadtimes_vals <- leadtimes + 1 + } + } else { + units <- paste('hours since', paste(sdates, collapse = ', ')) + leadtimes_vals <- leadtimes + } + + # Add time_bnds + if (!is.null(time_bounds)) { + if (is.null(sdate_dim)) { + sdates <- Dates[1] + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + leadtimes_bnds <- as.numeric(difftime(time_bnds, sdates, units = "hours")) + dim(leadtimes_bnds) <- c(dim(Dates), bnds = 2) + } else { + # assuming they have sdate and ftime + time_bnds <- lapply(time_bounds, function(x) { + x <- Reorder(x, c(ftime_dim, sdate_dim)) + return(x) + }) + time_bnds <- c(time_bounds[[1]], time_bounds[[2]]) + dim(time_bnds) <- c(dim(Dates), bnds = 2) + differ_bnds <- array(dim = c(dim(time_bnds))) + for (i in 1:length(sdates)) { + differ_bnds[, i, ] <- as.numeric(difftime(time_bnds[, i, ], Dates[1, i], + units = "hours")) + } + # NOTE (TODO): Add a warning when they are not equally spaced? + leadtimes_bnds <- Subset(differ_bnds, along = sdate_dim, 1, drop = 'selected') + } + # Add time_bnds + leadtimes_bnds <- Reorder(leadtimes_bnds, c('bnds', ftime_dim)) + leadtimes_bnds <- list(leadtimes_bnds) + names(leadtimes_bnds) <- 'time_bnds' + out_coords <- c(leadtimes_bnds, out_coords) + attrs <- list(units = paste('hours since', paste(sdates, collapse = ', ')), + calendar = 'proleptic_gregorian', + long_name = 'time bounds', unlim = FALSE) + attr(out_coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime var + dim(leadtimes_vals) <- dim(data)[ftime_dim] + leadtimes_vals <- list(leadtimes_vals) + names(leadtimes_vals) <- ftime_dim + out_coords <- c(leadtimes_vals, out_coords) + attrs <- list(units = units, calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bounds)) { + attrs$bounds = 'time_bnds' + } + attr(out_coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + for (j in 1:n_vars) { + remove_metadata_dim <- FALSE + metadata[[varname[j]]]$dim <- list(list(name = ftime_dim, unlim = TRUE)) + } + # Reorder ftime_dim to last + if (length(dim(data)) != which(names(dim(data)) == ftime_dim)) { + order <- c(names(dim(data))[which(!names(dim(data)) %in% c(ftime_dim))], ftime_dim) + data <- Reorder(data, order) + } + } + # var definition + extra_info_var <- NULL + for (j in 1:n_vars) { + varname_j <- varname[j] + metadata_j <- metadata[[varname_j]] + if (is.null(var_dim)) { + out_coords[[varname_j]] <- data + } else { + out_coords[[varname_j]] <- Subset(data, var_dim, j, drop = 'selected') + } + if (!is.null(metadata_j)) { + if (remove_metadata_dim) metadata_j$dim <- NULL + attr(out_coords[[varname_j]], 'variables') <- list(metadata_j) + names(attributes(out_coords[[varname_j]])$variables) <- varname_j + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(out_coords[[varname_j]])$global_attrs <- global_attrs + } + } + if (is.null(extra_string)) { + first_sdate <- startdates[1] + last_sdate <- startdates[length(startdates)] + gsub("-", "", first_sdate) + file_name <- paste0(paste(c(varname, + 
gsub("-", "", first_sdate), + gsub("-", "", last_sdate)), + collapse = '_'), ".nc") + } else { + nc <- substr(extra_string, nchar(extra_string)-2, nchar(extra_string)) + if (nc == ".nc") { + file_name <- extra_string + } else { + file_name <- paste0(extra_string, ".nc") + } + } + full_filename <- file.path(destination, file_name) + ArrayToNc(out_coords, full_filename) + } +} + +.saveexp <- function(data, coords, destination = "./", + startdates = NULL, dates = NULL, + time_bnds1 = NULL, time_bnds2 = NULL, + ftime_dim = 'time', varname = 'var', + metadata_var = NULL, extra_string = NULL, + global_attrs = NULL) { + remove_metadata_dim <- TRUE + if (!is.null(dates)) { + if (!any(is.null(time_bnds1), is.null(time_bnds2))) { + time_bnds <- c(time_bnds1, time_bnds2) + time_bnds <- as.numeric(difftime(time_bnds, dates[1], units = "hours")) + dim(time_bnds) <- c(dim(data)[ftime_dim], bnds = 2) + time_bnds <- Reorder(time_bnds, c('bnds', ftime_dim)) + time_bnds <- list(time_bnds) + names(time_bnds) <- 'time_bnds' + coords <- c(time_bnds, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = 'time bounds') + attr(coords[['time_bnds']], 'variables')$time_bnds <- attrs + } + # Add ftime_dim + differ <- as.numeric(difftime(dates, dates[1], units = "hours")) + dim(differ) <- dim(data)[ftime_dim] + differ <- list(differ) + names(differ) <- ftime_dim + coords <- c(differ, coords) + attrs <- list(units = paste('hours since', dates[1]), + calendar = 'proleptic_gregorian', + longname = ftime_dim, + dim = list(list(name = ftime_dim, unlim = TRUE))) + if (!is.null(time_bnds1)) { + attrs$bounds = 'time_bnds' + } + attr(coords[[ftime_dim]], 'variables')[[ftime_dim]] <- attrs + metadata_var$dim <- list(list(name = ftime_dim, unlim = TRUE)) + remove_metadata_dim <- FALSE + } + # Add data + coords[[varname]] <- data + if (!is.null(metadata_var)) { + if (remove_metadata_dim) metadata_var$dim <- NULL + attr(coords[[varname]], 'variables') <- list(metadata_var) + names(attributes(coords[[varname]])$variables) <- varname + } + # Add global attributes + if (!is.null(global_attrs)) { + attributes(coords[[varname]])$global_attrs <- global_attrs + } + + if (is.null(extra_string)) { + file_name <- paste0(varname, "_", startdates, ".nc") + } else { + file_name <- paste0(varname, "_", extra_string, "_", startdates, ".nc") + } + full_filename <- file.path(destination, file_name) + ArrayToNc(coords, full_filename) +} \ No newline at end of file diff --git a/modules/Saving/Saving.R b/modules/Saving/Saving.R index fc9fe4eebd0d51f1b33a9a397a566d424215d99c..1995c4d53c712157f85a39d7668b8724e59b28be 100644 --- a/modules/Saving/Saving.R +++ b/modules/Saving/Saving.R @@ -14,6 +14,7 @@ source("modules/Saving/R/get_time.R") source("modules/Saving/R/get_latlon.R") source("modules/Saving/R/get_global_attributes.R") source("modules/Saving/R/drop_dims.R") +source("modules/Saving/R/tmp/CST_SaveExp.R") Saving <- function(recipe, data, skill_metrics = NULL, @@ -73,7 +74,7 @@ Saving <- function(recipe, data, # Export skill metrics if (!is.null(skill_metrics)) { save_metrics(recipe = recipe, - skill = skill_metrics, + metrics = skill_metrics, data_cube = data$hcst, agg = agg, outdir = outdir[var]) } diff --git a/modules/Scorecards/R/tmp/ClimPalette.R b/modules/Scorecards/R/tmp/ClimPalette.R new file mode 100644 index 0000000000000000000000000000000000000000..eed3c802cca93b87d3cf6b55fcc04a53051bb56d --- /dev/null +++ b/modules/Scorecards/R/tmp/ClimPalette.R @@ -0,0 +1,78 @@ +#'Generate 
Climate Color Palettes +#' +#'Generates a colorblind-friendly color palette with color ranges useful for +#'plotting climate temperature variables. +#' +#'@param palette A character string with the name of the palette. The current choices: +#' \itemize{ +#' \item{'bluered': from blue through white to red.} +#' \item{'redblue': from red through white to blue.} +#' \item{'yellowred': from yellow through orange to red.} +#' \item{'redyellow': from red through orange to yellow.} +#' \item{'purpleorange': from purple through white to orange.} +#' \item{'orangepurple': from orange through white to purple.} +#' } +#'@param n A number indicating how many colors to generate. +#' +#'@return +#'ClimPalette() returns the function that generates the color palette and the +#'attribute 'na_color'.\cr +#'ClimColors() returns a vector of the colors. +#' +#'@examples +#'lims <- seq(-1, 1, length.out = 21) +#' +#'cb <- ColorBarContinuous(lims, color_fun = ClimPalette('redyellow'), plot = FALSE) +#' +#'cols <- ClimColors(20) +#'cb <- ColorBarContinuous(lims, cols, plot = FALSE) +#' +#'@importFrom grDevices colorRampPalette +#'@export +ClimPalette <- function(palette = "bluered") { + if (palette == "bluered") { + colorbar <- colorRampPalette(rev(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redblue") { + colorbar <- colorRampPalette(c("#67001f", "#b2182b", "#d6604d", + "#f4a582", "#fddbc7", "#f7f7f7", + "#d1e5f0", "#92c5de", "#4393c3", + "#2166ac", "#053061")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "yellowred") { + colorbar <- colorRampPalette(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "redyellow") { + colorbar <- colorRampPalette(rev(c("#ffffcc", "#ffeda0", "#fed976", + "#feb24c", "#fd8d3c", "#fc4e2a", + "#e31a1c", "#bd0026", "#800026"))) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "purpleorange") { + colorbar <- colorRampPalette(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08")) + attr(colorbar, 'na_color') <- 'pink' + } else if (palette == "orangepurple") { + colorbar <- colorRampPalette(rev(c("#2d004b", "#542789", "#8073ac", + "#b2abd2", "#d8daeb", "#f7f7f7", + "#fee0b6", "#fdb863", "#e08214", + "#b35806", "#7f3b08"))) + attr(colorbar, 'na_color') <- 'pink' + } else { + stop("Parameter 'palette' must be one of 'bluered', 'redblue', 'yellowred', ", + "'redyellow', 'purpleorange' or 'orangepurple'.") + } + colorbar +} + +#'@rdname ClimPalette +#'@export +ClimColors <- function(n, palette = "bluered") { + ClimPalette(palette)(n) +} diff --git a/modules/Scorecards/R/tmp/ColorBarContinuous.R b/modules/Scorecards/R/tmp/ColorBarContinuous.R new file mode 100644 index 0000000000000000000000000000000000000000..a4ef933fe032cf60a22e992e1bcae175b79d4961 --- /dev/null +++ b/modules/Scorecards/R/tmp/ColorBarContinuous.R @@ -0,0 +1,594 @@ +#'Draws a Continuous Color Bar +#' +#'Generates a color bar to use as a colouring function for map plots and +#'optionally draws it (horizontally or vertically) to be added to map +#'multipanels or plots. It is possible to draw triangles at the ends of the +#'colour bar to represent values that go beyond the range of interest.
A +#'number of options is provided to adjust the colours and the position and +#'size of the components. The drawn colour bar spans a whole figure region +#'and is compatible with figure layouts.\cr\cr +#'The generated colour bar consists of a set of breaks that define the +#'length(brks) - 1 intervals to classify each of the values in each of the +#'grid cells of a two-dimensional field. The corresponding grid cell of a +#'given value of the field will be coloured in function of the interval it +#'belongs to.\cr\cr +#'The only mandatory parameters are 'var_limits' or 'brks' (in its second +#'format, see below). +#' +#'@param brks Can be provided in two formats: +#'\itemize{ +#' \item{A single value with the number of breaks to be generated +#' automatically, between the minimum and maximum specified in 'var_limits' +#' (both inclusive). Hence the parameter 'var_limits' is mandatory if 'brks' +#' is provided with this format. If 'bar_limits' is additionally provided, +#' values only between 'bar_limits' will be generated. The higher the value +#' of 'brks', the smoother the plot will look.} +#' \item{A vector with the actual values of the desired breaks. Values will +#' be reordered by force to ascending order. If provided in this format, no +#' other parameters are required to generate/plot the colour bar.} +#'} +#' This parameter is optional if 'var_limits' is specified. If 'brks' not +#' specified but 'cols' is specified, it will take as value length(cols) + 1. +#' If 'cols' is not specified either, 'brks' will take 21 as value. +#'@param cols Vector of length(brks) - 1 valid colour identifiers, for each +#' interval defined by the breaks. This parameter is optional and will be +#' filled in with a vector of length(brks) - 1 colours generated with the +#' function provided in 'color_fun' (\code{clim.colors} by default).\cr 'cols' +#' can have one additional colour at the beginning and/or at the end with the +#' aim to colour field values beyond the range of interest represented in the +#' colour bar. If any of these extra colours is provided, parameter +#' 'triangle_ends' becomes mandatory in order to disambiguate which of the +#' ends the colours have been provided for. +#'@param vertical TRUE/FALSE for vertical/horizontal colour bar +#' (disregarded if plot = FALSE). +#'@param subsampleg The first of each subsampleg breaks will be ticked on the +#' colorbar. Takes by default an approximation of a value that yields a +#' readable tick arrangement (extreme breaks always ticked). If set to 0 or +#' lower, no labels are drawn. See the code of the function for details or +#' use 'extra_labels' for customized tick arrangements. +#'@param bar_limits Vector of two numeric values with the extremes of the +#' range of values represented in the colour bar. If 'var_limits' go beyond +#' this interval, the drawing of triangle extremes is triggered at the +#' corresponding sides, painted in 'col_inf' and 'col_sup'. Either of them +#' can be set as NA and will then take as value the corresponding extreme in +#' 'var_limits' (hence a triangle end won't be triggered for these sides). +#' Takes as default the extremes of 'brks' if available, else the same values +#' as 'var_limits'. +#'@param var_limits Vector of two numeric values with the minimum and maximum +#' values of the field to represent. These are used to know whether to draw +#' triangle ends at the extremes of the colour bar and what colour to fill +#' them in with. If not specified, take the same value as the extremes of +#' 'brks'. 
Hence the parameter 'brks' is mandatory if 'var_limits' is not +#' specified. +#'@param triangle_ends Vector of two logical elements, indicating whether to +#' force the drawing of triangle ends at each of the extremes of the colour +#' bar. This choice is automatically made from the provided 'brks', +#' 'bar_limits', 'var_limits', 'col_inf' and 'col_sup', but the behaviour +#' can be manually forced to draw or not to draw the triangle ends with this +#' parameter. If 'cols' is provided, 'col_inf' and 'col_sup' will take +#' priority over 'triangle_ends' when deciding whether to draw the triangle +#' ends or not. +#'@param col_inf Colour to fill the inferior triangle end with. Useful if +#' specifying colours manually with parameter 'cols', to specify the colour +#' and to trigger the drawing of the lower extreme triangle, or if 'cols' is +#' not specified, to replace the colour automatically generated by +#' ColorBarContinuous(). +#'@param col_sup Colour to fill the superior triangle end with. Useful if +#' specifying colours manually with parameter 'cols', to specify the colour +#' and to trigger the drawing of the upper extreme triangle, or if 'cols' is +#' not specified, to replace the colour automatically generated by +#' ColorBarContinuous(). +#'@param color_fun Function to generate the colours of the color bar. Must +#' take an integer and must return as many colours. The returned colour vector +#' can have the attribute 'na_color', with a colour to draw NA values. This +#' parameter is set by default to ClimPalette(). +#'@param plot Logical value indicating whether to only compute its breaks and +#' colours (FALSE) or to also draw it on the current device (TRUE). +#'@param draw_ticks Whether to draw ticks for the labels along the colour bar +#' (TRUE) or not (FALSE). TRUE by default. Disregarded if 'plot = FALSE'. +#'@param draw_separators Whether to draw black lines in the borders of each of +#' the colour rectangles of the colour bar (TRUE) or not (FALSE). FALSE by +#' default. Disregarded if 'plot = FALSE'. +#'@param triangle_ends_scale Scale factor for the drawn triangle ends of the +#' colour bar, if drawn at all. Takes 1 by default (rectangle triangle +#' proportional to the thickness of the colour bar). Disregarded if +#' 'plot = FALSE'. +#'@param extra_labels Numeric vector of extra labels to draw along the axis of +#' the colour bar. The number of provided decimals will be conserved. +#' Disregarded if 'plot = FALSE'. +#'@param title Title to draw on top of the colour bar, most commonly with the +#' units of the represented field in the neighbour figures. Empty by default. +#'@param title_scale Scale factor for the 'title' of the colour bar. +#' Takes 1 by default. +#'@param label_scale Scale factor for the labels of the colour bar. +#' Takes 1 by default. +#'@param tick_scale Scale factor for the length of the ticks of the labels +#' along the colour bar. Takes 1 by default. +#'@param extra_margin Extra margins to be added around the colour bar, +#' in the format c(y1, x1, y2, x2). The units are margin lines. Takes +#' rep(0, 4) by default. +#'@param label_digits Number of significant digits to be displayed in the +#' labels of the colour bar, usually to avoid too many decimal digits +#' overflowing the figure region. This does not have an effect on the labels +#' provided in 'extra_labels'. Takes 4 by default. +#'@param ... Arguments to be passed to the method.
Only accepts the following +#' graphical parameters:\cr adj ann ask bg bty cex.lab cex.main cex.sub cin +#' col.axis col.lab col.main col.sub cra crt csi cxy err family fg fig fin +#' font font.axis font.lab font.main font.sub lend lheight ljoin lmitre lty +#' lwd mai mex mfcol mfrow mfg mkh oma omd omi page pch pin plt pty smo srt +#' tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog.\cr For more +#' information about the parameters see `par`. +#' +#'@return +#'\item{brks}{ +#' Breaks used for splitting the range in intervals. +#'} +#'\item{cols}{ +#' Colours generated for each of the length(brks) - 1 intervals. +#' Always of length length(brks) - 1. +#'} +#'\item{col_inf}{ +#' Colour used to draw the lower triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#'\item{col_sup}{ +#' Colour used to draw the upper triangle end in the colour +#' bar (NULL if not drawn at all). +#'} +#' +#'@examples +#'cols <- c("dodgerblue4", "dodgerblue1", "forestgreen", "yellowgreen", "white", +#' "white", "yellow", "orange", "red", "saddlebrown") +#'lims <- seq(-1, 1, 0.2) +#'cb <- ColorBarContinuous(lims, cols, plot = FALSE) +#' +#'@importFrom grDevices col2rgb rgb +#'@import utils +#'@export +ColorBarContinuous <- function(brks = NULL, cols = NULL, vertical = TRUE, + subsampleg = NULL, bar_limits = NULL, var_limits = NULL, + triangle_ends = NULL, col_inf = NULL, col_sup = NULL, + color_fun = ClimPalette(), plot = TRUE, + draw_ticks = TRUE, draw_separators = FALSE, + triangle_ends_scale = 1, extra_labels = NULL, + title = NULL, title_scale = 1, + label_scale = 1, tick_scale = 1, + extra_margin = rep(0, 4), label_digits = 4, ...) { + # Required checks + if ((is.null(brks) || length(brks) < 2) && is.null(bar_limits) && is.null(var_limits)) { + stop("At least one of 'brks' with the desired breaks, 'bar_limits' or ", + "'var_limits' must be provided to generate the colour bar.") + } + + # Check brks + if (!is.null(brks)) { + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be numeric if specified.") + } else if (length(brks) > 1) { + reorder <- sort(brks, index.return = TRUE) + if (!is.null(cols)) { + cols <- cols[reorder$ix[which(reorder$ix <= length(cols))]] + } + brks <- reorder$x + } + } + + # Check bar_limits + if (!is.null(bar_limits)) { + if (!(all(is.na(bar_limits) | is.numeric(bar_limits)) && (length(bar_limits) == 2))) { + stop("Parameter 'bar_limits' must be a vector of two numeric elements or NAs.") + } + } + + # Check var_limits + if (!is.null(var_limits)) { + if (!(is.numeric(var_limits) && (length(var_limits) == 2))) { + stop("Parameter 'var_limits' must be a numeric vector of length 2.") + } else if (anyNA(var_limits)) { + stop("Parameter 'var_limits' must not contain NA values.") + } else if (any(is.infinite(var_limits))) { + stop("Parameter 'var_limits' must not contain infinite values.") + } + } + + # Check cols + if (!is.null(cols)) { + if (!is.character(cols)) { + stop("Parameter 'cols' must be a vector of character strings.") + } else if (any(!sapply(cols, .IsColor))) { + stop("Parameter 'cols' must contain valid colour identifiers.") + } + } + + # Check color_fun + if (!is.function(color_fun)) { + stop("Parameter 'color_fun' must be a colour-generator function.") + } + + # Check integrity among brks, bar_limits and var_limits + if (is.null(brks) || (length(brks) < 2)) { + if (is.null(brks)) { + if (is.null(cols)) { + brks <- 21 + } else { + brks <- length(cols) + 1 + } + } + if (is.null(bar_limits) || anyNA(bar_limits)) { + # var_limits is defined + if 
(is.null(bar_limits)) { + bar_limits <- c(NA, NA) + } + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + bar_limits[which(is.na(bar_limits))] <- c(var_limits[1] - half_width, var_limits[2] + half_width)[which(is.na(bar_limits))] + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } else if (is.null(var_limits)) { + # bar_limits is defined + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (brks - 1) + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # both bar_limits and var_limits are defined + brks <- seq(bar_limits[1], bar_limits[2], length.out = brks) + } + } else if (is.null(bar_limits)) { + if (is.null(var_limits)) { + # brks is defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + var_limits <- bar_limits + half_width <- 0.5 * (var_limits[2] - var_limits[1]) / (length(brks) - 1) + var_limits[1] <- var_limits[1] + half_width / 50 + } else { + # brks and var_limits are defined + bar_limits <- c(head(brks, 1), tail(brks, 1)) + } + } else { + # brks and bar_limits are defined + # or + # brks, bar_limits and var_limits are defined + if (head(brks, 1) != bar_limits[1] || tail(brks, 1) != bar_limits[2]) { + stop("Parameters 'brks' and 'bar_limits' are inconsistent.") + } + } + + # Check col_inf + if (!is.null(col_inf)) { + if (!.IsColor(col_inf)) { + stop("Parameter 'col_inf' must be a valid colour identifier.") + } + } + + # Check col_sup + if (!is.null(col_sup)) { + if (!.IsColor(col_sup)) { + stop("Parameter 'col_sup' must be a valid colour identifier.") + } + } + + # Check triangle_ends + if (!is.null(triangle_ends) && (!is.logical(triangle_ends) || length(triangle_ends) != 2)) { + stop("Parameter 'triangle_ends' must be a logical vector with two elements.") + } + teflc <- triangle_ends_from_limit_cols <- c(!is.null(col_inf), !is.null(col_sup)) + if (is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- c(FALSE, FALSE) + if (bar_limits[1] >= var_limits[1]) { + triangle_ends[1] <- TRUE + } + if (bar_limits[2] < var_limits[2]) { + triangle_ends[2] <- TRUE + } + } else if (!is.null(triangle_ends) && is.null(col_inf) && is.null(col_sup)) { + triangle_ends <- triangle_ends + } else if (is.null(triangle_ends) && (!is.null(col_inf) || !is.null(col_sup))) { + triangle_ends <- teflc + } else if (any(teflc != triangle_ends)) { + if (!is.null(brks) && length(brks) > 1 && !is.null(cols) && length(cols) >= length(brks)) { + triangle_ends <- teflc + } else if (!is.null(cols)) { + triangle_ends <- teflc + } else { + triangle_ends <- triangle_ends + } + } + if (plot && !is.null(var_limits)) { + if ((bar_limits[1] >= var_limits[1]) && !triangle_ends[1]) { + warning("There are variable values smaller or equal to the lower limit ", + "of the colour bar and the lower triangle end has been ", + "disabled. These will be painted in the colour for NA values.") + } + if ((bar_limits[2] < var_limits[2]) && !triangle_ends[2]) { + warning("There are variable values greater than the higher limit ", + "of the colour bar and the higher triangle end has been ", + "disabled. 
These will be painted in the colour for NA values.") + } + } + + # Generate colours if needed + if (is.null(cols)) { + cols <- color_fun(length(brks) - 1 + sum(triangle_ends)) + attr_bk <- attributes(cols) + if (triangle_ends[1]) { + if (is.null(col_inf)) col_inf <- head(cols, 1) + cols <- cols[-1] + } + if (triangle_ends[2]) { + if (is.null(col_sup)) col_sup <- tail(cols, 1) + cols <- cols[-length(cols)] + } + attributes(cols) <- attr_bk + } else if ((length(cols) != (length(brks) - 1))) { + stop("Incorrect number of 'brks' and 'cols'. There must be one more break than the number of colours.") + } + + # Check vertical + if (!is.logical(vertical)) { + stop("Parameter 'vertical' must be TRUE or FALSE.") + } + + # Check extra_labels + if (is.null(extra_labels)) { + extra_labels <- numeric(0) + } + if (!is.numeric(extra_labels)) { + stop("Parameter 'extra_labels' must be numeric.") + } else { + if (any(extra_labels > bar_limits[2]) || any(extra_labels < bar_limits[1])) { + stop("Parameter 'extra_labels' must not contain ticks beyond the color bar limits.") + } + } + extra_labels <- sort(extra_labels) + + # Check subsampleg + primes <- function(x) { + # Courtesy of Chase. See http://stackoverflow.com/questions/6424856/r-function-for-returning-all-factors + x <- as.integer(x) + div <- seq_len(abs(x)) + factors <- div[x %% div == 0L] + factors <- list(neg = -factors, pos = factors) + return(factors) + } + remove_final_tick <- FALSE + added_final_tick <- TRUE + if (is.null(subsampleg)) { + subsampleg <- 1 + while (length(brks) / subsampleg > 15 - 1) { + next_factor <- primes((length(brks) - 1) / subsampleg)$pos + next_factor <- next_factor[length(next_factor) - ifelse(length(next_factor) > 2, 1, 0)] + subsampleg <- subsampleg * next_factor + } + if (subsampleg > (length(brks) - 1) / 4) { + subsampleg <- max(1, round(length(brks) / 4)) + extra_labels <- c(extra_labels, bar_limits[2]) + added_final_tick <- TRUE + if ((length(brks) - 1) %% subsampleg < (length(brks) - 1) / 4 / 2) { + remove_final_tick <- TRUE + } + } + } else if (!is.numeric(subsampleg)) { + stop("Parameter 'subsampleg' must be numeric.") + } + subsampleg <- round(subsampleg) + draw_labels <- TRUE + if ((subsampleg) < 1) { + draw_labels <- FALSE + } + + # Check plot + if (!is.logical(plot)) { + stop("Parameter 'plot' must be logical.") + } + + # Check draw_separators + if (!is.logical(draw_separators)) { + stop("Parameter 'draw_separators' must be logical.") + } + + # Check triangle_ends_scale + if (!is.numeric(triangle_ends_scale)) { + stop("Parameter 'triangle_ends_scale' must be numeric.") + } + + # Check draw_ticks + if (!is.logical(draw_ticks)) { + stop("Parameter 'draw_ticks' must be logical.") + } + + # Check title + if (is.null(title)) { + title <- '' + } + if (!is.character(title)) { + stop("Parameter 'title' must be a character string.") + } + + # Check title_scale + if (!is.numeric(title_scale)) { + stop("Parameter 'title_scale' must be numeric.") + } + + # Check label_scale + if (!is.numeric(label_scale)) { + stop("Parameter 'label_scale' must be numeric.") + } + + # Check tick_scale + if (!is.numeric(tick_scale)) { + stop("Parameter 'tick_scale' must be numeric.") + } + + # Check extra_margin + if (!is.numeric(extra_margin) || length(extra_margin) != 4) { + stop("Parameter 'extra_margin' must be a numeric vector of length 4.") + } + + # Check label_digits + if (!is.numeric(label_digits)) { + stop("Parameter 'label_digits' must be numeric.") + } + label_digits <- round(label_digits) + + # Process the user graphical 
parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "col", "lab", "las", "mar", "mgp", "new", "ps") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) + + # + # Plotting colorbar + # ~~~~~~~~~~~~~~~~~~~ + # + if (plot) { + pars_to_save <- c('mar', 'cex', names(userArgs), 'mai', 'mgp', 'las', 'xpd') + saved_pars <- par(pars_to_save) + par(mar = c(0, 0, 0, 0), cex = 1) + image(1, 1, t(t(1)), col = rgb(0, 0, 0, 0), axes = FALSE, xlab = '', ylab = '') + # Get the available space + figure_size <- par('fin') + cs <- par('csi') + # This allows us to assume we always want to plot horizontally + if (vertical) { + figure_size <- rev(figure_size) + } + # pannel_to_redraw <- par('mfg') + # .SwitchToFigure(pannel_to_redraw[1], pannel_to_redraw[2]) + # Load the user parameters + par(new = TRUE) + par(userArgs) + # Set up color bar plot region + margins <- c(0.0, 0, 0.0, 0) + cex_title <- 1 * title_scale + cex_labels <- 0.9 * label_scale + cex_ticks <- -0.3 * tick_scale + spaceticklab <- max(-cex_ticks, 0) + if (vertical) { + margins[1] <- margins[1] + (1.2 * cex_labels * 3 + spaceticklab) * cs + margins <- margins + extra_margin[c(4, 1:3)] * cs + } else { + margins[1] <- margins[1] + (1.2 * cex_labels * 1 + spaceticklab) * cs + margins <- margins + extra_margin * cs + } + if (title != '') { + margins[3] <- margins[3] + (1.0 * cex_title) * cs + } + margins[3] <- margins[3] + sqrt(figure_size[2] / (margins[1] + margins[3])) * + figure_size[2] / 6 * ifelse(title != '', 0.5, 0.8) + # Set side margins + margins[2] <- margins[2] + figure_size[1] / 16 + margins[4] <- margins[4] + figure_size[1] / 16 + triangle_ends_prop <- 1 / 32 * triangle_ends_scale + triangle_ends_cex <- triangle_ends_prop * figure_size[2] + if (triangle_ends[1]) { + margins[2] <- margins[2] + triangle_ends_cex + } + if (triangle_ends[2]) { + margins[4] <- margins[4] + triangle_ends_cex + } + ncols <- length(cols) + # Set up the points of triangles + # Compute the proportion of horiz.
space occupied by one plot unit + prop_unit <- (1 - (margins[2] + margins[4]) / figure_size[1]) / ncols + # Convert triangle height to plot units + triangle_height <- triangle_ends_prop / prop_unit + left_triangle <- list(x = c(1, 1 - triangle_height, 1) - 0.5, + y = c(1.4, 1, 0.6)) + right_triangle <- list(x = c(ncols, ncols + triangle_height, ncols) + 0.5, + y = c(1.4, 1, 0.6)) + # Draw the color squares and title + if (vertical) { + par(mai = c(margins[2:4], margins[1]), + mgp = c(0, spaceticklab + 0.2, 0), las = 1) + d <- 4 + image(1, 1:ncols, t(1:ncols), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(ylab = title, line = cex_title * (0.2 + 0.1), cex.lab = cex_title) + # Draw top and bottom border lines + lines(c(0.6, 0.6), c(1 - 0.5, ncols + 0.5)) + lines(c(1.4, 1.4), c(1 - 0.5, ncols + 0.5)) + # Rotate triangles + names(left_triangle) <- rev(names(left_triangle)) + names(right_triangle) <- rev(names(right_triangle)) + } else { + # The term - cex_labels / 4 * (3 / cex_labels - 1) was found by + # trial and error + par(mai = margins, + mgp = c(0, cex_labels / 2 + spaceticklab + - cex_labels / 4 * (3 / cex_labels - 1), 0), + las = 1) + d <- 1 + image(1:ncols, 1, t(t(1:ncols)), axes = FALSE, col = cols, + xlab = '', ylab = '') + title(title, line = cex_title * (0.2 + 0.1), cex.main = cex_title) + # Draw top and bottom border lines + lines(c(1 - 0.5, ncols + 0.5), c(0.6, 0.6)) + lines(c(1 - 0.5, ncols + 0.5), c(1.4, 1.4)) + tick_length <- -0.4 + } + # Draw the triangles + par(xpd = TRUE) + if (triangle_ends[1]) { + # Draw left triangle + polygon(left_triangle$x, left_triangle$y, col = col_inf, border = NA) + lines(left_triangle$x, left_triangle$y) + } + if (triangle_ends[2]) { + # Draw right triangle + polygon(right_triangle$x, right_triangle$y, col = col_sup, border = NA) + lines(right_triangle$x, right_triangle$y) + } + par(xpd = FALSE) + + # Put the separators + if (vertical) { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(0.6, 1.4), c(i, i) + 0.5) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.6, 1.4), c(0.5, 0.5)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(0.6, 1.4), c(ncols + 0.5, ncols + 0.5)) + } + } else { + if (draw_separators) { + for (i in 1:(ncols - 1)) { + lines(c(i, i) + 0.5, c(0.6, 1.4)) + } + } + if (draw_separators || is.null(col_inf)) { + lines(c(0.5, 0.5), c(0.6, 1.4)) + } + if (draw_separators || is.null(col_sup)) { + lines(c(ncols + 0.5, ncols + 0.5), c(0.6, 1.4)) + } + } + # Put the ticks + plot_range <- length(brks) - 1 + var_range <- tail(brks, 1) - head(brks, 1) + extra_labels_at <- ((extra_labels - head(brks, 1)) / var_range) * plot_range + 0.5 + at <- seq(1, length(brks), subsampleg) + labels <- brks[at] + # Getting rid of next-to-last tick if too close to last one + if (remove_final_tick) { + at <- at[-length(at)] + labels <- labels[-length(labels)] + } + labels <- signif(labels, label_digits) + if (added_final_tick) { + extra_labels[length(extra_labels)] <- signif(tail(extra_labels, 1), label_digits) + } + at <- at - 0.5 + at <- c(at, extra_labels_at) + labels <- c(labels, extra_labels) + tick_reorder <- sort(at, index.return = TRUE) + at <- tick_reorder$x + if (draw_labels) { + labels <- labels[tick_reorder$ix] + } else { + labels <- FALSE + } + axis(d, at = at, tick = draw_ticks, labels = labels, cex.axis = cex_labels, tcl = cex_ticks) + par(saved_pars) + } + invisible(list(brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup)) +}
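For reference, a minimal usage sketch of the function added above (illustrative, not part of the diff): the colour bar can be pre-computed without drawing, so the returned breaks and colours can be reused by a map-plotting call. ClimPalette() is the default color_fun from the signature above.

# Pre-compute a colour bar configuration without plotting it. The variable
# range exceeds the bar limits on both sides, so both triangle ends are
# assigned a colour from the generated palette.
cb <- ColorBarContinuous(brks = seq(-1, 1, 0.2),
                         var_limits = c(-1.2, 1.3),
                         triangle_ends = c(TRUE, TRUE),
                         plot = FALSE)
cb$brks          # the breaks actually used
length(cb$cols)  # length(brks) - 1 colours, one per interval
cb$col_inf       # colour for values below the lower bar limit
cb$col_sup       # colour for values above the upper bar limit

diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R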
b/modules/Scorecards/R/tmp/LoadMetrics.R index fa12d61080a598cb981f90d8a28100f4dbd841d9..25202b49dca1242c4042f4f5ea3cf3054904f4d5 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -12,11 +12,10 @@ #'@param var A character string following the format from #' variable-dictionary.yml from verification suite (TO DO: multiple variables). #' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. -#'@param start.year A numeric indicating the start year of the reference period -#'@param end.year A numeric indicating the end year of the reference period -#'@param start.months A vector indicating the numbers of the start months -#'@param forecast.months A vector indicating the numbers of the forecast months -#'@param input.path A character string indicating the path where metrics output +#'@param period A character string indicating the start and end years of the +#' reference period (e.g. '1993-2016') +#'@param start_months A vector indicating the numbers of the start months +#'@param input_path A character string indicating the path where metrics output #' files from verification suite are saved (or any other compatible files) #' #'@return A list by system and reference containing an array with #'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference = 'ERA5', #' var = 'tas', -#' start.year = 1993, -#' end.year = 2016, +#' period = '1993-2016', #' metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'), -#' start.months = sprintf("%02d", 1:12), -#' forecast.months = 1:6, -#' input.path = '/esarchive/scratch/nmilders/scorecards_data/input_data') +#' start_months = sprintf("%02d", 1:12), +#' calib_method = 'raw', +#' input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data') #'} #'@import easyNCDF #'@import multiApply #'@export -LoadMetrics <- function(system, reference, var, start.year, end.year, - metrics, start.months, forecast.months, - inf_to_na = FALSE, - input.path) { +LoadMetrics <- function(input_path, system, reference, var, period, + metrics, start_months, calib_method = NULL, + inf_to_na = FALSE) { # Initial checks ## system @@ -59,51 +67,38 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, "names.") } if (length(var) > 1) { - warning("Parameter 'var' must be of length one. Only the first value ", + warning("Parameter 'var' must be of length one. 
Only the first value ", "will be used.") var <- var[1] } - ## start.year - if (!is.numeric(start.year)) { - stop("Parameter 'start.year' must be a numeric value.") - } - ## end.year - if (!is.numeric(end.year)) { - stop("Parameter 'end.year' must be a numeric value.") - } ## metrics if (!is.character(metrics)) { stop("Parameter 'metrics' must be a vector of character strings.") } - ## start.months - if (is.character(start.months)) { - warning("Parameter 'start.months' must be a numeric vector indicating ", + ## start_months + if (is.character(start_months)) { + warning("Parameter 'start_months' must be a numeric vector indicating ", "the starting months.") - start.months <- as.numeric(start.months) + start_months <- as.numeric(start_months) } - if (!is.numeric(start.months)) { - stop("Parameter 'start.months' must be a numeric vector indicating ", + if (!is.numeric(start_months)) { + stop("Parameter 'start_months' must be a numeric vector indicating ", "the starting months.") } - start.months <- sprintf("%02d", start.months) + start_months <- sprintf("%02d", start_months) ## Check if sdates are continuous or discrete - if (all(diff(as.numeric(start.months)) == 1)) { - consecutive_start.months <- TRUE + if (all(diff(as.numeric(start_months)) == 1)) { + consecutive_start_months <- TRUE } else { - consecutive_start.months <- FALSE + consecutive_start_months <- FALSE } - ## forecast.months - if (!is.numeric(forecast.months)) { - stop("Parameter 'forecast.months' must be a numeric vector indicating ", - "the starting months.") - } - ## input.path - if (!is.character(input.path)) { - stop("Parameter 'input.path must be a character string.") + ## input_path + if (!is.character(input_path)) { + stop("Parameter 'input_path' must be a character string.") } - if (length(input.path) > 1) { - input.path <- input.path[1] - warning("Parameter 'input.path' has length greater than 1 and only the ", + if (length(input_path) > 1) { + input_path <- input_path[1] + warning("Parameter 'input_path' has length greater than 1 and only the ", "first element will be used.") } @@ -111,9 +106,6 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - period <- paste0(start.year, "-", end.year) - - ## Define empty list to saved data all_metrics <- sapply(system, function(x) NULL) names(all_metrics) <- system ## Load data for each system @@ -124,41 +116,55 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, ## Load data for each reference for (ref in 1:length(reference)) { ## Call function to load metrics data - met <- .Loadmetrics(input.path = input.path, # recipe$Run$output, - system = system[sys], - reference = reference[ref], - var = var, - period = period, - start.months = start.months, - forecast.months = forecast.months, - metrics = metrics) + met_by_smonth <- NULL + for (met in metrics) { + result <- .loadmetrics(input_path = input_path, + system = system[sys], + reference = reference[ref], + var = var, + period = period, + start_months = start_months, + calib_method = calib_method, + metric = met) + + result_attr <- attributes(result) + met_by_smonth <- abind::abind(met_by_smonth, result, along = length(dim(result)) + 1) + } + attributes(met_by_smonth) <- result_attr[-1] + # names(dim(met_by_smonth)) <- c(names(result_attr$dim), 'metric') + + dim(met_by_smonth) <- c(dim(result), metric = length(metrics)) ## Save metric data as array in reference list - by_reference[[reference[ref]]] <- met + 
by_reference[[reference[ref]]] <- met_by_smonth ## Remove -Inf from crpss data if variable is precipitation if (inf_to_na) { - by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA + by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA } } ## close loop on reference ## Save reference data in list of system all_metrics[[system[sys]]] <- by_reference } ## close loop on system + attributes(all_metrics)$metrics <- metrics + attributes(all_metrics)$start_months <- start_months + return(all_metrics) } ## close function -############################################################ +########################################################### -.Loadmetrics <- function(input.path, system, reference, - var, period, start.months, - forecast.months, metrics) { +.loadmetrics <- function(input_path, system, reference, + var, period, start_months, + calib_method, metric) { ## Load data for each start date - allfiles <- sapply(start.months, function(m) { - paste0(input.path, "/", system, "/", var, - "/scorecards_", system, "_", reference, "_", - var, "-skill_", period, "_s", m, # mod.pressure, + allfiles <- sapply(start_months, function(m) { + paste0(input_path, "/", system, "/", reference, "/", calib_method, "/", + var, "/scorecards_", system, "_", reference, "_", + var, "_", metric, "_", period, "_s", m, # mod.pressure, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) + # Check dims files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) @@ -169,7 +175,7 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, num_dims <- numeric(dim(allfiledims)[1]) for (i in 1:dim(allfiledims)[1]) { if (length(unique(allfiledims[i,])) > 1) { - warning(paste0("Dimensions of system ", system," with var ", var, + warning(paste0("Dimensions of system ", system," with var ", var, " don't match.")) } num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension @@ -182,7 +188,7 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = metrics, unlist = T, + res <- easyNCDF::NcToArray(x, vars_to_read = metric, unlist = T, drop_var_dim = T) names(dim(res)) <- NULL } else { @@ -191,26 +197,25 @@ LoadMetrics <- function(system, reference, var, start.year, end.year, } res})$output1 - dim(array_met_by_sdate) <- c(metric = length(metrics), allfiledims[-1,1], + dim(array_met_by_sdate) <- c(allfiledims[-1,1], sdate = length(allfiles)) - # Attributes # Read attributes from the first existing file if ("region" %in% rownames(allfiledims)) { file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) region <- ncdf4::ncatt_get(file_for_att, 'region') - ncdf4::nc_close(file_for_att) + ncdf4::nc_close(file_for_att) attributes(array_met_by_sdate)$region <- region } else { - lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', + lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', unlist = T, drop_var_dim = T) - lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', + lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', unlist = T, drop_var_dim = T) attributes(array_met_by_sdate)$lon <- lon attributes(array_met_by_sdate)$lat <- lat } - attributes(array_met_by_sdate)$metrics <- metrics - 
attributes(array_met_by_sdate)$start.months <- start.months - attributes(array_met_by_sdate)$forecast.months <- forecast.months + return(array_met_by_sdate) } + + \ No newline at end of file
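As an illustration of the new directory layout consumed by .loadmetrics above (the root directory here is a hypothetical placeholder), a single metric file for start month '02' resolves as follows:

# Illustration only: how .loadmetrics builds one input file path from its
# arguments (system/reference/calib_method/var subdirectories).
input_path <- '/path/to/output/Skill'   # hypothetical root directory
paste0(input_path, "/", 'ECMWF-SEAS5', "/", 'ERA5', "/", 'raw', "/", 'tas',
       "/scorecards_", 'ECMWF-SEAS5', "_", 'ERA5', "_", 'tas', "_",
       'rps_syear', "_", '1993-2016', "_s", '02', ".nc")
# "/path/to/output/Skill/ECMWF-SEAS5/ERA5/raw/tas/scorecards_ECMWF-SEAS5_ERA5_tas_rps_syear_1993-2016_s02.nc"

diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R deleted file mode 100644 index 4373057b6e0d3901abcb3fc27c09006f5156131d..0000000000000000000000000000000000000000 --- a/modules/Scorecards/R/tmp/SCPlotScorecard.R +++ /dev/null @@ -1,444 +0,0 @@ -#'Scorecards function create simple scorecards by region (types 1 & 3) -#' -#'@description This function creates a scorecard for a single system and -#'reference combination, showing data by region and forecast month. -#' -#'@param data A multidimensional array containing spatially aggregated metrics -#' data with dimensions: metric, region, sdate and ftime. -#'@param row.dim A character string indicating the dimension name to show in the -#' rows of the plot. -#'@param subrow.dim A character string indicating the dimension name to show in -#' the sub-rows of the plot. -#'@param col.dim A character string indicating the dimension name to show in the -#' columns of the plot. -#'@param subcol.dim A character string indicating the dimension name to show in -#' the sub-columns of the plot. -#'@param legend.dim A character string indicating the dimension name to use for -#' the legend. -#'@param row.names A vector of character strings with row display names. -#'@param subrow.names A vector of character strings with sub-row display names. -#'@param col.names A vector of character strings with column display names. -#'@param subcol.names A vector of character strings with sub-column display -#' names. -#'@param row.title A character string for the title of the row names. -#'@param subrow.title A character string for the title of the sub-row names. -#'@param table.title A character string for the title of the plot. -#'@param table.subtitle A character string for the sub-title of the plot. -#'@param legend.breaks A vector of numerics or a list of vectors of numerics, -#' containing the breaks for the legends. If a vector is given as input, then -#' these breaks will be repeated for each legend.dim. A list of vectors can be -#' given as input if the legend.dims require different breaks. This parameter -#' is required even if the legend is not plotted, to define the colors in the -#' scorecard table. -#'@param plot.legend A logical value to determine if the legend is plotted. -#'@param legend.width A numeric value to define the width of the legend bars. -#'@param legend.height A numeric value to define the height of the legend bars. -#'@param palette A vector of character strings or a list of vectors of -#' character strings containing the colors to use in the legends. If a vector -#' is given as input, then these colors will be used for each legend.dim. A -#' list of vectors can be given as input if different colors are desired for -#' the legend.dims. This parameter must be included even if the the legend is -#' not plotted, to define the colors in the scorecard table. -#'@param colorunder A character string or of vector of character strings -#' defining the colors to use for data values with are inferior to the lowest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no inferior values. -#' If a character string is given this color will be applied to all legend.dims. 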
-#'@param colorsup A character string or of vector of character strings -#' defining the colors to use for data values with are superior to the highest -#' breaks value. This parameter will also plot a inferior triangle in the -#' legend bar. The parameter can be set to NULL if there are no superior values. -#' If a character string is given this color will be applied to all legend.dims. -#'@param round.decimal A numeric indicating to which decimal point the data -#' is to be displayed in the scorecard table. -#'@param font.size A numeric indicating the font size on the scorecard table. -#'@param fileout A path of the location to save the scorecard plots. -#' -#'@return An image file containing the scorecard. -#'@example -#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3, -#' 'time' = 6)) -#'row.names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') -#'col.names <- c('Mean bias (K)', 'Correlation', 'RPSS','CRPSS') -#'SCPlotScorecard(data = data, row.names = row.names, col.names = col.names, -#' subcol.names = month.abb[as.numeric(1:12)], -#' row.title = 'Region', subrow.title = 'Forecast Month', -#' col.title = 'Start date', -#' table.title = "Temperature of ECMWF System 5", -#' table.subtitle = "(Ref: ERA5 1994-2016)", -#' fileout = 'test.png') -#' -#'@import kableExtra -#'@import s2dv -#'@import ClimProjDiags -#'@export -SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', subcol.dim = 'sdate', - legend.dim = 'metric', row.names = NULL, - subrow.names = NULL, col.names = NULL, - subcol.names = NULL, row.title = NULL, - subrow.title = NULL, col.title = NULL, - table.title = NULL, table.subtitle = NULL, - legend.breaks = NULL, plot.legend = TRUE, - label.scale = NULL, legend.width = NULL, - legend.height = NULL, palette = NULL, - colorunder = NULL, colorsup = NULL, - round.decimal = 2, font.size = 1.1, - legend.white.space = NULL, - col1.width = NULL, col2.width = NULL, - fileout = './scorecard.png') { - # Input parameter checks - ## Check data - if (!is.array(data)) { - stop("Parameter 'data' must be a numeric array.") - } - ## Check row.dim - if (!is.character(row.dim)) { - stop("Parameter 'row.dim' must be a character string.") - } - if (!row.dim %in% names(dim(data))) { - stop("Parameter 'row.dim' is not found in 'data' dimensions.") - } - ## Check row.names - if (!is.null(row.names)) { - if (length(row.names) != as.numeric(dim(data)[row.dim])) { - stop("Parameter 'row.names' must have the same length of dimension 'row.dims'.") - } - } else { - row.names <- as.character(1:dim(data)[row.dim]) - } - ## Check subrow.dim - if (!is.character(subrow.dim)) { - stop("Parameter 'subrow.dim' must be a character string.") - } - if (!subrow.dim %in% names(dim(data))) { - stop("Parameter 'subrow.dim' is not found in 'data' dimensions.") - } - ## Check subrow.names - if (!is.null(subrow.names)) { - if (length(subrow.names) != as.numeric(dim(data)[subrow.dim])) { - stop("Parameter 'subrow.names' must have the same length of dimension 'subrow.dims'.") - } - } else { - subrow.names <- as.character(1:dim(data)[subrow.dim]) - } - ## Check col.dim - if (!is.character(col.dim)) { - stop("Parameter 'col.dim' must be a character string.") - } - if (!col.dim %in% names(dim(data))) { - stop("Parameter 'col.dim' is not found in 'data' dimensions.") - } - ## Check col.names - if (!is.null(col.names)) { - if (length(col.names) != as.numeric(dim(data)[col.dim])) { - stop("Parameter 'col.names' must have the same length of dimension 
'col.dims'.") - } - } else { - col.names <- as.character(1:dim(data)[col.dim]) - } - ## Check subcol.dim - if (!is.character(subcol.dim)) { - stop("Parameter 'subcol.dim' must be a character string.") - } - if (!subcol.dim %in% names(dim(data))) { - stop("Parameter 'subcol.dim' is not found in 'data' dimensions.") - } - ## Check subcol.names - if (!is.null(subcol.names)) { - if (length(subcol.names) != as.numeric(dim(data)[subcol.dim])) { - stop("Parameter 'subcol.names' must have the same length of dimension 'subcol.dims'.") - } - } else { - subcol.names <- as.character(1:dim(data)[subcol.dim]) - } - ## Check legend.dim - if (!is.character(legend.dim)) { - stop("Parameter 'legend.dim' must be a character string.") - } - if (!legend.dim %in% names(dim(data))) { - stop("Parameter 'legend.dim' is not found in 'data' dimensions.") - } - ## Check row.title inputs - if (!is.null(row.title)) { - if (!is.character(row.title)) { - stop("Parameter 'row.title must be a character string.") - } - } else { - row.title <- "" - } - ## Check subrow.title - if (!is.null(subrow.title)) { - if (!is.character(subrow.title)) { - stop("Parameter 'subrow.title must be a character string.") - } - } else { - subrow.title <- "" - } - ## Check col.title - if (!is.null(col.title)) { - if (!is.character(col.title)) { - stop("Parameter 'col.title must be a character string.") - } - } else { - col.title <- "" - } - ## Check table.title - if (!is.null(table.title)) { - if (!is.character(table.title)) { - stop("Parameter 'table.title' must be a character string.") - } - } else { - table.title <- "" - } - ## Check table.subtitle - if (!is.null(table.subtitle)) { - if (!is.character(table.subtitle)) { - stop("Parameter 'table.subtitle' must be a character string.") - } - } else { - table.subtitle <- "" - } - # Check legend.breaks - if (is.vector(legend.breaks) && is.numeric(legend.breaks)) { - legend.breaks <- rep(list(legend.breaks), as.numeric(dim(data)[legend.dim])) - } else if (is.null(legend.breaks)) { - legend.breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend.dim])) - } else if (inherits(legend.breaks, 'list')) { - stopifnot(length(legend.breaks) == as.numeric(dim(data)[legend.dim])) - } else { - stop("Parameter 'legend.breaks' must be a numeric vector, a list or NULL.") - } - ## Check plot.legend - if (!inherits(plot.legend, 'logical')) { - stop("Parameter 'plot.legend' must be a logical value.") - } - ## Check label.scale - if (is.null(label.scale)) { - label.scale <- 1.4 - } else { - if (!is.numeric(label.scale) | length(label.scale) != 1) { - stop("Parameter 'label.scale' must be a numeric value of length 1.") - } - } - ## Check legend.width - if (is.null(legend.width)) { - legend.width <- length(subcol.names) * 46.5 - } else { - if (!is.numeric(legend.width) | length(legend.width) != 1) { - stop("Parameter 'legend.width' must be a numeric value of length 1.") - } - } - if (is.null(legend.height)) { - legend.height <- 50 - } else { - if (!is.numeric(legend.height) | length(legend.height) != 1) { - stop("Parameter 'legend.height' must be a numeric value of length 1.") - } - } - ## Check colour palette input - if (is.vector(palette)) { - palette <- rep(list(palette), as.numeric(dim(data)[legend.dim])) - } else if (is.null(palette)) { - palette <- rep(list(c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', - '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08')), - as.numeric(dim(data)[legend.dim])) - } else if (inherits(palette, 'list')) { - stopifnot(length(palette) == 
as.numeric(dim(data)[legend.dim])) - } else { - stop("Parameter 'palette' must be a numeric vector, a list or NULL.") - } - ## Check colorunder - if (is.null(colorunder)) { - colorunder <- rep("#04040E",as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorunder) && length(colorunder) == 1) { - colorunder <- rep(colorunder, as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorunder) && - length(colorunder) != as.numeric(dim(data)[legend.dim])) { - stop("Parameter 'colorunder' must be a numeric vector, a list or NULL.") - } - ## Check colorsup - if (is.null(colorsup)) { - colorsup <- rep("#730C04", as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorsup) && length(colorsup) == 1) { - colorsup <- rep(colorsup,as.numeric(dim(data)[legend.dim])) - } else if (is.character(colorsup) && - length(colorsup) != as.numeric(dim(data)[legend.dim])) { - stop("Parameter 'colorsup' must be a numeric vector, a list or NULL.") - } - ## Check round.decimal - if (is.null(round.decimal)) { - round.decimal <- 2 - } else if (!is.numeric(round.decimal) | length(round.decimal) != 1) { - stop("Parameter 'round.decimal' must be a numeric value of length 1.") - } - ## Check font.size - if (is.null(font.size)) { - font.size <- 1 - } else if (!is.numeric(font.size) | length(font.size) != 1) { - stop("Parameter 'font.size' must be a numeric value of length 1.") - } - ## Check legend white space - if (is.null(legend.white.space)){ - legend.white.space <- 6 - } else { - legend.white.space <- legend.white.space - } - ## Check col1.width - if (is.null(col1.width)) { - if (max(nchar(row.names)) == 1 ) { - col1.width <- max(nchar(row.names)) - } else { - col1.width <- max(nchar(row.names))/4 - } - } else if (!is.numeric(col1.width)) { - stop("Parameter 'col1.width' must be a numeric value of length 1.") - } - ## Check col2.width - if (is.null(col2.width)) { - if (max(nchar(subrow.names)) == 1 ) { - col2.width <- max(nchar(subrow.names)) - } else { - col2.width <- max(nchar(subrow.names))/4 - } - } else if (!is.numeric(col2.width)) { - stop("Parameter 'col2.width' must be a numeric value of length 1.") - } - - - # Get dimensions of inputs - n.col.names <- length(col.names) - n.subcol.names <- length(subcol.names) - n.row.names <- length(row.names) - n.subrow.names <- length(subrow.names) - - # Define table size - n.rows <- n.row.names * n.subrow.names - n.columns <- 2 + (n.col.names * n.subcol.names) - - # Column names - row.names.table <- rep("", n.rows) - for (row in 1:n.row.names) { - row.names.table[floor(n.subrow.names/2) + (row - 1) * n.subrow.names] <- row.names[row] - } - - # Define scorecard table titles - column.titles <- c(row.title, subrow.title, rep(c(subcol.names), n.col.names)) - - # Round data - data <- round(data, round.decimal) - - # Define data inside the scorecards table - for (row in 1:n.row.names) { - table_temp <- data.frame(table_column_2 = as.character(subrow.names)) - for (col in 1:n.col.names) { - table_temp <- data.frame(table_temp, - Reorder(data = Subset(x = data, along = c(col.dim, row.dim), - indices = list(col, row), drop = 'selected'), - order = c(subrow.dim, subcol.dim))) - } - if (row == 1) { - table_data <- table_temp - } else { - table_data <- rbind(table_data, table_temp) - } - } - - # All data for plotting in table - table <- data.frame(table_column_1 = row.names.table, table_data) - table_temp <- array(unlist(table[3:n.columns]), dim = c(n.rows, n.columns - 2)) - # Define colors to show in table - table_colors <- .SCTableColors(table = table_temp, 
n.col = n.col.names, - n.subcol = n.subcol.names, n.row = n.row.names, - n.subrow = n.subrow.names, legend.breaks = legend.breaks, - palette = palette, colorunder = colorunder, - colorsup = colorsup) - metric.color <- table_colors$metric.color - metric.text.color <- table_colors$metric.text.color - # metric.text.bold <- table_colors$metric.text.bold - - options(stringsAsFactors = FALSE) - title <- data.frame(c1 = table.title, c2 = n.columns) - subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns) - header.names <- as.data.frame(data.frame(c1 = c("", col.names), - c2 = c(2, rep(n.subcol.names, n.col.names)))) - header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col.title, n.col.names))), - c2 = c(2, rep(n.subcol.names, n.col.names)))) - title.space <- data.frame(c1 = "\n", c2 = n.columns) - - # Hide NA values in table - options(knitr.kable.NA = '') - - # Create HTML table - table.html.part <- list() - table.html.part[[1]] <- kbl(table, escape = F, col.names = column.titles, align = rep("c", n.columns)) %>% - kable_paper("hover", full_width = F, font_size = 14 * font.size) %>% - add_header_above(header = header.names2, font_size = 16 * font.size) %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = header.names, font_size = 20 * font.size) %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = subtitle, font_size = 16 * font.size, align = "left") %>% - add_header_above(header = title.space, font_size = 10 * font.size) %>% - add_header_above(header = title, font_size = 22 * font.size, align = "left") - - for (i in 1:n.col.names) { - for (j in 1:n.subcol.names) { - my.background <- metric.color[, (i - 1) * n.subcol.names + j] - my.text.color <- metric.text.color[, (i - 1) * n.subcol.names + j] - # my.bold <- metric.text.bold[(i - 1) * n.subcol.names + j] - - table.html.part[[(i - 1) * n.subcol.names + j + 1]] <- - column_spec(table.html.part[[(i - 1) * n.subcol.names + j]], - 2 + n.subcol.names * (i - 1) + j, - background = my.background[1:n.rows], - color = my.text.color[1:n.rows], - bold = T) ## strsplit(toString(bold), ', ')[[1]] - } - } - - # Define position of table borders - column.borders <- NULL - for (i in 1:n.col.names) { - column.spacing <- (n.subcol.names * i) + 2 - column.borders <- c(column.borders, column.spacing) - } - - n.last.list <- n.col.names * n.subcol.names + 1 - - table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% - column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% - column_spec(3:n.columns, width_min = "1.2cm") %>% - column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% - column_spec(1, border_left = "2px solid black") %>% - column_spec(n.columns, border_right = "2px solid black") %>% - row_spec(seq(from = 0, to = n.subrow.names * n.row.names, by = n.subrow.names), - extra_css = "border-bottom: 2px solid black", hline_after = TRUE) - - if (plot.legend == TRUE) { - # Save the scorecard (without legend) - save_kable(table.html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1) - - # White space for legend - legend.white.space <- 37.8 * legend.white.space ## converting pixels to cm - - # Create and save color bar legend - scorecard_legend <- .SCLegend(legend.breaks = legend.breaks, - palette = palette, - colorunder = colorunder, - colorsup = colorsup, - label.scale = label.scale, - legend.width = legend.width, - legend.height = 
legend.height, - legend.white.space = legend.white.space, - fileout = fileout) - - # Add the legends below the scorecard table - system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout, - '_tmpScorecardLegend.png ', fileout)) - # Remove temporary scorecard table - unlink(paste0(fileout, '_tmpScorecard*.png')) - } - if (plot.legend == FALSE) { - save_kable(table.html, file = fileout) - } -} diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 99909f095072b6c6d170439b31da812decc858a9..7278eb16a2663ac9f723c68e375522439b046237 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -2,8 +2,10 @@ #' #'@description Scorecards function to create scorecard tables for multiple systems #' and references (types 9 to 12). -#'@param input_data is an array of spatially aggregated metrics containing the +#'@param data is an array of spatially aggregated metrics containing the #' following dimensions; system, reference, metric, time, sdate, region. +#'@param sign is an array with the same dimensions as data indicating the +#' significance of the metrics, with either TRUE, FALSE or NULL. #'@param system a vector of character strings defining the systems following the #' archive.yml format from verification suite #'@param reference a vector of character strings defining the references @@ -21,6 +23,28 @@ #' include in the scorecard title #'@param fileout.label a character string containing additional information to #' include in the output png file when saving the scorecard. +#'@param plot.legend A logical value to determine if the legend is plotted. +#'@param legend.breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each legend.dim. A list of vectors can be +#' given as input if the legend.dims require different breaks. This parameter +#' is required even if the legend is not plotted, to define the colors in the +#' scorecard table. +#'@param legend.white.space A numeric value defining the initial starting +#' position of the legend bars; the white space in front of the legend is +#' calculated from the leftmost point of the table as a distance in cm. +#'@param legend.width A numeric value to define the width of the legend bars. +#'@param legend.height A numeric value to define the height of the legend bars. +#'@param label.scale A numeric value to define the size of the legend labels. +#'@param col1.width A numeric value defining the width of the first table column +#' in cm. +#'@param col2.width A numeric value defining the width of the second table +#' column in cm. +#'@param columns.width A numeric value defining the width of all columns within the +#' table in cm (excluding the first and second columns containing the titles). +#'@param font.size A numeric indicating the font size on the scorecard table. +#'@param round.decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. Default is 2. +#'@param output.path a path of the location to save the scorecard plots. 
#' #'@return @@ -44,19 +68,16 @@ #' ) -ScorecardsMulti <- function(data, - system, - reference, - var, - start.year, - end.year, - start.months, - forecast.months, - region.names, - metrics, - table.label, - fileout.label, - output.path){ +ScorecardsMulti <- function(data, sign, system, reference, var, start.year, + end.year, start.months, forecast.months, + region.names, metrics, plot.legend = TRUE, + legend.breaks = NULL, legend.white.space = NULL, + legend.width = 555, legend.height = 50, + table.label = NULL, fileout.label = NULL, + label.scale = 1.4, font.size = 1.1, + col1.width = NULL, col2.width = NULL, + columns.width = NULL, + round.decimal = 2, output.path){ ## Checks to apply: # first dimension in aggregated_metrics is system and second dimension is reference @@ -70,31 +91,66 @@ ScorecardsMulti <- function(data, fileout.label <- "" } - ## Make sure input_data is in correct order for using in functions: + ## Make sure data is in correct order for using in functions: data_order <- c('system','reference','metric','time','sdate','region') data <- Reorder(data, data_order) - + ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(input_data)$metrics <- metrics + data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(data)$metrics <- metrics + + if(!is.null(sign)){ + sign <- Reorder(sign, data_order) + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + } + ## Transform data for scorecards by forecast month (types 11 & 12) if(length(start.months) >= length(forecast.months)){ - transformed_data <- SCTransform(data = input_data, + + transformed_data <- SCTransform(data = data, sdate_dim = 'sdate', ftime_dim = 'time') + + if(!is.null(sign)){ + transformed_sign <- SCTransform(data = sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + } else { + transformed_sign <- NULL + } } ## Load configuration files - sys_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/conf/archive.yml")$esarchive - var_dict <- read_yaml("/esarchive/scratch/nmilders/gitlab/git_clones/csscorecards/inst/config/variable-dictionary.yml")$vars + if (is.null(recipe$Run$filesystem)) { + filesystem <- 'esarchive' + } else { + filesystem <- recipe$Run$filesystem + } + sys_dict <- read_yaml("conf/archive.yml")[[filesystem]] + var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units + + var.units <- NULL + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } system.name <- NULL reference.name <- NULL @@ -153,19 +209,7 @@ ScorecardsMulti <- function(data, ## Legend upper limit color legend.col.sup <- .legend_col_sup(metrics, colorsup) legend.col.sup <- legend.col.sup[metrics] - - ## Legend inputs - plot.legend = TRUE - label.scale = 1.4 - legend.width = 555 - legend.height = 50 - - ## Data display inputs - round.decimal = 2 - font.size = 1.1 - - legend.white.space <- col1.width 
<- col2.width <- NULL ## Use default values of function - + ## Loop over region for(reg in 1:length(region.names)){ @@ -195,40 +239,54 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 9, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ - data_sc_9 <- Subset(input_data, c('reference','region'), list(1, reg), drop = 'selected') + data_sc_9 <- Subset(data, c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_9 <- Subset(sign, c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_9 <- NULL + } } else if(model == 'reference'){ - data_sc_9 <- Subset(input_data, c('system','region'), list(1, reg), drop = 'selected') + data_sc_9 <- Subset(data, c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_9 <- Subset(sign, c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_9 <- NULL + } } - SCPlotScorecard(data = data_sc_9, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - fileout = fileout) + + VizScorecard(data = data_sc_9, + sign = sign_sc_9, + row_dim = model, + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = model.name, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = table.model.name, + subrow_title = 'Forecast Month', + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = 4, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 10 #### @@ -237,87 +295,115 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 10, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ - data_sc_10 <- Subset(Reorder(input_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + data_sc_10 <- Subset(Reorder(data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_10 <- Subset(Reorder(sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_10 <- NULL + } } else if(model == 
'reference'){ - data_sc_10 <- Subset(Reorder(input_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + data_sc_10 <- Subset(Reorder(data, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_10 <- Subset(Reorder(sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_10 <- NULL + } } - SCPlotScorecard(data = data_sc_10, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast month', - subrow.title = table.model.name, - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - fileout = fileout) + VizScorecard(data = data_sc_10, + sign = sign_sc_10, + row_dim = 'time', + subrow_dim = model, + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = model.name, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast month', + subrow_title = table.model.name, + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = 4, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 11 #### ## (transformation only) if(length(start.months) >= length(forecast.months)){ + fileout <- .Filename(model = model, eval.name = eval.filename, var = var, start.year = start.year, end.year = end.year, scorecard.type = 11, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + if(model == 'system'){ data_sc_11 <- Subset(transformed_data, c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_11 <- Subset(transformed_sign, c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_11 <- NULL + } } else if(model == 'reference'){ data_sc_11 <- Subset(transformed_data, c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_11 <- Subset(transformed_sign, c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_11 <- NULL + } } - SCPlotScorecard(data = data_sc_11, - row.dim = model, - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = model.name, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = table.model.name, - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - 
plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = 4, - col2.width = col2.width, - fileout = fileout) + + VizScorecard(data = data_sc_11, + sign = sign_sc_11, + row_dim = model, + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = model.name, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = table.model.name, + subrow_title = 'Forecast Month', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = 4, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } - #### Scorecard_type 12 #### ## (transformation and reorder) if(length(start.months) >= length(forecast.months)){ @@ -325,41 +411,56 @@ ScorecardsMulti <- function(data, start.year = start.year, end.year = end.year, scorecard.type = 12, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) + new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + if(model == 'system'){ data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('reference','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_12 <- NULL + } } else if(model == 'reference'){ data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('system','region'), list(1, reg), drop = 'selected') + if(!is.null(sign)){ + sign_sc_12 <- Subset(Reorder(transformed_sign, new_order), c('system','region'), list(1, reg), drop = 'selected') + } else { + sign_sc_12 <- NULL + } } - SCPlotScorecard(data = data_sc_12, - row.dim = 'time', - subrow.dim = model, - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = model.name, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = table.model.name, - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = 4, - fileout = fileout) + + VizScorecard(data = data_sc_12, + sign = sign_sc_12, + row_dim = 'time', + subrow_dim = model, + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = model.name, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = 
table.subtitle, + row_title = 'Forecast Month', + subrow_title = table.model.name, + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = 4, + columns_width = columns.width, + fileout = fileout) } } ## close loop on region diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 0e1d7cbc9738111a13f0b760372c6f96712ec183..9fd445451dcd4bb54d3aeb7c7eab43075434e6eb 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -2,8 +2,10 @@ #' #'@description Scorecards function to create scorecard tables for one system and #' one reference combination (types 1 to 4). -#'@param input_data is an array of spatially aggregated metrics containing the +#'@param data is an array of spatially aggregated metrics containing the #' following dimensions; system, reference, metric, time, sdate, region. +#'@param sign is an array with the same dimensions as data indicating the +#' significance of the metrics, with values TRUE, FALSE or NULL. #'@param system a vector of character strings defining the systems following the #' archive.yml format from verification suite #'@param reference a vector of character strings defining the references @@ -21,7 +23,29 @@ #' include in the scorecard title #'@param fileout.label a character string containing additional information to #' include in the output png file when saving the scorecard. -#'@param output.path a path of the location to save the scorecard plots +#'@param plot.legend A logical value to determine if the legend is plotted. +#'@param legend.breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each legend.dim. A list of vectors can be +#' given as input if the legend.dims require different breaks. This parameter +#' is required even if the legend is not plotted, to define the colors in the +#' scorecard table. +#'@param legend.white.space A numeric value defining the initial starting +#' position of the legend bars; the white space in front of the legend is +#' calculated from the left-most point of the table as a distance in cm. +#'@param legend.width A numeric value to define the width of the legend bars. +#'@param legend.height A numeric value to define the height of the legend bars. +#'@param label.scale A numeric value to define the size of the legend labels. +#'@param col1.width A numeric value defining the width of the first table column +#' in cm. +#'@param col2.width A numeric value defining the width of the second table +#' column in cm. +#'@param columns.width A numeric value defining the width of all columns within +#' the table in cm (excluding the first and second columns containing the titles). +#'@param font.size A numeric indicating the font size on the scorecard table. +#'@param round.decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. Default is 2. 
+#'@param output.path A path of the location to save the scorecard plots #' #'@return #' This function returns 4 scorecards images, saved in the directory output.path @@ -41,15 +65,18 @@ #' output.path = '/esarchive/scratch/nmilders/scorecards_images/test' #' ) #'@export -ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, - start.months, forecast.months, region.names, - metrics, legend.breaks = NULL, +ScorecardsSingle <- function(data, sign, system, reference, var, start.year, + end.year, start.months, forecast.months, + region.names, metrics, plot.legend = TRUE, + legend.breaks = NULL, legend.white.space = NULL, + legend.width = 550, legend.height = 50, table.label = NULL, fileout.label = NULL, - legend.white.space = NULL, - col1.width = NULL, col2.width = NULL, - output.path){ - - ## Checks to apply: + label.scale = 1.4, font.size = 1.1, + col1.width = NULL, col2.width = NULL, + columns.width = 1.2, + round.decimal = 2, output.path){ + + ## Checks to apply: # First dimension in aggregated_metrics is system and second dimension is reference # To allow 1 region - if region = 1 --> only scorecards 1 & 3 need to be plotted # If any dimension of input dat is 1, make sure dimension is still present in array @@ -77,31 +104,65 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout.label <- "" } - ## Make sure input_data is in correct order for using in functions: + ## Make sure data is in correct order for use in functions: data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics - + ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) - attributes(input_data)$metrics <- metrics + data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(data)$metrics <- metrics + + if(!is.null(sign)){ + sign <- Reorder(sign, data_order) + sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + attributes(sign)$metrics <- metrics + } ## Transform data for scorecards by forecast month (types 3 & 4) if(length(start.months) >= length(forecast.months)){ + + transformed_data <- SCTransform(data = data, + sdate_dim = 'sdate', + ftime_dim = 'time') + + if(!is.null(sign)){ + transformed_sign <- SCTransform(data = sign, + sdate_dim = 'sdate', + ftime_dim = 'time') + } else { + transformed_sign <- NULL + } } ## Load configuration files - sys_dict <- read_yaml("conf/archive.yml")$esarchive + if (is.null(recipe$Run$filesystem)) { + filesystem <- 'esarchive' + } else { + filesystem <- recipe$Run$filesystem + } + sys_dict <- read_yaml("conf/archive.yml")[[filesystem]] var_dict <- read_yaml("conf/variable-dictionary.yml")$vars ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units + + var.units <- NULL # initialize so the dictionary fallback below applies when the recipe gives no units + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } ## Get metric long names metric.names.list <- .met_names(metrics, var.units) @@ -128,31 
+189,21 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, legend.col.sup <- .legend_col_sup(metrics, colorsup) legend.col.sup <- legend.col.sup[metrics] - ## Legend inputs - plot.legend = TRUE - label.scale = 1.4 - legend.width = 555 - legend.height = 50 - - ## Data display inputs - round.decimal = 2 - font.size = 1.1 - ## Loop over system and reference for each scorecard plot - for (sys in 1:dim(input_data)['system']) { - for (ref in 1:dim(input_data)['reference']) { + for (sys in 1:dim(data)['system']) { + for (ref in 1:dim(data)['reference']) { ## TO DO: Apply check to each scorecard function ## check dimension 'metric' exists: - if (!("metric" %in% names(dim(input_data)))) { - dim(input_data) <- c(metric = 1, dim(input_data)) + if (!("metric" %in% names(dim(data)))) { + dim(data) <- c(metric = 1, dim(data)) } ## Find position of mean bias metric to calculate breaks breaks_bias <- NULL if ('mean_bias' %in% metrics){ - stopifnot(identical(names(dim(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) - temp_data <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') + stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) + temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') pos_bias <- which(metrics == 'mean_bias') if(var == 'psl'){ temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100 @@ -182,77 +233,99 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 1, fileout.label = fileout.label, output.path = output.path) - data_sc_1 <- Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_1, - row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + + data_sc_1 <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') + + if(!is.null(sign)){ + sign_sc_1 <- Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + } else { + sign_sc_1 <- NULL + } + + VizScorecard(data = data_sc_1, + sign = sign_sc_1, + row_dim = 'region', + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = region.names, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Region', + subrow_title = 'Forecast Month', + col_title = 'Start date', + legend_breaks = 
legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) #### Scorecard_type 2 #### ## (reorder only) ## Scorecard type 2 is same as type 1 for only one region, therefore is ## only plotted if more than one region is requested - if(dim(input_data)['region'] > 1) { + if(dim(data)['region'] > 1) { + fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') - data_sc_2 <- Reorder(Subset(input_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - SCPlotScorecard(data = data_sc_2, - row.dim = 'time', - subrow.dim = 'region', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = region.names, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = 'Region', - col.title = 'Start date', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + + data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + + if(!is.null(sign)){ + sign_sc_2 <- Reorder(Subset(sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + } else { + sign_sc_2 <- NULL + } + + VizScorecard(data = data_sc_2, + sign = sign_sc_2, + row_dim = 'time', + subrow_dim = 'region', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = region.names, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = 'Region', + col_title = 'Start date', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } ## close if @@ -262,39 +335,48 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 3, fileout.label = fileout.label, output.path = output.path) + data_sc_3 <- Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected') - SCPlotScorecard(data = data_sc_3, - 
row.dim = 'region', - subrow.dim = 'time', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = region.names, - subrow.names = forecast.months, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Region', - subrow.title = 'Forecast Month', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + + if(!is.null(sign)){ + sign_sc_3 <- Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected') + } else { + sign_sc_3 <- NULL + } + + VizScorecard(data = data_sc_3, + sign = sign_sc_3, + row_dim = 'region', + subrow_dim = 'time', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = region.names, + subrow_names = forecast.months, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Region', + subrow_title = 'Forecast Month', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } - #### Scorecard_type 4 #### ## (transformation and reorder) ## Scorecard type 4 is same as type 3 for only one region, therefore is @@ -303,37 +385,48 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) + new_order <- c('metric', 'region', 'sdate', 'time') + data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) - SCPlotScorecard(data = data_sc_4, - row.dim = 'time', - subrow.dim = 'region', - col.dim = 'metric', - subcol.dim = 'sdate', - legend.dim = 'metric', - row.names = forecast.months, - subrow.names = region.names, - col.names = metric.names, - subcol.names = month.abb[as.numeric(start.months)], - table.title = table.title, - table.subtitle = table.subtitle, - row.title = 'Forecast Month', - subrow.title = 'Region', - col.title = 'Target month', - legend.breaks = legend.breaks, - plot.legend = plot.legend, - label.scale = label.scale, - legend.width = legend.width, - legend.height = legend.height, - palette = palette, - colorunder = legend.col.inf, - colorsup = legend.col.sup, - round.decimal = round.decimal, - font.size = font.size, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - fileout = fileout) + + if(!is.null(sign)){ + sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) + } else { + sign_sc_4 <- NULL + } + 
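The null-guarded subsetting of 'sign' above is repeated almost verbatim for every scorecard type. A small helper along the following lines could factor the pattern out; this is an illustrative sketch only, and the name .subset_sign is hypothetical, not part of this patch:

.subset_sign <- function(sign, along, indices, new_order = NULL) {
  # Return NULL when no significance array was provided
  if (is.null(sign)) {
    return(NULL)
  }
  out <- ClimProjDiags::Subset(sign, along = along, indices = indices,
                               drop = 'selected')
  # Optionally match the dimension order used for the data array
  if (!is.null(new_order)) {
    out <- s2dv::Reorder(out, new_order)
  }
  out
}
# e.g.: sign_sc_4 <- .subset_sign(transformed_sign, c('system', 'reference'),
#                                 list(sys, ref), new_order)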
VizScorecard(data = data_sc_4, + sign = sign_sc_4, + row_dim = 'time', + subrow_dim = 'region', + col_dim = 'metric', + subcol_dim = 'sdate', + legend_dim = 'metric', + row_names = forecast.months, + subrow_names = region.names, + col_names = metric.names, + subcol_names = month.abb[as.numeric(start.months)], + table_title = table.title, + table_subtitle = table.subtitle, + row_title = 'Forecast Month', + subrow_title = 'Region', + col_title = 'Target month', + legend_breaks = legend.breaks, + plot_legend = plot.legend, + label_scale = label.scale, + legend_width = legend.width, + legend_height = legend.height, + palette = palette, + colorunder = legend.col.inf, + colorsup = legend.col.sup, + round_decimal = round.decimal, + font_size = font.size, + legend_white_space = legend.white.space, + col1_width = col1.width, + col2_width = col2.width, + columns_width = columns.width, + fileout = fileout) } ## close if } ## close loop on ref diff --git a/modules/Scorecards/R/tmp/Utils.R b/modules/Scorecards/R/tmp/Utils.R index 6ba49e8c5c887c938902e30d200e49bff1af142e..caee98e497012b5e9dac36583545837489708265 100644 --- a/modules/Scorecards/R/tmp/Utils.R +++ b/modules/Scorecards/R/tmp/Utils.R @@ -201,8 +201,6 @@ } - - ## Output file name to save scorecard .Filename <- function(system = NULL, reference = NULL, model = NULL, eval.name = NULL, var = NULL, start.year = NULL, end.year = NULL, scorecard.type = NULL, @@ -230,96 +228,6 @@ return(scorecard_save_path) } -# Scorecards function to assign background color of table cells, -# color of text in table and to bold the text. -# -# It will return a list with 2 arrays: -# (1) metric.color, A 2-dimensional array with character strings containing the -# color codes for each cell background. -# (2) metric.text.color, A 2-dimensional array with character strings -# containing the color codes for each cell text. 
-.SCTableColors <- function(table, n.col, n.subcol, n.row, n.subrow, - legend.breaks, palette, colorunder, colorsup) { - # Define rows and columns - n.rows <- n.row * n.subrow - n.columns <- n.col * n.subcol - - ## Set table background colors - metric.color <- array(colorunder, c(n.row * n.subrow, n.columns)) - metric.text.color <- array("#2A2A2A", c(n.row * n.subrow , n.columns)) - # metric.text.bold <- array(TRUE, c(n.row * n.subrow , n.columns - 2)) ## Setting all values to bold - - ## Define cell and text colors to show in table - for (i in 1:n.col) { - metric.int <- legend.breaks[[i]] - for (rr in 1:n.rows) { - for (j in 1:n.subcol) { - for (pp in 1:(length(metric.int) - 1)) { - if (is.nan(table[rr,((i - 1) * n.subcol + j)])) { - metric.color[rr,((i - 1) * n.subcol + j)] <- "gray" - } else { - if (table[rr,((i - 1) * n.subcol + j)] >= - metric.int[pp] && table[rr,((i - 1) * n.subcol + j)] <= - metric.int[pp+1]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- palette[[i]][pp] #palette[pp] - } - if (table[rr,((i - 1) * n.subcol + j)] < metric.int[1]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- colorunder[i] - } - if (table[rr,((i - 1) * n.subcol + j)] >= - metric.int[length(metric.int)]) { - metric.color[rr,((i - 1) * n.subcol + j)] <- colorsup[i] - } - } - ## color text in white and bold if background is white or dark blue or dark red: - if (is.nan(table[rr,((i - 1) * n.subcol + j)]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 1 && - table[rr,((i - 1) * n.subcol + j)] < metric.int[2]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == 2 && - table[rr,((i - 1) * n.subcol + j)] < metric.int[3]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 1) && - table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 1]) || - (!is.nan(table[rr,((i - 1) * n.subcol + j)]) && pp == (length(metric.int) - 2) && - table[rr,((i - 1) * n.subcol + j)] >= metric.int[length(metric.int) - 2])) { - metric.text.color[rr,((i - 1) * n.subcol + j)] <- "white" - #metric.text.bold[rr,((i - 1) * n.subcol + j)] <- TRUE - } - } - } - } - } - - return(list(metric.color = metric.color, - metric.text.color = metric.text.color)) - -} - -# Scorecards function to create the color bar legends for the required metrics -# and paste them below the scorecard table -.SCLegend <- function(legend.breaks, palette, colorunder, colorsup, - label.scale, legend.width, legend.height, - legend.white.space, fileout) { - - ## Create color bar legends for each metric - for (i in 1:length(palette)) { - png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend.width, - height = legend.height) - ColorBar(brks = legend.breaks[[i]], cols = palette[[i]], vertical = FALSE, - label_scale = label.scale, col_inf = colorunder[[i]], - col_sup = colorsup[[i]]) - dev.off() - if (i == 1) { - ## Add white space to the left of the first color bar legend - system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', - legend.white.space, 'x0 ', fileout, '_tmpScorecardLegend.png')) - } else { - system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', - fileout, '_tmpLegend', i, '.png ', fileout, - '_tmpScorecardLegend.png')) - } - } - unlink(paste0(fileout,'_tmpLegend*.png')) -} # Function to calculate color bar breaks for bias metric .SCBiasBreaks <- function(data){ diff --git a/modules/Scorecards/R/tmp/VizScorecard.R b/modules/Scorecards/R/tmp/VizScorecard.R new file mode 100644 index 
0000000000000000000000000000000000000000..425b799fd1b3b83f3574a959a837b96aeba24fcf --- /dev/null +++ b/modules/Scorecards/R/tmp/VizScorecard.R @@ -0,0 +1,627 @@ +#'Function to plot Scorecard tables +#' +#'This function renders a scorecard table from a multidimensional array +#'in HTML style. The structure of the table is based on the assignment of each +#'dimension of the array as a structure element: row, subrow, column or +#'subcolumn. It is useful for presenting tabular results with color coding. +#' +#'Note: Module PhantomJS is required. +#' +#'@param data A multidimensional array containing the data to be plotted with +#' at least four dimensions. Each dimension will have assigned a structure +#' element: row, subrow, column and subcolumn. +#'@param sign A multidimensional boolean array with the same dimensions as +#' 'data', indicating which values are to be highlighted. If set to NULL no +#' values will be highlighted. +#'@param row_dim A character string indicating the dimension name to show in the +#' rows of the plot. It is set as 'region' by default. +#'@param subrow_dim A character string indicating the dimension name to show in +#' the sub-rows of the plot. It is set as 'time' by default. +#'@param col_dim A character string indicating the dimension name to show in the +#' columns of the plot. It is set as 'metric' by default. +#'@param subcol_dim A character string indicating the dimension name to show in +#' the sub-columns of the plot. It is set as 'sdate' by default. +#'@param legend_dim A character string indicating the dimension name to use for +#' the legend. It is set as 'metric' by default. +#'@param row_names A vector of character strings with row display names. It +#' is set as NULL by default. +#'@param subrow_names A vector of character strings with sub-row display names. +#' It is set as NULL by default. +#'@param col_names A vector of character strings with column display names. It +#' is set as NULL by default. +#'@param subcol_names A vector of character strings with sub-column display +#' names. It is set as NULL by default. +#'@param row_title A character string for the title of the row names. It is set +#' as NULL by default. +#'@param subrow_title A character string for the title of the sub-row names. It +#' is set as NULL by default. +#'@param table_title A character string for the title of the plot. It is set as +#' NULL by default. +#'@param table_subtitle A character string for the sub-title of the plot. It is +#' set as NULL by default. +#'@param legend_breaks A vector of numerics or a list of vectors of numerics, +#' containing the breaks for the legends. If a vector is given as input, then +#' these breaks will be repeated for each 'legend_dim'. A list of vectors can +#' be given as input if the 'legend_dims' require different breaks. This +#' parameter is required even if the legend is not plotted, to define the +#' colors in the scorecard table. It is set as NULL by default. +#'@param plot_legend A logical value to determine if the legend is plotted. It +#' is set as TRUE by default. +#'@param label_scale A numeric value to define the size of the legend labels. +#' It is set as 1.4 by default. +#'@param legend_width A numeric value to define the width of the legend bars. By +#' default it is set to NULL and calculated internally from the table width. +#'@param legend_height A numeric value to define the height of the legend bars. +#' It is set as 50 by default. 
+#'@param palette A vector of character strings or a list of vectors of +#' character strings containing the colors to use in the legends. If a vector +#' is given as input, then these colors will be used for each legend_dim. A +#' list of vectors can be given as input if different colors are desired for +#' the legend_dims. This parameter must be included even if the legend is +#' not plotted, to define the colors in the scorecard table. +#'@param colorunder A character string, a vector of character strings or a +#' list with single character string elements defining the colors to use for +#' data values which are inferior to the lowest breaks value. This parameter +#' will also plot an inferior triangle in the legend bar. The parameter can be +#' set to NULL if there are no inferior values. If a character string is given +#' this color will be applied to all 'legend_dims'. It is set as NULL by +#' default. +#'@param colorsup A character string, a vector of character strings or a +#' list with single character string elements defining the colors to use for +#' data values which are superior to the highest breaks value. This parameter +#' will also plot a superior triangle in the legend bar. The parameter can be +#' set to NULL if there are no superior values. If a character string is given +#' this color will be applied to all legend_dims. It is set as NULL by default. +#'@param round_decimal A numeric indicating to which decimal point the data +#' is to be displayed in the scorecard table. It is set as 2 by default. +#'@param font_size A numeric indicating the font size on the scorecard table. +#' It is set as 1.1 by default. +#'@param legend_white_space A numeric value defining the initial starting +#' position of the legend bars; the white space in front of the legend is +#' calculated from the left-most point of the table as a distance in cm. The +#' default value is 6. +#'@param columns_width A numeric value defining the width of all columns within +#' the table in cm (excluding the first and second columns containing the +#' titles). It is set as 1.2 by default. +#'@param col1_width A numeric value defining the width of the first table column +#' in cm. It is set as NULL by default. +#'@param col2_width A numeric value defining the width of the second table +#' column in cm. It is set as NULL by default. +#'@param fileout A path of the location to save the scorecard plots. By default +#' the plots will be saved to the working directory. +#' +#'@return An image file containing the scorecard. 
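+#'@details Parameters 'legend_breaks', 'palette', 'colorunder' and 'colorsup'
+#' accept either a single value/vector or a list; a single input is recycled
+#' internally over 'legend_dim'. For example, with four metrics,
+#' legend_breaks = seq(-1, 1, 0.5) behaves like rep(list(seq(-1, 1, 0.5)), 4).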
+#'@examples +#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3, +#' 'time' = 6)) +#'row_names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH') +#'col_names <- c('Mean bias (K)', 'Correlation', 'RPSS','CRPSS') +#'VizScorecard(data = data, row_names = row_names, col_names = col_names, +#' subcol_names = month.abb[as.numeric(1:12)], +#' row_title = 'Region', subrow_title = 'Forecast Month', +#' col_title = 'Start date', +#' table_title = "Temperature of ECMWF System 5", +#' table_subtitle = "(Ref: ERA5 1994-2016)", +#' fileout = 'test.png') +#' +#'@import kableExtra +#'@importFrom RColorBrewer brewer.pal +#'@importFrom s2dv Reorder +#'@importFrom ClimProjDiags Subset +#'@importFrom CSTools MergeDims +#'@export +VizScorecard <- function(data, sign = NULL, row_dim = 'region', + subrow_dim = 'time', col_dim = 'metric', + subcol_dim = 'sdate', legend_dim = 'metric', + row_names = NULL, subrow_names = NULL, + col_names = NULL, subcol_names = NULL, + row_title = NULL, subrow_title = NULL, + col_title = NULL, table_title = NULL, + table_subtitle = NULL, legend_breaks = NULL, + plot_legend = TRUE, label_scale = 1.4, + legend_width = NULL, legend_height = 50, + palette = NULL, colorunder = NULL, colorsup = NULL, + round_decimal = 2, font_size = 1.1, + legend_white_space = 6, columns_width = 1.2, + col1_width = NULL, col2_width = NULL, + fileout = './scorecard.png') { + + # Input parameter checks + # Check data + if (!is.array(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (length(dim(data)) != 4) { + stop("Parameter 'data' must have four dimensions.") + } + dimnames <- names(dim(data)) + # Check sign + if (is.null(sign)) { + sign <- array(FALSE, dim = dim(data)) + } else { + if (!is.array(sign)) { + stop("Parameter 'sign' must be a boolean array or NULL.") + } + if (any(sort(names(dim(sign))) != sort(dimnames))) { + stop("Parameter 'sign' must have same dimensions as 'data'.") + } + if (typeof(sign) != 'logical') { + stop("Parameter 'sign' must be an array with logical values.") + } + } + # Check row_dim + if (!is.character(row_dim)) { + stop("Parameter 'row_dim' must be a character string.") + } + if (!row_dim %in% names(dim(data))) { + stop("Parameter 'row_dim' is not found in 'data' dimensions.") + } + # Check row_names + if (is.null(row_names)) { + row_names <- as.character(1:dim(data)[row_dim]) + } + if (length(row_names) != as.numeric(dim(data)[row_dim])) { + stop("Parameter 'row_names' must have the same length of dimension ", + "'row_dim'.") + } + # Check subrow_dim + if (!is.character(subrow_dim)) { + stop("Parameter 'subrow_dim' must be a character string.") + } + if (!subrow_dim %in% names(dim(data))) { + stop("Parameter 'subrow_dim' is not found in 'data' dimensions.") + } + # Check subrow_names + if (is.null(subrow_names)) { + subrow_names <- as.character(1:dim(data)[subrow_dim]) + } + if (length(subrow_names) != as.numeric(dim(data)[subrow_dim])) { + stop("Parameter 'subrow_names' must have the same length of dimension ", + "'subrow_dim'.") + } + # Check col_dim + if (!is.character(col_dim)) { + stop("Parameter 'col_dim' must be a character string.") + } + if (!col_dim %in% names(dim(data))) { + stop("Parameter 'col_dim' is not found in 'data' dimensions.") + } + # Check col_names + if (is.null(col_names)) { + col_names <- as.character(1:dim(data)[col_dim]) + } + if (length(col_names) != as.numeric(dim(data)[col_dim])) { + stop("Parameter 'col_names' must have the same length of dimension ", + "'col_dim'.") + } + # Check 
subcol_dim + if (!is.character(subcol_dim)) { + stop("Parameter 'subcol_dim' must be a character string.") + } + if (!subcol_dim %in% names(dim(data))) { + stop("Parameter 'subcol_dim' is not found in 'data' dimensions.") + } + # Check subcol_names + if (is.null(subcol_names)) { + subcol_names <- as.character(1:dim(data)[subcol_dim]) + } + if (length(subcol_names) != as.numeric(dim(data)[subcol_dim])) { + stop("Parameter 'subcol_names' must have the same length of dimension ", + "'subcol_dim'.") + } + # Check legend_dim + if (!is.character(legend_dim)) { + stop("Parameter 'legend_dim' must be a character string.") + } + if (!legend_dim %in% names(dim(data))) { + stop("Parameter 'legend_dim' is not found in 'data' dimensions.") + } + # Check row_title + if (is.null(row_title)) { + row_title <- "" + } else { + if (!is.character(row_title)) { + stop("Parameter 'row_title' must be a character string.") + } + } + # Check subrow_title + if (is.null(subrow_title)) { + subrow_title <- "" + } else { + if (!is.character(subrow_title)) { + stop("Parameter 'subrow_title' must be a character string.") + } + } + # Check col_title + if (is.null(col_title)) { + col_title <- "" + } else { + if (!is.character(col_title)) { + stop("Parameter 'col_title' must be a character string.") + } + } + # Check table_title + if (is.null(table_title)) { + table_title <- "" + } else { + if (!is.character(table_title)) { + stop("Parameter 'table_title' must be a character string.") + } + } + # Check table_subtitle + if (is.null(table_subtitle)) { + table_subtitle <- "" + } else { + if (!is.character(table_subtitle)) { + stop("Parameter 'table_subtitle' must be a character string.") + } + } + # Check legend_breaks + if (inherits(legend_breaks, 'list')) { + if (!(length(legend_breaks) == as.numeric(dim(data)[legend_dim]))) { + stop("Parameter 'legend_breaks' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + } else if (is.numeric(legend_breaks)) { + legend_breaks <- rep(list(legend_breaks), as.numeric(dim(data)[legend_dim])) + } else if (is.null(legend_breaks)) { + legend_breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend_dim])) + } else { + stop("Parameter 'legend_breaks' must be a numeric vector, a list or NULL.") + } + # Check plot_legend + if (!inherits(plot_legend, 'logical')) { + stop("Parameter 'plot_legend' must be a logical value.") + } + # Check label_scale + if (any(!is.numeric(label_scale), length(label_scale) != 1)) { + stop("Parameter 'label_scale' must be a numeric value of length 1.") + } + # Check legend_width + if (is.null(legend_width)) { + legend_width <- length(subcol_names) * 46.5 + } else if (any(!is.numeric(legend_width), length(legend_width) != 1)) { + stop("Parameter 'legend_width' must be a numeric value of length 1.") + } + # Check legend_height + if (any(!is.numeric(legend_height), length(legend_height) != 1)) { + stop("Parameter 'legend_height' must be a numeric value of length 1.") + } + # Check colour palette input + if (inherits(palette, 'list')) { + if (!(length(palette) == as.numeric(dim(data)[legend_dim]))) { + stop("Parameter 'palette' must be a list with the same number of ", + "elements as the length of the 'legend_dim' dimension in data.") + } + if (!all(sapply(palette, is.character))) { + stop("Parameter 'palette' must be a list of character vectors.") + } + } else if (is.character(palette)) { + palette <- rep(list(palette), as.numeric(dim(data)[legend_dim])) + } else if (is.null(palette)) { + n <- 
length(legend_breaks[[1]]) + if (n == 1) { + stop("Parameter 'legend_breaks' can't be of length 1.") + } else if (n == 2) { + colors <- c('#B35806') + } else if (n == 3) { + colors <- c('#8073AC', '#E08214') + } else if (n == 11) { + colors <- c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', + '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08') + } else if (n > 11) { + stop("Parameter 'palette' must be provided when 'legend_breaks' ", + "exceeds the length of 11.") + } else { + colors <- rev(brewer.pal(n-1, "PuOr")) + } + palette <- rep(list(colors), as.numeric(dim(data)[legend_dim])) + } else { + stop("Parameter 'palette' must be a character vector, a list or NULL.") + } + # Check colorunder + if (is.null(colorunder)) { + colorunder <- rep("#04040E", as.numeric(dim(data)[legend_dim])) + } + if (length(colorunder) == 1) { + colorunder <- rep(colorunder, as.numeric(dim(data)[legend_dim])) + } + if (length(colorunder) != as.numeric(dim(data)[legend_dim])) { + stop("Parameter 'colorunder' must be a character string vector or a list ", + "with the same number of elements as the length of the 'legend_dim' ", + "dimension in data.") + } + if (!is.character(unlist(colorunder))) { + stop("Parameter 'colorunder' must be a character string vector ", + "or a list of character string elements.") + } + # Check colorsup + if (is.null(colorsup)) { + colorsup <- rep("#730C04", as.numeric(dim(data)[legend_dim])) + } + if (length(colorsup) == 1) { + colorsup <- rep(colorsup, as.numeric(dim(data)[legend_dim])) + } + if (length(colorsup) != as.numeric(dim(data)[legend_dim])) { + stop("Parameter 'colorsup' must be a character string vector or a list ", + "with the same number of elements as the length of the 'legend_dim' ", + "dimension in data.") + } + if (!is.character(unlist(colorsup))) { + stop("Parameter 'colorsup' must be a character string vector ", + "or a list of character string elements.") + } + # Check round_decimal + if (!is.numeric(round_decimal)) { + stop("Parameter 'round_decimal' must be a numeric value of length 1.") + } + # Check font_size + if (!is.numeric(font_size)) { + stop("Parameter 'font_size' must be a numeric value of length 1.") + } + # Check legend white space + if (!is.numeric(legend_white_space)) { + stop("Parameter 'legend_white_space' must be a numeric value of length 1.") + } + # Check columns_width + if (!is.numeric(columns_width)) { + stop("Parameter 'columns_width' must be a numeric value.") + } + # Check col1_width + if (is.null(col1_width)) { + if (max(nchar(row_names)) == 1) { + col1_width <- max(nchar(row_names)) + } else { + col1_width <- max(nchar(row_names))/4 + } + } else if (!is.numeric(col1_width)) { + stop("Parameter 'col1_width' must be a numeric value of length 1.") + } + # Check col2_width + if (is.null(col2_width)) { + if (max(nchar(subrow_names)) == 1 ) { + col2_width <- max(nchar(subrow_names)) + } else { + col2_width <- max(nchar(subrow_names))/4 + } + } else if (!is.numeric(col2_width)) { + stop("Parameter 'col2_width' must be a numeric value of length 1.") + } + + # Get dimensions of inputs + n_col_names <- length(col_names) + n_subcol_names <- length(subcol_names) + n_row_names <- length(row_names) + n_subrow_names <- length(subrow_names) + + # Define table size + n_rows <- n_row_names * n_subrow_names + n_columns <- 2 + (n_col_names * n_subcol_names) + + # Row labels for the first table column + row_names_table <- rep("", n_rows) + for (row in 1:n_row_names) { + row_names_table[floor(n_subrow_names/2) + (row - 1) * n_subrow_names] <- row_names[row] + } + + # Define 
scorecard table titles + column_titles <- c(row_title, subrow_title, rep(c(subcol_names), n_col_names)) + + # Round data + data <- round(data, round_decimal) + + # Define data inside the scorecards table + for (row in 1:n_row_names) { + table_temp <- data.frame(table_column_2 = as.character(subrow_names)) + for (col in 1:n_col_names) { + table_temp <- data.frame(table_temp, + Reorder(data = Subset(x = data, along = c(col_dim, row_dim), + indices = list(col, row), drop = 'selected'), + order = c(subrow_dim, subcol_dim))) + } + if (row == 1) { + table_data <- table_temp + } else { + table_data <- rbind(table_data, table_temp) + } + } + + # All data for plotting in table + table <- data.frame(table_column_1 = row_names_table, table_data) + table_temp <- array(unlist(table[3:n_columns]), dim = c(n_rows, n_columns - 2)) + + # Define colors to show in table + table_colors <- .ScorecardColors(table = table_temp, n_col = n_col_names, + n_subcol = n_subcol_names, n_row = n_row_names, + n_subrow = n_subrow_names, legend_breaks = legend_breaks, + palette = palette, colorunder = colorunder, + colorsup = colorsup) + metric_color <- table_colors$metric_color + metric_text_color <- table_colors$metric_text_color + # metric_text_bold <- table_colors$metric_text_bold + + # Remove temporary table + rm(table_temp) + + # Format values to underline in table + metric_underline <- MergeDims(sign, c(subcol_dim, col_dim), + rename_dim = 'col', na.rm = FALSE) + metric_underline <- MergeDims(metric_underline, c(subrow_dim, row_dim), + rename_dim = 'row', na.rm = FALSE) + metric_underline <- Reorder(metric_underline, c('row', 'col')) + + options(stringsAsFactors = FALSE) + title <- data.frame(c1 = table_title, c2 = n_columns) + subtitle <- data.frame(c1 = table_subtitle, c2 = n_columns) + header_names <- as.data.frame(data.frame(c1 = c("", col_names), + c2 = c(2, rep(n_subcol_names, n_col_names)))) + header_names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col_title, n_col_names))), + c2 = c(2, rep(n_subcol_names, n_col_names)))) + title_space <- data.frame(c1 = "\n", c2 = n_columns) + + # Hide NA values in table + options(knitr.kable.NA = '') + + # Create HTML table + table_html_part <- list() + table_html_part[[1]] <- kbl(table, escape = F, col.names = column_titles, align = rep("c", n_columns)) %>% + kable_paper("hover", full_width = FALSE, font_size = 14 * font_size) %>% + add_header_above(header = header_names2, font_size = 16 * font_size) %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% + add_header_above(header = header_names, font_size = 20 * font_size) %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% + add_header_above(header = subtitle, font_size = 16 * font_size, align = "left") %>% + add_header_above(header = title_space, font_size = 10 * font_size) %>% + add_header_above(header = title, font_size = 22 * font_size, align = "left") + + for (i in 1:n_col_names) { + for (j in 1:n_subcol_names) { + my_background <- metric_color[, (i - 1) * n_subcol_names + j] + my_text_color <- metric_text_color[, (i - 1) * n_subcol_names + j] + my_underline <- metric_underline[, (i - 1) * n_subcol_names + j] + # my_bold <- metric_text_bold[(i - 1) * n_subcol_names + j] + + table_html_part[[(i - 1) * n_subcol_names + j + 1]] <- + column_spec(table_html_part[[(i - 1) * n_subcol_names + j]], + 2 + n_subcol_names * (i - 1) + j, + background = my_background[1:n_rows], + color = my_text_color[1:n_rows], + underline = my_underline[1:n_rows], + bold = T) # 
strsplit(toString(bold), ', ')[[1]] + } + } + + # Define position of table borders + column_borders <- NULL + for (i in 1:n_col_names) { + column_spacing <- (n_subcol_names * i) + 2 + column_borders <- c(column_borders, column_spacing) + } + + n_last_list <- n_col_names * n_subcol_names + 1 + + table_html <- column_spec(table_html_part[[n_last_list]], 1, bold = TRUE, + width_min = paste0(col1_width, 'cm')) %>% + column_spec(2, bold = TRUE, width_min = paste0(col2_width, 'cm')) %>% + column_spec(3:n_columns, width_min = paste0(columns_width, 'cm')) %>% + column_spec(c(1, 2, column_borders), border_right = "2px solid black") %>% + column_spec(1, border_left = "2px solid black") %>% + column_spec(n_columns, border_right = "2px solid black") %>% + row_spec(seq(from = 0, to = n_subrow_names * n_row_names, by = n_subrow_names), + extra_css = "border-bottom: 2px solid black", hline_after = TRUE) + if (plot_legend == TRUE) { + # Save the scorecard (without legend) + save_kable(table_html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1) + + # White space for legend + legend_white_space <- 37.8 * legend_white_space # convert cm to pixels (~37.8 px per cm) + + # Create and save color bar legend + .ScorecardLegend(legend_breaks = legend_breaks, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + label_scale = label_scale, + legend_width = legend_width, + legend_height = legend_height, + legend_white_space = legend_white_space, + fileout = fileout) + + # Add the legends below the scorecard table + system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout, + '_tmpScorecardLegend.png ', fileout)) + # Remove temporary scorecard table + unlink(paste0(fileout, '_tmpScorecard*.png')) + } + if (plot_legend == FALSE) { + save_kable(table_html, file = fileout) + } +} + +# Scorecards function to assign background color of table cells, +# color of text in table and to bold the text. +# +# It will return a list with 2 arrays: +# (1) metric_color, A 2-dimensional array with character strings containing the +# color codes for each cell background. +# (2) metric_text_color, A 2-dimensional array with character strings +# containing the color codes for each cell text. 
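+# For example, with n_col = 2 metrics, n_subcol = 3 start dates, n_row = 1
+# region and n_subrow = 2 forecast months, both returned arrays have
+# dim c(2, 6): one color code per table cell.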
+.ScorecardColors <- function(table, n_col, n_subcol, n_row, n_subrow, + legend_breaks, palette, colorunder, colorsup) { + # Define rows and columns + n_rows <- n_row * n_subrow + n_columns <- n_col * n_subcol + + # Set table background colors + metric_color <- array(colorunder, c(n_row * n_subrow, n_columns)) + metric_text_color <- array("#2A2A2A", c(n_row * n_subrow , n_columns)) + # metric_text_bold <- array(TRUE, c(n_row * n_subrow , n_columns - 2)) # Setting all values to bold + + # Define cell and text colors to show in table + for (i in 1:n_col) { + metric_int <- legend_breaks[[i]] + for (rr in 1:n_rows) { + for (j in 1:n_subcol) { + for (pp in 1:(length(metric_int) - 1)) { + if (is.na(table[rr, ((i - 1) * n_subcol + j)])) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- "gray" + } else { + if (table[rr, ((i - 1) * n_subcol + j)] >= + metric_int[pp] && table[rr, ((i - 1) * n_subcol + j)] <= + metric_int[pp + 1]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- palette[[i]][pp] # palette[pp] + } + if (table[rr, ((i - 1) * n_subcol + j)] < metric_int[1]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- colorunder[i] + } + if (table[rr,((i - 1) * n_subcol + j)] >= + metric_int[length(metric_int)]) { + metric_color[rr, ((i - 1) * n_subcol + j)] <- colorsup[i] + } + } + # color text in white and bold if background is white or dark blue or dark red: + if (is.na(table[rr, ((i - 1) * n_subcol + j)]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == 1 && + table[rr, ((i - 1) * n_subcol + j)] < metric_int[2]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == 2 && + table[rr, ((i - 1) * n_subcol + j)] < metric_int[3]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == (length(metric_int) - 1) && + table[rr, ((i - 1) * n_subcol + j)] >= metric_int[length(metric_int) - 1]) || + (!is.na(table[rr, ((i - 1) * n_subcol + j)]) && pp == (length(metric_int) - 2) && + table[rr, ((i - 1) * n_subcol + j)] >= metric_int[length(metric_int) - 2])) { + metric_text_color[rr, ((i - 1) * n_subcol + j)] <- "white" + # metric_text_bold[rr,((i - 1) * n_subcol + j)] <- TRUE + } + } + } + } + } + return(list(metric_color = metric_color, + metric_text_color = metric_text_color)) +} + +# Scorecards function to create the color bar legends for the required metrics +# and paste them below the scorecard table +.ScorecardLegend <- function(legend_breaks, palette, colorunder, colorsup, + label_scale, legend_width, legend_height, + legend_white_space, fileout) { + + # Create color bar legends for each metric + for (i in 1:length(palette)) { + png(filename = paste0(fileout, '_tmpLegend', i, '.png'), width = legend_width, + height = legend_height) + ColorBarContinuous(brks = legend_breaks[[i]], cols = palette[[i]], vertical = FALSE, + label_scale = label_scale, col_inf = colorunder[[i]], + col_sup = colorsup[[i]]) + dev.off() + if (i == 1) { + # Add white space to the left of the first color bar legend + system(paste0('convert ', fileout, '_tmpLegend1.png -background white -splice ', + legend_white_space, 'x0 ', fileout, '_tmpScorecardLegend.png')) + } else { + system(paste0('convert +append ', fileout, '_tmpScorecardLegend.png ', + fileout, '_tmpLegend', i, '.png ', fileout, + '_tmpScorecardLegend.png')) + } + } + unlink(paste0(fileout,'_tmpLegend*.png')) +} diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R index aea23c566851f523cc8ee19ae2336861e329d0c0..9d1630c419ebc967fb6dc94ba59c3a4f0d5b1461 100644 --- 
a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -27,8 +27,8 @@ #'@importFrom ClimProjDiags WeightedMean #'@importFrom s2dv Reorder #'@export -WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, - ncores = NULL, na.rm = TRUE) { +WeightedMetrics <- function(loaded_metrics, regions, forecast.months, + metric.aggregation, ncores = NULL, na.rm = TRUE) { ## Initial checks # loaded_metrics if (any(sapply(loaded_metrics, function(x) { @@ -53,9 +53,8 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, ## Get metric names ## TO DO: check all metric are in the same order for all sys - metrics <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months <- attributes(loaded_metrics[[1]][[1]])$start.months + metrics <- attributes(loaded_metrics)$metrics + start.months <- attributes(loaded_metrics)$start_months all_metric_means <- array(dim = c(metric = length(metrics), time = length(forecast.months), @@ -83,47 +82,20 @@ WeightedMetrics <- function(loaded_metrics, regions, metric.aggregation, latdim = lat_dim_name, na.rm = na.rm, ncores = ncores) + all_metric_means[, , , reg, ref, sys] <- weighted.mean + } ## close loop on region } ## close loop on reference } ## close loop on system - - ## skill aggregation: - if (metric.aggregation == 'score') { - if (all(c("rps", "rps_clim") %in% metrics)) { - ## Calculate RPSS from aggregated RPS and RPS_clim - all_metric_means <- multiApply::Apply(data = all_metric_means, - target_dims = 'metric', - fun = function(x, met) { - res <- 1 - x[which(met == 'rps')] / x[which(met == 'rps_clim')] - c(x, res)}, met = metrics, - output_dims = 'metric', - ncores = ncores)$output1 - ## Define name of newly calculated RPSS metric - metrics <- c(metrics, "rpss_score_aggr") - } - if (all(c("crps", "crps_clim") %in% metrics)) { - ## Calculate CRPSS from aggragated CRPS and CRPS_clim - all_metric_means <- multiApply::Apply(data = all_metric_means, - target_dims = 'metric', - fun = function(x, met) { - res <- 1 - x[which(met == 'crps')] / x[which(met == 'crps_clim')] - c(x, res)}, - met = metrics, - output_dims = 'metric', - ncores = ncores)$output1 - ## Define name of newly calculated CRPSS metric - metrics <- c(metrics, "crpss_score_aggr") - } - ## Add warning in case metric.aggregation == 'score' but 1 of the metrics from each pair is missing - } + ## reorder dimensions in array all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) ## Add attributes attributes(all_metric_means)$metrics <- metrics - attributes(all_metric_means)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months - attributes(all_metric_means)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months + attributes(all_metric_means)$start.months <- start.months + attributes(all_metric_means)$forecast.months <- forecast.months attributes(all_metric_means)$regions <- regions attributes(all_metric_means)$system.name <- names(loaded_metrics) attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index e0b55641c5b0cbc12e4ec273c1bd54c08665ab48..37aa421c978d8aad8479b9b972684222207ddd5f 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -10,130 +10,525 @@ source('modules/Scorecards/R/tmp/SCTransform.R') 
source('modules/Scorecards/R/tmp/ScorecardsSingle.R') source('modules/Scorecards/R/tmp/ScorecardsMulti.R') source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') -source('modules/Scorecards/R/tmp/SCPlotScorecard.R') +source('modules/Scorecards/R/tmp/VizScorecard.R') +## Temporary for new ESviz function +source('modules/Scorecards/R/tmp/ColorBarContinuous.R') +source('modules/Scorecards/R/tmp/ClimPalette.R') +.IsColor <- s2dv:::.IsColor +.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs -## TODO: Change function name to 'Scorecards'? ## Define function Scorecards <- function(recipe) { - ## set parameters - input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + ## Parameters for loading data files + skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") + stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System - reference <- recipe$Analysis$Datasets$Reference + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max + calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) - if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ - error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") - } + if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) %in% start.months)){ + error(recipe$Run$logger, "Requested start dates for scorecards must be loaded") + } } - + + start.months <- sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) + metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + ncores <- 1 # recipe$Analysis$ncores - ## Define skill scores in score aggregation has been requested - - if(metric.aggregation == 'score'){ - if('rps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'rps_clim') - } - if('crps' %in% metrics.load){ - metrics.load <- c(metrics.load, 'crps_clim') - } + if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ + alpha <- 0.05 + } else { + alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha } - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - - ## Define skill scores in score aggregation has been requested + if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ + inf.to.na <- FALSE + } else { + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - if(metric.aggregation == 'score'){ - if('rpss' %in% metrics.visualize){ - 
metrics.visualize[metrics.visualize == 'rpss'] <- 'rpss_score_aggr' - } - if('crpss' %in% metrics.visualize){ - metrics.visualize[metrics.visualize == 'crpss'] <- 'crpss_score_aggr' - } + if(is.null(recipe$Analysis$remove_NAs)){ + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs } - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + ## Parameters for scorecard layout table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - ncores <- 1 # recipe$Analysis$ncores + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width + + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } - ## Load data files - loaded_metrics <- LoadMetrics(system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - metrics = metrics.load, - start.months = start.months, - forecast.months = forecast.months, - inf_to_na = inf.to.na, - input.path = input.path) + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } + if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for allready aggregated data - metrics.dim <- attributes(loaded_metrics[[1]][[1]])$metrics - forecast.months.dim <- attributes(loaded_metrics[[1]][[1]])$forecast.months - start.months.dim <- attributes(loaded_metrics[[1]][[1]])$start.months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) + if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + + ####### SKILL AGGREGATION ####### + if(metric.aggregation == 'skill'){ + + ## Load data files + loaded_metrics <- LoadMetrics(input_path = 
skill.input.path,
+                                 system = system,
+                                 reference = reference,
+                                 var = var,
+                                 metrics = metrics.visualize,
+                                 period = period,
+                                 start_months = start.months,
+                                 calib_method = calib.method,
+                                 inf_to_na = inf.to.na
+                                 )
+
+    ## Spatial Aggregation of metrics
+    if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){
+
+      ### Convert loaded metrics to array for already aggregated data
+      metrics.dim <- attributes(loaded_metrics)$metrics
+      forecast.months.dim <- forecast.months
+      start.months.dim <- attributes(loaded_metrics)$start_months
+      regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80))
+
+      aggregated_metrics <- array(dim = c(system = length(loaded_metrics),
+                                          reference = length(loaded_metrics[[1]]),
+                                          metric = length(metrics.dim),
+                                          time = length(forecast.months.dim),
+                                          sdate = length(start.months.dim),
+                                          region = length(regions.dim)))
+
+
+      for (sys in 1:length(names(loaded_metrics))){
+        for (ref in 1:length(names(loaded_metrics[[sys]]))){
+          aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region'))
+        }
+      }
+
+      ## Add attributes
+      attributes(aggregated_metrics)$metrics <- metrics.load
+      attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics)$start_months
+      attributes(aggregated_metrics)$forecast.months <- forecast.months
+      attributes(aggregated_metrics)$regions <- regions
+      attributes(aggregated_metrics)$system.name <- names(loaded_metrics)
+      attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]])
+
+
+    } else {
+      ## Calculate weighted mean of spatial aggregation
+      aggregated_metrics <- WeightedMetrics(loaded_metrics,
+                                            regions = regions,
+                                            forecast.months = forecast.months,
+                                            metric.aggregation = metric.aggregation,
+                                            ncores = ncores)
+    } ## close if on region
+    metrics_significance <- NULL
+  } ## close if on skill
+
+  ###### SCORE AGGREGATION ######
+  if(metric.aggregation == 'score'){
+
+    lon_dim <- 'longitude'
+    lat_dim <- 'latitude'
+    time_dim <- 'syear'
+    memb_dim <- 'ensemble'
+
+    ## Define arrays to be filled with data
+    aggregated_metrics <- array(data = NA,
+                                dim = c(system = length(system),
+                                        reference = length(reference),
+                                        time = length(forecast.months),
+                                        sdate = length(start.months),
+                                        region = length(regions),
+                                        metric = length(metrics.visualize)))
-    for (sys in 1:length(names(loaded_metrics))){
-      for (ref in 1:length(names(loaded_metrics[[sys]]))){
-        aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region'))
-      }
-    }
+    metrics_significance <- array(data = NA,
+                                  dim = c(system = length(system),
+                                          reference = length(reference),
+                                          time = length(forecast.months),
+                                          sdate = length(start.months),
+                                          region = length(regions),
+                                          metric = length(metrics.visualize)))
-    ## Add attributes
-    attributes(aggregated_metrics)$metrics <- metrics.load
-    attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics[[1]][[1]])$start.months
-    attributes(aggregated_metrics)$forecast.months <- attributes(loaded_metrics[[1]][[1]])$forecast.months
-    attributes(aggregated_metrics)$regions <- regions
-    attributes(aggregated_metrics)$system.name <- names(loaded_metrics)
-    attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]])
+    ## Load and aggregate data for each metric
+    for (sys in 1:length(system)){
+      for (ref in 1:length(reference)){
+        for (met in metrics.visualize) {
+
+          if(met == 'rpss'){
+            ## Load data from saved files
+            rps_syear <-
.loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'rps_syear') + + rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'rps_clim_syear') + + ## Spatially aggregate data + rps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_syear)$lon), + lat = as.vector(attributes(rps_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + rps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = rps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(rps_clim_syear)$lon), + lat = as.vector(attributes(rps_clim_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = F) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' + names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Temporally aggregate data + rps_syear <- Apply(data = rps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_syear <- Apply(data = rps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_syear / rps_clim_syear + + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'sdate','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'sdate','region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + ## Load data from saved files + crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'crps_syear') + + crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'crps_clim_syear') + + ## Spatially aggregate data + crps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_syear)$lon), + lat = as.vector(attributes(crps_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = crps_clim_syear, + region = regions[[X]], + lon = as.vector(attributes(crps_clim_syear)$lon), + lat = as.vector(attributes(crps_clim_syear)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Calculate significance + sign_crpss <- 
RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Temporally aggregate data + crps_syear <- Apply(data = crps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'sdate','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'sdate','region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + ## Load data from saved files + cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'cov') + + std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'std_hcst') + + std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'std_obs') + + + n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'n_eff') + + ## Calculate spatial aggregation + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = cov, + region = regions[[X]], + lon = as.vector(attributes(cov)$lon), + lat = as.vector(attributes(cov)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + + std_hcst <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_hcst, + region = regions[[X]], + lon = as.vector(attributes(std_hcst)$lon), + lat = as.vector(attributes(std_hcst)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = std_obs, + region = regions[[X]], + lon = as.vector(attributes(std_obs)$lon), + lat = as.vector(attributes(std_obs)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = n_eff, + region = regions[[X]], + lon = as.vector(attributes(n_eff)$lon), + lat = as.vector(attributes(n_eff)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + 
sdate = length(start.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]){ + for (mon in 1:dim(sign_corr)[['sdate']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE + && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ + sign_corr[time, mon, reg] <- TRUE + } else { + sign_corr[time, mon, reg] <- FALSE + } + } + } + } - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - metric.aggregation = metric.aggregation, - ncores = ncores) - }## close if + ## Save metric result in arrays + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'sdate','region')) + metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'sdate','region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'mean_bias') + + ## Calculate spatial aggregation + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = mean_bias, + region = regions[[X]], + lon = as.vector(attributes(mean_bias)$lon), + lat = as.vector(attributes(mean_bias)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Save metric result in array + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'sdate','region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], + reference = reference[ref], var = var, + period = period, start_months = start.months, + calib_method = calib.method, metric = 'enssprerr') + + ## Calculate spatial aggregation + enssprerr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = enssprerr, + region = regions[[X]], + lon = as.vector(attributes(enssprerr)$lon), + lat = as.vector(attributes(enssprerr)$lat), + londim = lon_dim, + latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(enssprerr))[length(dim(enssprerr))] <- 'region' + + ## Save metric result in array + aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'sdate','region')) + + } ## close on enssprerr + + } ## close loop on metric + } ## close if on reference + } ## close if on system + + ## Include metric attributes + attributes(aggregated_metrics)$metrics <- metrics.visualize + + ## Set NAs to False + metrics_significance[is.na(metrics_significance)] <- FALSE + + } ## close if on score + + ####### PLOT SCORECARDS ########## + ## Create simple scorecard tables ## (one system only) ## Metrics input must be in the same order as function SC_spatial_aggregation scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -145,9 +540,17 @@ Scorecards <- function(recipe) { metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, legend.white.space = 
legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, col1.width = col1.width, col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = output.path) ## Create multi system/reference scorecard tables @@ -155,6 +558,7 @@ Scorecards <- function(recipe) { ## Metrics input must be in the same order as function SC_spatial_aggregation if(length(system) > 1 || length(reference) > 1){ scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + sign = metrics_significance, system = system, reference = reference, var = var, @@ -162,10 +566,21 @@ Scorecards <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, + region.names = names(regions), metrics = metrics.visualize, table.label = table.label, fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, output.path = output.path) } ## close if diff --git a/modules/Skill/R/CRPS_clim.R b/modules/Skill/R/CRPS_clim.R index b66cab7872bf91102766a9124c82830deba90d24..bede13e4c02b62de0b7117c7c4f445902c14fcfd 100644 --- a/modules/Skill/R/CRPS_clim.R +++ b/modules/Skill/R/CRPS_clim.R @@ -3,10 +3,14 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross. time_dim <- names(dim(obs)) obs_time_len <- dim(obs)[time_dim] - if (isFALSE(clim.cross.val)) { ## Without cross-validation - ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) - ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) + if (isFALSE(clim.cross.val)) { + # Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, + dim = c(obs_time_len, obs_time_len - 1)) for (i in 1:obs_time_len) { ref[i, ] <- obs[-i] } @@ -15,8 +19,11 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross. 
names(dim(ref)) <- c(time_dim, memb_dim) # ref: [sdate, memb] # obs: [sdate] - crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, time_dim = time_dim, memb_dim = memb_dim, - dat_dim = NULL, Fair = FALSE) + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = NULL, + Fair = FALSE) # crps_ref should be [sdate] if (return_mean == TRUE) { diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R index 601a10c3245ab8d4ea5103dbd5fe4aad2ce589a0..4b309c004f1881bd21944460d3439672c839cb9d 100644 --- a/modules/Skill/R/RPS_clim.R +++ b/modules/Skill/R/RPS_clim.R @@ -1,14 +1,22 @@ # RPS version for climatology -RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3), cross.val = TRUE) { +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), + cross.val = TRUE, + return_mean = TRUE) { if (is.null(indices_for_clim)){ indices_for_clim <- 1:length(obs) } - obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + obs_probs <- .GetProbs(data = obs, + indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, + weights = NULL, + cross.val = cross.val) + # clim_probs: [bin, sdate] - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- c(prob_thresholds[1], + diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) clim_probs <- array(clim_probs, dim = dim(obs_probs)) # Calculate RPS for each time step @@ -16,5 +24,9 @@ RPS_clim <- function(obs, indices_for_clim = NULL, prob_thresholds = c(1/3, 2/3) probs_obs_cumsum <- apply(obs_probs, 2, cumsum) rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - return(mean(rps_ref)) + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } } diff --git a/modules/Skill/R/tmp/CRPS.R b/modules/Skill/R/tmp/CRPS.R new file mode 100644 index 0000000000000000000000000000000000000000..bb63095c0e0e58f18a6c24c70d747776af0325be --- /dev/null +++ b/modules/Skill/R/tmp/CRPS.R @@ -0,0 +1,183 @@ +#'Compute the Continuous Ranked Probability Score +#' +#'The Continuous Ranked Probability Score (CRPS; Wilks, 2011) is the continuous +#'version of the Ranked Probability Score (RPS; Wilks, 2011). It is a skill +#'metric to evaluate the full distribution of probabilistic forecasts. It has a +#'negative orientation (i.e., the higher-quality forecast the smaller CRPS) and +#'it rewards the forecast that has probability concentration around the observed +#'value. In case of a deterministic forecast, the CRPS is reduced to the mean +#'absolute error. It has the same units as the data. The function is based on +#'enscrps_cpp from SpecsVerification. If there is more than one dataset, CRPS +#'will be computed for each pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. 
+#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. The +#' default value is NULL. +#'@param Fair A logical indicating whether to compute the FairCRPS (the +#' potential CRPS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of CRPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- CRPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom SpecsVerification enscrps_cpp +#'@importFrom ClimProjDiags Subset +#'@export +CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE, return_mean = TRUE, ncores = NULL) { + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ", + "('obs' can have 'memb_dim', but it should be of length = 1).") + } + } + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), 
length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute CRPS + crps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = c(time_dim, dat_dim)), + fun = .CRPS, + time_dim = time_dim, memb_dim = memb_dim, dat_dim = dat_dim, + Fair = Fair, + ncores = ncores)$output1 + + if (return_mean) { + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + } else { + crps <- crps + } + + return(crps) +} + +.CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, + Fair = FALSE) { + + # exp: [sdate, memb, (dat_dim)] + # obs: [sdate, (dat_dim)] + + # Adjust dimensions if needed + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # for FairCRPS + R_new <- ifelse(Fair, Inf, NA) + + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) + + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) + CRPS[, i, j] <- crps + } + } + + if (is.null(dat_dim)) { + dim(CRPS) <- c(dim(CRPS)[time_dim]) + } + + return(CRPS) +} diff --git a/modules/Skill/R/tmp/RPS.R b/modules/Skill/R/tmp/RPS.R new file mode 100644 index 0000000000000000000000000000000000000000..59b2d01a0d842967cdaa4d0351ca1321e19ccf8c --- /dev/null +++ b/modules/Skill/R/tmp/RPS.R @@ -0,0 +1,388 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. 
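+#'
+#'As a small worked illustration of this definition (an editorial sketch, not
+#'part of the original documentation): with tercile categories, a forecast
+#'assigning probabilities (0.2, 0.5, 0.3) when the middle category is observed
+#'gives cumulative probabilities (0.2, 0.7, 1) against (0, 1, 1), so
+#'RPS = (0.2 - 0)^2 + (0.7 - 1)^2 + (1 - 1)^2 = 0.13.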
+#' +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#' If the data are probabilities, set memb_dim as NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPS (the +#' potential RPS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights A named numerical array of the weights for 'exp' probability +#' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +#' and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +#' default value is NULL. The ensemble should have at least 70 members or span +#' at least 10 time steps and have more than 45 members if consistency between +#' the weighted and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation. The default value is +#' FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). 
If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) 
{ + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- seq_len(dim(obs)[time_dim]) + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## weights + if (!is.null(weights) & is.null(cat_dim)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + if (is.null(dat_dim)) { + if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim' and 'time_dim' in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + stop("Parameter 'weights' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim] | + dim(weights)[dat_dim] != dim(exp)[dat_dim]) { + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) + + } + } else if (!is.null(weights) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", + "'weights' is not used. 
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
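+      ## 'na.rm' is mapped to 'f_NAs', the minimum acceptable fraction of
+      ## complete time steps: TRUE -> 0 (use whatever is available), FALSE -> 1
+      ## (require a fully complete series), a numeric value -> used as-is.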
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values), , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[, i, j] <- rps[, i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[, i, j] <- NA_real_ + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + + + diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 04fbc52efb5246e86c601007be1ced893439fc14..3dbbd5f66daf78d4e3a79d307e5a1fb01d185c70 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -19,6 +19,10 @@ source("modules/Skill/R/tmp/GetProbs.R") source("modules/Skill/compute_skill_metrics.R") source("modules/Skill/compute_probabilities.R") +## Temporary +source("modules/Skill/R/tmp/RPS.R") +source("modules/Skill/R/tmp/CRPS.R") + Skill <- function(recipe, data, agg = 'global') { # data$hcst: s2dv_cube containing the hindcast @@ -63,7 +67,7 @@ Skill <- function(recipe, data, agg = 'global') { cross.val <- recipe$Analysis$Workflow$Skill$cross_validation } skill_metrics <- list() - for (metric in strsplit(metrics, ", | |,")[[1]]) { + for (metric in strsplit(metrics, ", | |,")[[1]]) { # Whether the fair version of the metric is to be computed if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', 'fcrps', 'fcrpss')) { @@ -84,15 +88,39 @@ Skill <- function(recipe, data, agg = 'global') { memb_dim = memb_dim, Fair = Fair, cross.val = cross.val, + return_mean = TRUE, ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill - rps_clim <- Apply(list(data$obs$data), - target_dims = c(time_dim, memb_dim), - cross.val = cross.val, - RPS_clim)$output1 - rps_clim <- .drop_dims(rps_clim) - skill_metrics[[paste0(metric, "_clim")]] <- rps_clim + # RPS_clim + } else if (metric == 'rps_clim') { + skill <- Apply(list(data$obs$data), + 
target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + return_mean = TRUE, + fun = RPS_clim)$output1 + skill <- .drop_dims(skill) + skill_metrics[[metric]] <- skill + # RPS_syear and FRPS_syear + } else if (metric %in% c('rps_syear', 'frps_syear')) { + skill <- RPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + return_mean = FALSE, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill ## temp + # RPS_clim_syear + } else if (metric == 'rps_clim_syear') { + skill <- Apply(list(data$obs$data), + target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + fun = RPS_clim, + return_mean = FALSE, output_dims = 'syear')$output1 + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill ## temp # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(data$hcst$data, data$obs$data, @@ -137,14 +165,36 @@ Skill <- function(recipe, data, agg = 'global') { time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, + return_mean = TRUE, ncores = ncores) skill <- .drop_dims(skill) skill_metrics[[ metric ]] <- skill - crps_clim <- Apply(list(data$obs$data), target_dims = time_dim, + # CRPS_clim + } else if (metric %in% c('crps_clim')) { + skill <- Apply(list(data$obs$data), target_dims = time_dim, + fun = CRPS_clim, memb_dim = memb_dim, + clim.cross.val = cross.val, + return_mean = TRUE)$output1 + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPS_syear and FCRPS_syear + } else if (metric %in% c('crps_syear', 'fcrps_syear')) { + skill <- CRPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + return_mean = FALSE, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # CRPS_clim_syear + } else if (metric %in% c('crps_clim_syear')) { + skill <- Apply(list(data$obs$data), target_dims = time_dim, fun = CRPS_clim, memb_dim = memb_dim, - clim.cross.val = cross.val)$output1 - crps_clim <- .drop_dims(crps_clim) - skill_metrics[['crps_clim']] <- crps_clim + clim.cross.val = cross.val, + return_mean = FALSE)$output1 + skill <- .drop_dims(skill) + skill_metrics[[metric]] <- skill # CRPSS and FCRPSS } else if (metric %in% c('crpss', 'fcrpss')) { skill <- CRPSS(data$hcst$data, data$obs$data, @@ -313,11 +363,12 @@ Skill <- function(recipe, data, agg = 'global') { recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/outputs/Skill/") # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim + ## TODO: merge save_metrics() and save_metrics_scorecards() if (recipe$Analysis$Workflow$Skill$save == 'all') { corr_metric_names <- grep("^corr_individual_members", names(skill_metrics)) if (length(corr_metric_names) == 0) { - save_metrics(recipe = recipe, skill = skill_metrics, - data_cube = data$hcst, agg = agg) + save_metrics(recipe = recipe, metrics = skill_metrics, + data_cube = data$hcst, agg = agg, module = "skill") } else { # Save corr if (length(skill_metrics[corr_metric_names]) > 0) { @@ -326,7 +377,7 @@ Skill <- function(recipe, data, agg = 'global') { } # Save other skill metrics if (length(skill_metrics[-corr_metric_names]) > 0) { - save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + save_metrics(recipe = recipe, metrics = skill_metrics[-corr_metric_names], data_cube = data$hcst, agg = agg) } } diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R new file mode 100644 index 
0000000000000000000000000000000000000000..085bcdc58a162133b1316bf1187ae3974f69234d
--- /dev/null
+++ b/modules/Statistics/Statistics.R
@@ -0,0 +1,99 @@
+Statistics <- function(recipe, data, agg = 'global') {
+  # data$hcst: s2dv_cube containing the hindcast
+  # data$obs: s2dv_cube containing the observations
+  # recipe: auto-s2s recipe as provided by read_yaml
+  # agg: data aggregation
+
+  time_dim <- 'syear'
+  ncores <- recipe$Analysis$ncores
+
+  ## Calculate ensemble mean
+  hcst_ensmean <- Apply(data$hcst$data,
+                        target_dims = 'ensemble',
+                        fun = 'mean')$output1
+  obs_ensmean <- Apply(data$obs$data,
+                       target_dims = 'ensemble',
+                       fun = 'mean')$output1
+
+  ## Remove unwanted dimensions
+  ## TODO: Apply .drop_dims() function instead?
+  hcst_ensmean <- Subset(hcst_ensmean,
+                         along = c('dat', 'sday', 'sweek'),
+                         indices = list(1, 1, 1),
+                         drop = 'selected')
+  obs_ensmean <- Subset(obs_ensmean,
+                        along = c('dat', 'sday', 'sweek'),
+                        indices = list(1, 1, 1),
+                        drop = 'selected')
+
+  statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric)
+  statistics <- list()
+  # Compute statistics in the list
+  for (stat in strsplit(statistics_list, ", | |,")[[1]]) {
+    if (stat %in% c('cov', 'covariance')) {
+      ## Calculate covariance
+      covariance <- Apply(data = list(x = obs_ensmean, y = hcst_ensmean),
+                          target_dims = time_dim,
+                          fun = function(x, y) {cov(as.vector(x), as.vector(y),
+                                                    use = "everything",
+                                                    method = "pearson")})$output1
+
+      statistics[[ stat ]] <- covariance
+    } ## close if on covariance
+    if (stat %in% c('std', 'standard_deviation')) {
+      ## Calculate standard deviation
+      std_hcst <- Apply(data = hcst_ensmean,
+                        target_dims = c(time_dim),
+                        fun = 'sd')$output1
+
+      std_obs <- Apply(data = obs_ensmean,
+                       target_dims = c(time_dim),
+                       fun = 'sd')$output1
+
+      statistics[['std_hcst']] <- std_hcst
+      statistics[['std_obs']] <- std_obs
+
+    } ## close if on std
+    if (stat %in% c('var', 'variance')) {
+      ## Calculate variance
+      var_hcst <- (Apply(data = hcst_ensmean,
+                         target_dims = c(time_dim),
+                         fun = 'sd')$output1)^2
+
+      var_obs <- (Apply(data = obs_ensmean,
+                        target_dims = c(time_dim),
+                        fun = 'sd')$output1)^2
+
+      statistics[['var_hcst']] <- var_hcst
+      statistics[['var_obs']] <- var_obs
+    } ## close if on variance
+    if (stat == 'n_eff') {
+      ## Calculate effective degrees of freedom
+      n_eff <- s2dv::Eno(data = obs_ensmean,
+                         time_dim = time_dim,
+                         na.action = na.pass,
+                         ncores = ncores)
+      statistics[['n_eff']] <- n_eff
+    } ## close on n_eff
+  }
+
+  info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####")
+  .log_memory_usage(recipe$Run$logger, when = "After statistics computation")
+
+  # Save outputs
+  if (recipe$Analysis$Workflow$Statistics$save != 'none') {
+    info(recipe$Run$logger, "##### START SAVING STATISTICS #####")
+  }
+  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                  "/outputs/Statistics/")
+
+  if (recipe$Analysis$Workflow$Statistics$save == 'all') {
+    # Save all statistics
+    save_metrics(recipe = recipe, metrics = statistics,
+                 data_cube = data$hcst, agg = agg,
+                 module = "statistics")
+  }
+  # Return results
+  return(statistics)
+}
+
diff --git a/modules/Visualization/R/plot_skill_metrics.R b/modules/Visualization/R/plot_metrics.R
similarity index 72%
rename from modules/Visualization/R/plot_skill_metrics.R
rename to modules/Visualization/R/plot_metrics.R
index 8ba679eff8d482a840311657f066d07bd5ceee4e..8fc84e6f81493db542b07c2fd7586eb955ab4a49 100644
--- a/modules/Visualization/R/plot_skill_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -1,11 +1,11 @@
library(stringr)
-plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
-                               outdir, significance = F, output_conf) {
+plot_metrics <- function(recipe, data_cube, metrics,
+                         outdir, significance = F, output_conf) {
   # recipe: Auto-S2S recipe
   # archive: Auto-S2S archive
   # data_cube: s2dv_cube object with the corresponding hindcast data
-  # skill_metrics: list of named skill metrics arrays
+  # metrics: list of named metric arrays with named dimensions
   # outdir: output directory
   # significance: T/F, whether to display the significance dots in the plots
@@ -15,9 +15,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
                   for daily data.")
     stop()
   }
-  # Abort if skill_metrics is not list
-  if (!is.list(skill_metrics) || is.null(names(skill_metrics))) {
-    stop("The element 'skill_metrics' must be a list of named arrays.")
+  # Abort if metrics is not list
+  if (!is.list(metrics) || is.null(names(metrics))) {
+    stop("The element 'metrics' must be a list of named arrays.")
   }
   latitude <- data_cube$coords$lat
@@ -63,21 +63,24 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
                     "enscorr", "rpss_specs", "bss90_specs", "bss10_specs",
                     "enscorr_specs", "rmsss", "msss")
   scores <- c("rps", "frps", "crps", "frps_specs", "mse")
+  statistics <- c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff")
+
   # Loop over variables and assign colorbar and plot parameters to each metric
   for (var in 1:data_cube$dims[['var']]) {
-    var_skill <- lapply(skill_metrics, function(x) {
+    var_name <- data_cube$attrs$Variable$varName[[var]] ## For statistics
+    var_metric <- lapply(metrics, function(x) {
       ClimProjDiags::Subset(x, along = 'var',
                             indices = var, drop = 'selected')})
-    for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) {
-      if (name %in% names(skill_metrics)) {
+    for (name in c(skill_scores, scores, statistics, "mean_bias", "enssprerr")) {
+      if (name %in% names(metrics)) {
        units <- NULL
        # Define plot characteristics and metric name to display in plot
        if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss",
-                        "rpss_specs", "bss90_specs", "bss10_specs",
+                        "rpss_specs", "bss90_specs", "bss10_specs",
                         "rmsss", "msss")) {
          display_name <- toupper(strsplit(name, "_")[[1]][1])
-          skill <- var_skill[[name]]
+          metric <- var_metric[[name]]
          brks <- seq(-1, 1, by = 0.2)
          colorbar <- clim.colors(length(brks) + 1, diverging_palette)
          cols <- colorbar[2:(length(colorbar) - 1)]
@@ -85,7 +88,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
           col_sup <- NULL
         } else if (name == "mean_bias_ss") {
           display_name <- "Mean Bias Skill Score"
-          skill <- var_skill[[name]]
+          metric <- var_metric[[name]]
           brks <- seq(-1, 1, by = 0.2)
           colorbar <- clim.colors(length(brks) + 1, diverging_palette)
           cols <- colorbar[2:(length(colorbar) - 1)]
@@ -93,13 +96,13 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
           col_sup <- NULL
         } else if (name %in% c("enscorr", "enscorr_specs")) {
           display_name <- "Ensemble Mean Correlation"
-          skill <- var_skill[[name]]
+          metric <- var_metric[[name]]
           brks <- seq(-1, 1, by = 0.2)
           cols <- clim.colors(length(brks) - 1, diverging_palette)
           col_inf <- NULL
           col_sup <- NULL
         } else if (name %in% scores) {
-          skill <- var_skill[[name]]
+          metric <- var_metric[[name]]
           display_name <- toupper(strsplit(name, "_")[[1]][1])
           brks <- seq(0, 1, by = 0.1)
           colorbar <- grDevices::hcl.colors(length(brks), sequential_palette)
@@ -107,7 +110,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
           col_inf <- NULL
           col_sup <- colorbar[length(colorbar)]
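+          # Scores such as RPS and CRPS are non-negative and unbounded above,
+          # so values beyond the last break fall in the open-ended 'col_sup'
+          # colour and no 'col_inf' is needed.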
} else if (name == "enssprerr") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Spread-to-Error Ratio" brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2) colorbar <- clim.colors(length(brks), diverging_palette) @@ -115,35 +118,79 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf <- NULL col_sup <- colorbar[length(colorbar)] } else if (name %in% "mean_bias") { - skill <- var_skill[[name]] + metric <- var_metric[[name]] display_name <- "Mean Bias" - max_value <- max(abs(quantile(skill, 0.02, na.rm = T)), - abs(quantile(skill, 0.98, na.rm = T))) + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "cov") { + metric <- var_metric[[name]] + display_name <- "Covariance" + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- paste0(data_cube$attrs$Variable$metadata[[var_name]]$units, "²") + } else if (name %in% "std_hcst") { + metric <- var_metric[[name]] + display_name <- "Hindcast Standard Deviation" + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) brks <- max_value * seq(-1, 1, by = 0.2) colorbar <- clim.colors(length(brks) + 1, diverging_palette) cols <- colorbar[2:(length(colorbar) - 1)] col_inf <- colorbar[1] col_sup <- colorbar[length(colorbar)] units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "std_obs") { + metric <- var_metric[[name]] + display_name <- "Observation Standard Deviation" + max_value <- max(abs(quantile(metric, 0.02, na.rm = T)), + abs(quantile(metric, 0.98, na.rm = T))) + brks <- max_value * seq(-1, 1, by = 0.2) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- colorbar[1] + col_sup <- colorbar[length(colorbar)] + units <- data_cube$attrs$Variable$metadata[[var_name]]$units + } else if (name %in% "n_eff") { + metric <- var_metric[[name]] + display_name <- "Effective Sample Size" + max_value <- max(metric) + brks <- max_value * seq(0, 1, by = 0.1) + colorbar <- clim.colors(length(brks) + 1, diverging_palette) + cols <- colorbar[2:(length(colorbar) - 1)] + col_inf <- NULL + col_sup <- NULL } + + # Reorder dimensions - skill <- Reorder(skill, c("time", "longitude", "latitude")) + metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, # retrieve it and reorder its dimensions. 
significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(skill_metrics))) { - skill_significance <- var_skill[[significance_name]] - skill_significance <- Reorder(skill_significance, c("time", + if ((significance) && (significance_name %in% names(metrics))) { + metric_significance <- var_metric[[significance_name]] + metric_significance <- Reorder(metric_significance, c("time", "longitude", "latitude")) - # Split skill significance into list of lists, along the time dimension + # Split significance into list of lists, along the time dimension # This allows for plotting the significance dots correctly. - skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + metric_significance <- ClimProjDiags::ArrayToList(metric_significance, dim = "time", level = "sublist", names = "dots") } else { - skill_significance <- NULL + metric_significance <- NULL } # Define output file name and titles if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -163,9 +210,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, ## TODO: Combine PlotLayout with PlotRobinson? suppressWarnings( PlotLayout(PlotEquiMap, c('longitude', 'latitude'), - asplit(skill, MARGIN=1), # Splitting array into a list + asplit(metric, MARGIN=1), # Splitting array into a list longitude, latitude, - special_args = skill_significance, + special_args = metric_significance, dot_symbol = 20, toptitle = toptitle, title_scale = 0.6, @@ -216,7 +263,7 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, col_inf = col_inf, col_sup = col_sup) } # Loop over forecast times - for (i in 1:dim(skill)[['time']]) { + for (i in 1:dim(metric)[['time']]) { # Get forecast time label forecast_time <- match(months[i], month.name) - init_month + 1 @@ -230,9 +277,9 @@ plot_skill_metrics <- function(recipe, data_cube, skill_metrics, "\n", display_name, "/", months[i], "/", hcst_period) # Modify base arguments - base_args[[1]] <- skill[i, , ] - if (!is.null(skill_significance)) { - base_args[[2]] <- skill_significance[[i]][[1]] + base_args[[1]] <- metric[i, , ] + if (!is.null(metric_significance)) { + base_args[[2]] <- metric_significance[[i]][[1]] significance_caption <- "alpha = 0.05" } else { significance_caption <- NULL diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index ae94ed3d01526f8cf98932a2adf6f6bce6d7359d..a9badff6c5bfbedb65e84ff55651f95bb7d745d9 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -4,7 +4,7 @@ ## TODO: Add param 'raw'? 
## TODO: Decadal plot names -source("modules/Visualization/R/plot_skill_metrics.R") +source("modules/Visualization/R/plot_metrics.R") source("modules/Visualization/R/get_proj_code.R") ## TODO: Remove after the next s2dv release source("modules/Visualization/R/tmp/PlotRobinson.R") @@ -16,6 +16,7 @@ source("modules/Visualization/plot_data.R") Visualization <- function(recipe, data, skill_metrics = NULL, + statistics = NULL, probabilities = NULL, significance = F, output_conf = NULL) { @@ -62,10 +63,12 @@ Visualization <- function(recipe, dir.create(directory, showWarnings = FALSE, recursive = TRUE) } - if ((is.null(skill_metrics)) && (is.null(data$fcst))) { - error(recipe$Run$logger, "The Visualization module has been called, - but there is no fcst in 'data', and 'skill_metrics' is NULL - so there is no data that can be plotted.") + if ((is.null(skill_metrics)) && (is.null(statistics)) && + (is.null(data$fcst))) { + error(recipe$Run$logger, + paste0("The Visualization module has been called, but there is no ", + "fcst in 'data', and 'skill_metrics' and 'statistics' are ", + "NULL, so there is no data that can be plotted.")) stop() } # Set default single-panel plots if not specified @@ -75,14 +78,26 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_skill_metrics(recipe, data$hcst, skill_metrics, outdir, - significance, output_conf = output_conf) + plot_metrics(recipe, data$hcst, skill_metrics, outdir, + significance, output_conf = output_conf) } else { error(recipe$Run$logger, paste0("The skill metric plots have been requested, but the ", "parameter 'skill_metrics' is NULL")) } } + + # Plot statistics + if ("statistics" %in% plots) { + if (!is.null(statistics)) { + plot_metrics(recipe, data$hcst, statistics, outdir, + significance, output_conf = output_conf) + } else { + error(recipe$Run$logger, + paste0("The statistics plots have been requested, but the ", + "parameter 'statistics' is NULL")) + } + } # Plot forecast ensemble mean if ("forecast_ensemble_mean" %in% plots) { diff --git a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml index ae17f9cd1dbbcf2ada5b47a68219272d983aff6d..17c1acae318ffe254f8781c2def99cb73fe509d3 100644 --- a/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml +++ b/recipes/atomic_recipes/recipe_scorecards_s2s-suite.yml @@ -16,28 +16,36 @@ Analysis: Time: sdate: '0101' ## MMDD fcst_year: # Optional, int: Forecast year 'YYYY' - hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_start: '2014' # Mandatory, int: Hindcast start year 'YYYY' hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months - ftime_max: 6 # Mandatory, int: Last leadtime time step in months + ftime_max: 2 # Mandatory, int: Last leadtime time step in months Region: - latmin: -90 # Mandatory, int: minimum latitude - latmax: 90 # Mandatory, int: maximum latitude + latmin: 30 # Mandatory, int: minimum latitude + latmax: 35 # Mandatory, int: maximum latitude lonmin: 0 # Mandatory, int: minimum longitude - lonmax: 359.9 # Mandatory, int: maximum longitude + lonmax: 10 # Mandatory, int: maximum longitude Regrid: method: conservative # conservative for prlr, bilinear for tas, psl, sfcWind type: to_system Workflow: Calibration: method: raw # Mandatory, str: Calibration method. See docu. 
+      save: 'none'
     Anomalies:
       compute: yes
-      cross_validation: no
+      cross_validation: no
+      save: 'none'
     Skill:
-      metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu.
+      metric: rps_syear rps_clim_syear crps_syear crps_clim_syear # str: Skill metric or list of skill metrics. See docu.
+      cross_validation: yes
+      save: 'all'
+    Statistics:
+      metric: cov std var
+      save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3], [1/10], [9/10]] # frac: Quantile thresholds.
+      save: 'none'
     Indicators:
       index: no
   ncores: 15 # Optional, int: number of cores, defaults to 1
@@ -46,5 +54,5 @@ Analysis:
 Run:
   Loglevel: INFO
   Terminal: yes
-  output_dir: /esarchive/scratch/nmilders/scorecards_data/to_system/cross_validation/tercile_cross_val/ECMWF-SEAS5/prlr/
+  output_dir: /esarchive/scratch/nmilders/scorecards_data/test/
   code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/
diff --git a/recipes/examples/recipe_scorecards.yml b/recipes/examples/recipe_scorecards.yml
index 434426d02d499db78c6884c6f6a9322390935526..a75ad1d26bd368be33b288e0fb530faa62f6a833 100644
--- a/recipes/examples/recipe_scorecards.yml
+++ b/recipes/examples/recipe_scorecards.yml
@@ -56,6 +56,9 @@ Analysis:
       metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split
       cross_validation: yes
       save: 'all'
+    Statistics:
+      metric: cov std n_eff
+      save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3]] # list, don't split
       save: 'none'
@@ -64,19 +67,30 @@ Analysis:
   Indicators:
     index: no # ?
   Scorecards:
-    execute: yes # yes/no
-    regions:
+    execute: yes # Mandatory, yes/no
+    regions: # Mandatory, define regions over which to aggregate data
       Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
       Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
-      Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90}
-    start_months: NULL
-    metric: mean_bias enscorr rpss crpss enssprerr
-    metric_aggregation: 'score'
-    table_label: NULL
-    fileout_label: NULL
-    col1_width: NULL
-    col2_width: NULL
-    calculate_diff: FALSE
+      Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+    start_months: NULL
+    metric: mean_bias enscorr rpss crpss enssprerr # Mandatory, define metrics to visualize in the scorecard.
+    metric_aggregation: 'score' # Mandatory, defines the aggregation level of the metrics.
+    signif_alpha: 0.05 # Optional, to set the alpha for the significance calculation, default is 0.05.
+    table_label: NULL # Optional, to add extra information to the table title.
+    fileout_label: NULL # Optional, to add extra information to the output filename.
+    col1_width: NULL # Optional, to set the width (cm) of the first table column, default is calculated depending on row names.
+    col2_width: NULL # Optional, to set the width (cm) of the second table column, default is calculated depending on subrow names.
+    columns_width: NULL # Optional, to set the width (cm) of all other columns within the table, default is 1.2.
+    plot_legend: TRUE # Optional, to define if the legend is included in the scorecards image, default is TRUE.
+    legend_breaks: NULL # Optional, default uses the legend breaks from modules/Scorecards/R/tmp/Utils.R.
+    legend_white_space: NULL # Optional, default is automatically calculated depending on column sizes.
+    legend_width: NULL # Optional, to set the width of the legend bars, default is 550.
+    legend_height: NULL # Optional, to set the height of the legend bars, default is 50.
+    label_scale: NULL # Optional, to set the scale of the legend bar labels, default is 1.4.
+    round_decimal: NULL # Optional, to round the data shown in the scorecard, default is 2 (decimals).
+    inf_to_na: TRUE # Optional, to set infinite values to NA, default is FALSE.
+    font_size: NULL # Optional, to set the font size of the scorecard table values, default is 1.2.
+    calculate_diff: FALSE # Optional, to calculate the difference between two systems or two references, default is FALSE.
   ncores: 7
   remove_NAs: no # bool, don't split
   Output_format: Scorecards # string, don't split
diff --git a/tests/recipes/recipe-seasonal_monthly_1_statistics.yml b/tests/recipes/recipe-seasonal_monthly_1_statistics.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ccfd3cf42bf4893354306a39ff408cce4766546d
--- /dev/null
+++ b/tests/recipes/recipe-seasonal_monthly_1_statistics.yml
@@ -0,0 +1,58 @@
+Description:
+  Author: V. Agudetse
+
+Analysis:
+  Horizon: Seasonal
+  Variables:
+    name: tas
+    freq: monthly_mean
+  Datasets:
+    System:
+      name: Meteo-France-System7
+    Multimodel: False
+    Reference:
+      name: ERA5
+  Time:
+    sdate: '1101'
+    fcst_year: '2020'
+    hcst_start: '1993'
+    hcst_end: '1996'
+    ftime_min: 1
+    ftime_max: 3
+  Region:
+    latmin: 17
+    latmax: 20
+    lonmin: 12
+    lonmax: 15
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    # Anomalies:
+    #   compute: no
+    #   cross_validation:
+    #   save: 'none'
+    Calibration:
+      method: mse_min
+      save: 'all'
+    Skill:
+      metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs
+      save: 'all'
+    Statistics:
+      metric: cov std var n_eff
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10, 9/10]]
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: statistics
+      multi_panel: yes
+      projection: cylindrical_equidistant
+  Output_format: scorecards
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: ./tests/out-logs/
+  code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ #/esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml
index 41048db127759ba614115fdc77eeb9ab398a95f6..c1404e7d5ec79d129dc1130b3af15d4eb2372030 100644
--- a/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml
+++ b/tests/recipes/recipe-seasonal_monthly_1_tas-tos.yml
@@ -53,4 +53,4 @@ Run:
   Loglevel: INFO
   Terminal: yes
   output_dir: ./tests/out-logs/
-  code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/ ##/esarchive/scratch/vagudets/repos/auto-s2s/
+  code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/
diff --git a/tests/testthat/test-decadal_monthly_2.R b/tests/testthat/test-decadal_monthly_2.R
index 23821dbb3df08b5f84da2df992de2b4edbbc18e6..ec1e709de89835a5f362257af45e3d1fb6415c98 100644
--- a/tests/testthat/test-decadal_monthly_2.R
+++ b/tests/testthat/test-decadal_monthly_2.R
@@ -164,7 +164,7 @@ TRUE
 )
 expect_equal(
 names(skill_metrics),
-c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps", "frps_clim")
+c("rpss_specs", "enscorr_specs", "frps_specs", "frpss_specs", "bss10_specs", "frps")
 )
 expect_equal(
 class(skill_metrics$rpss_specs),
diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R
index 9582bdfcfdb89ac11b4fa8f3c7dbb79f983fac04..4bbfd3b3e4019354e3517c159da4f793f7b49c5c 100644
--- a/tests/testthat/test-seasonal_NAO.R
+++ b/tests/testthat/test-seasonal_NAO.R
@@ -214,8 +214,8 @@ TRUE
 expect_equal(
 names(skill_metrics),
 c("mean_bias", "enscorr",
-
"enscorr_significance", "rps", "rps_clim", "rpss", "rpss_significance", - "crps", "crps_clim", "crpss", "crpss_significance", "enssprerr") + "enscorr_significance", "rps", "rpss", "rpss_significance", + "crps", "crpss", "crpss_significance", "enssprerr") ) expect_equal( class(skill_metrics$rpss), @@ -247,12 +247,22 @@ expect_equal( all(list.files(outputs, recursive = T) %in% c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")), paste0("Indices/ERA5/nao/", paste0("nao_", 1993:2000, "0301.nc")), - "Skill/ECMWF-SEAS5/nao/scorecards_ECMWF-SEAS5_ERA5_nao-skill_1993-2000_s03.nc") + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crps_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crpss_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_crpss_significance_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_1993-2000_s03.nc", + "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_significance_1993-2000_s03.nc" + ) ), TRUE) expect_equal( length(list.files(outputs, recursive = T)), -17 +26 ) }) diff --git a/tests/testthat/test-seasonal_monthly_statistics.R b/tests/testthat/test-seasonal_monthly_statistics.R new file mode 100644 index 0000000000000000000000000000000000000000..d45b5ed60e5bd58d46792b3fea4115b146cf3b86 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_statistics.R @@ -0,0 +1,189 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Statistics/Statistics.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_1_statistics.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) +archive <- read_yaml(paste0(recipe$Run$code_dir, "conf/archive.yml"))$archive + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Compute statistics +suppressWarnings({invisible(capture.output( +statistics <- Statistics(recipe, data) +))}) + +# Saving +suppressWarnings({invisible(capture.output( +Saving(recipe = recipe, data = data, + skill_metrics = statistics) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = data, + statistics = statistics, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("1. 
Loading", { + +expect_equal( +is.list(data), +TRUE +) +expect_equal( +names(data), +c("hcst", "fcst", "obs") +) +expect_equal( +class(data$hcst), +"s2dv_cube" +) +expect_equal( +class(data$fcst), +"s2dv_cube" +) +expect_equal( +class(data$obs), +"s2dv_cube" +) +expect_equal( +names(data$hcst), +c("data", "dims", "coords", "attrs") +) +expect_equal( +names(data$hcst), +names(data$fcst) +) +expect_equal( +names(data$hcst), +names(data$obs) +) +expect_equal( +dim(data$hcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 4, time = 3, latitude = 3, longitude = 3, ensemble = 25) +) +expect_equal( +dim(data$fcst$data), +c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, latitude = 3, longitude = 3, ensemble = 51) +) +expect_equal( +dim(data$hcst$attrs$Dates), +c(sday = 1, sweek = 1, syear = 4, time = 3) +) +expect_equal( +as.vector(drop(data$hcst$data)[1:2,1:2,1,2,3]), +c(293.9651, 295.9690, 290.6771, 290.7957), +tolerance = 0.0001 +) +expect_equal( +mean(data$hcst$data), +290.8758, +tolerance = 0.0001 +) +expect_equal( +range(data$hcst$data), +c(284.7413, 299.6219), +tolerance = 0.0001 +) +expect_equal( +(data$hcst$attrs$Dates)[1], +as.POSIXct("1993-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[2], +as.POSIXct("1994-11-30 23:59:59", tz = 'UTC') +) +expect_equal( +(data$hcst$attrs$Dates)[5], +as.POSIXct("1993-12-31 23:59:59", tz = 'UTC') +) +expect_equal( +(data$obs$attrs$Dates)[10], +as.POSIXct("1995-01-15 12:00:00", tz = 'UTC') +) + +}) + +#====================================== +test_that("2. Statistics", { + +expect_equal( +is.list(statistics), +TRUE +) +expect_equal( +names(statistics), +c("cov", "std_hcst", "std_obs", "var_hcst", "var_obs", "n_eff") +) +expect_equal( +class(statistics$cov), +"array" +) +expect_equal( +dim(statistics$cov), +c(var = 1, time = 3, latitude = 3, longitude = 3) +) +expect_equal( +dim(statistics$cov), +dim(statistics$var_hcst) +) +expect_equal( +as.vector(statistics$cov[, , 2, 3]), +c(1.14846389, 0.05694802, 0.02346492), +tolerance = 0.0001 +) +expect_equal( +as.vector(statistics$var_hcst[, , 2, 3]), +c(0.74897676, 0.14698283, 0.04864656), +tolerance = 0.0001 +) + +}) + +test_that("3. Saving", { +outputs <- paste0(recipe$Run$output_dir, "/outputs/") +expect_equal( +all(basename(list.files(outputs, recursive = T)) %in% +c("scorecards_Meteo-France-System7_ERA5_tas_cov_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_n_eff_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_std_hcst_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_std_obs_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_var_hcst_1993-1996_s11.nc", + "scorecards_Meteo-France-System7_ERA5_tas_var_obs_1993-1996_s11.nc")), +TRUE +) +expect_equal( +length(list.files(outputs, recursive = T)), +6 +) + +}) + +test_that("4. 
Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal( +all(basename(list.files(plots, recursive = T)) %in% +c("cov-november.png", "n_eff-november.png", "std_hcst-november.png", + "std_obs-november.png", "var_hcst-november.png", "var_obs-november.png" )), +TRUE +) +expect_equal( +length(list.files(plots, recursive = T)), +6 +) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 1f1c7dc8c1bc766305cea49f4c3dae83c814d9c7..4d583ed448db8d58931ee5fa9fe039ef222e7543 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -2,11 +2,11 @@ check_recipe <- function(recipe) { # recipe: yaml recipe already read it ## TODO: set up logger-less case info(recipe$Run$logger, paste("Checking recipe:", recipe$recipe_path)) - + # --------------------------------------------------------------------- # ANALYSIS CHECKS # --------------------------------------------------------------------- - + TIME_SETTINGS_SEASONAL <- c("sdate", "ftime_min", "ftime_max", "hcst_start", "hcst_end") TIME_SETTINGS_DECADAL <- c("ftime_min", "ftime_max", "hcst_start", "hcst_end") @@ -15,29 +15,29 @@ check_recipe <- function(recipe) { HORIZONS <- c("subseasonal", "seasonal", "decadal") ARCHIVE_SEASONAL <- "conf/archive.yml" ARCHIVE_DECADAL <- "conf/archive_decadal.yml" - + # Define error status variable error_status <- F - + # Check basic elements in recipe:Analysis: if (!("Analysis" %in% names(recipe))) { error(recipe$Run$logger, "The recipe must contain an element called 'Analysis'.") error_status <- TRUE } - + if (!all(PARAMS %in% names(recipe$Analysis))) { error(recipe$Run$logger, paste0("The element 'Analysis' in the recipe must contain all of ", "the following: ", paste(PARAMS, collapse = ", "), ".")) error_status <- TRUE } - + if (!any(HORIZONS %in% tolower(recipe$Analysis$Horizon))) { error(recipe$Run$logger, paste0("The element 'Horizon' in the recipe must be one of the ", - "following: ", paste(HORIZONS, collapse = ", "), ".")) - error_status <- TRUE + "following: ", paste(HORIZONS, collapse = ", "), ".")) + error_status <- T } # Check time settings if (tolower(recipe$Analysis$Horizon) == "seasonal") { @@ -73,10 +73,11 @@ check_recipe <- function(recipe) { if (("tas-tos" %in% recipe_variables) && (!is.null(recipe$Analysis$Variables$sic_threshold))) { if (!is.numeric(recipe$Analysis$Variables$sic_threshold) || - dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { + !dplyr::between(recipe$Analysis$Variables$sic_threshold, 0, 1)) { error(recipe$Run$logger, paste("The element Analysis:Variables:sic_threshold must be a", "numeric value between 0 and 1.")) + error_status <- TRUE } } # Check system names @@ -89,7 +90,7 @@ check_recipe <- function(recipe) { } # Check reference names if (!all(recipe$Analysis$Datasets$Reference$name %in% - names(archive$Reference))) { + names(archive$Reference))) { error(recipe$Run$logger, "The specified Reference name was not found in the archive.") error_status <- TRUE @@ -179,7 +180,7 @@ check_recipe <- function(recipe) { } else { stream <- "fcst" } - + ## TODO: To be implemented in the future # if (length(recipe$Analysis$Time$sdate$fcst_day) > 1 && # tolower(recipe$Analysis$Horizon) != "subseasonal") { @@ -193,7 +194,7 @@ check_recipe <- function(recipe) { # error(recipe$Run$logger, # paste("The element 'fcst_sday' in the recipe should be defined.")) # } - + if (is.null(recipe$Analysis$Time$fcst_year)) { warn(recipe$Run$logger, paste("The element 'fcst_year' is not 
defined in the recipe.", @@ -209,7 +210,7 @@ check_recipe <- function(recipe) { # } # } # fcst.sdate <- list(stream = stream, fcst.sdate = fcst.sdate) - + # Regrid checks: if (length(recipe$Analysis$Regrid) != 2) { error(recipe$Run$logger, @@ -231,7 +232,7 @@ check_recipe <- function(recipe) { "Only one single Horizon can be specified in the recipe") error_status <- TRUE } - + ## TODO: Refine this # nvar <- length(recipe$Analysis$Variables) # if (nvar > 2) { @@ -239,19 +240,19 @@ check_recipe <- function(recipe) { # "Only two type of Variables can be listed: ECVs and Indicators.") # stop("EXECUTION FAILED") # } - + # remove NULL or None Indicators or ECVs from the recipe: if (!is.null(recipe$Analysis$Variables$Indicators) && !is.list(recipe$Analysis$Variables$Indicators)) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ - -which(names(recipe$Analysis$Variables) == 'Indicators')] + -which(names(recipe$Analysis$Variables) == 'Indicators')] } if (!is.null(recipe$Analysis$Variables$ECVs) && !is.list(recipe$Analysis$Variables$ECVs)) { recipe$Analysis$Variables <- recipe$Analysis$Variables[ - -which(names(recipe$Analysis$Variables) == 'ECVs')] + -which(names(recipe$Analysis$Variables) == 'ECVs')] } - + # Region checks: LIMITS <- c('latmin', 'latmax', 'lonmin', 'lonmax') # Ordinary recipe @@ -260,8 +261,8 @@ check_recipe <- function(recipe) { if (!all(LIMITS %in% names(region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", - paste(LIMITS, collapse = ", "), ".")) - error_status <- TRUE + paste(LIMITS, collapse = ", "), ".")) + error_status <- T } } if (length(recipe$Analysis$Region) > 1) { @@ -273,35 +274,35 @@ check_recipe <- function(recipe) { } } } - # Atomic recipe + # Atomic recipe } else if (!all(LIMITS %in% names(recipe$Analysis$Region))) { error(recipe$Run$logger, paste0("There must be 4 elements in 'Region': ", paste(LIMITS, collapse = ", "), ".")) error_status <- TRUE } - ## TODO: Implement multiple regions - # nregions <- length(recipe$Analysis$Region) - # for (i in 1:length(recipe$Analysis$Region)) { - # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { - # limits <- paste(limits, collapse = " ") - # error(recipe$Run$logger, - # paste0("Each region defined in element 'Region' ", - # "should have 4 elements: ", - # paste(limits, collapse = ", "), ".")) - # error_status <- TRUE - # } - # if (length(recipe$Analysis$Region) > 1) { - # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { - # error(recipe$Run$logger, - # paste("If multiple regions are requested, each region must", - # "have a 'name'".) - # # are numeric? class list mode list - # } + ## TODO: Implement multiple regions + # nregions <- length(recipe$Analysis$Region) + # for (i in 1:length(recipe$Analysis$Region)) { + # if (!all(limits %in% names(recipe$Analysis$Region[[i]]))) { + # limits <- paste(limits, collapse = " ") + # error(recipe$Run$logger, + # paste0("Each region defined in element 'Region' ", + # "should have 4 elements: ", + # paste(limits, collapse = ", "), ".")) + # error_status <- T + # } + # if (length(recipe$Analysis$Region) > 1) { + # if (!("name" %in% names(recipe$Analysis$Region[[i]]))) { + # error(recipe$Run$logger, + # paste("If multiple regions are requested, each region must", + # "have a 'name'".) + # # are numeric? 
class list mode list + # } # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- - + # Calibration # If 'method' is FALSE/no/'none' or NULL, set to 'raw' ## TODO: Review this check @@ -361,7 +362,7 @@ check_recipe <- function(recipe) { } } } - + # Downscaling if ("Downscaling" %in% names(recipe$Analysis$Workflow)) { downscal_params <- lapply(recipe$Analysis$Workflow$Downscaling, tolower) @@ -377,28 +378,28 @@ check_recipe <- function(recipe) { downscal_params$type <- "none" warn(recipe$Run$logger, paste("Downscaling 'type' is empty in the recipe, setting it to", - "'none'.")) + "'none'.")) } if (!(downscal_params$type %in% DOWNSCAL_TYPES)) { error(recipe$Run$logger, paste0("The type of Downscaling request in the recipe is not ", - "available. It must be one of the following: ", - paste(DOWNSCAL_TYPES, collapse = ", "), ".")) - error_status <- TRUE + "available. It must be one of the following: ", + paste(DOWNSCAL_TYPES, collapse = ", "), ".")) + error_status <- T } if ((downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) && (is.null(downscal_params$target_grid))) { error(recipe$Run$logger, paste("A target grid is required for the downscaling method", - "requested in the recipe.")) - error_status <- TRUE + "requested in the recipe.")) + error_status <- T } if (downscal_params$type == "int") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'int' was requested, but no", - "interpolation method is provided in the recipe.")) - error_status <- TRUE + "interpolation method is provided in the recipe.")) + error_status <- T } } else if (downscal_params$type %in% c("int", "intbc", "intlr", "logreg")) { @@ -406,63 +407,63 @@ check_recipe <- function(recipe) { error(recipe$Run$logger, paste("Downscaling type", downscal_params$type, "was requested in the recipe, but no", - "interpolation method is provided.")) - error_status <- TRUE + "interpolation method is provided.")) + error_status <- T } } else if (downscal_params$type == "intbc") { if (is.null(downscal_params$bc_method)) { error(recipe$Run$logger, paste("Downscaling type 'intbc' was requested in the recipe, but", - "no bias correction method is provided.")) - error_status <- TRUE + "no bias correction method is provided.")) + error_status <- T } else if (!(downscal_params$bc_method %in% BC_METHODS)) { error(recipe$Run$logger, paste0("The accepted Bias Correction methods for the downscaling", - " module are: ", paste(BC_METHODS, collapse = ", "), ".")) - error_status <- TRUE + " module are: ", paste(BC_METHODS, collapse = ", "), ".")) + error_status <- T } } else if (downscal_params$type == "intlr") { if (length(downscal_params$lr_method) == 0) { error(recipe$Run$logger, paste("Downscaling type 'intlr' was requested in the recipe, but", - "no linear regression method was provided.")) - error_status <- TRUE + "no linear regression method was provided.")) + error_status <- T } else if (!(downscal_params$lr_method %in% LR_METHODS)) { error(recipe$Run$logger, paste0("The accepted linear regression methods for the", - " downscaling module are: ", - paste(LR_METHODS, collapse = ", "), ".")) - error_status <- TRUE + " downscaling module are: ", + paste(LR_METHODS, collapse = ", "), ".")) + error_status <- T } } else if (downscal_params$type == "analogs") { if (is.null(downscal_params$nanalogs)) { warn(recipe$Run$logger, paste("Downscaling type is 'analogs, but the number of analogs", - "has 
not been provided in the recipe. The default is 3.")) + "has not been provided in the recipe. The default is 3.")) } } else if (downscal_params$type == "logreg") { if (is.null(downscal_params$int_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe, but", - "no interpolation method was provided.")) - error_status <- TRUE + "no interpolation method was provided.")) + error_status <- T } if (is.null(downscal_params$log_reg_method)) { error(recipe$Run$logger, paste("Downscaling type 'logreg' was requested in the recipe,", - "but no logistic regression method is provided.")) - error_status <- TRUE + "but no logistic regression method is provided.")) + error_status <- T } else if (!(downscal_params$log_reg_method %in% LOGREG_METHODS)) { error(recipe$Run$logger, paste0("The accepted logistic regression methods for the ", - "downscaling module are: ", - paste(LOGREG_METHODS, collapse = ", "), ".")) - error_status <- TRUE + "downscaling module are: ", + paste(LOGREG_METHODS, collapse = ", "), ".")) + error_status <- T } } } } - + # Indices if ("Indices" %in% names(recipe$Analysis$Workflow)) { nino_indices <- paste0("nino", c("1+2", "3", "3.4", "4")) @@ -473,10 +474,10 @@ check_recipe <- function(recipe) { "in the recipe.")) error_status <- TRUE } else if (!(recipe$Analysis$Workflow$Anomalies$compute)) { - error(recipe$Run$logger, - paste0("Indices uses Anomalies as input, but the parameter", - "'Anomalies:compute' is set as no/False.")) - error_status <- TRUE + error(recipe$Run$logger, + paste0("Indices uses Anomalies as input, but the parameter", + "'Anomalies:compute' is set as no/False.")) + error_status <- T } recipe_indices <- tolower(names(recipe$Analysis$Workflow$Indices)) if (!all(recipe_indices %in% indices)) { @@ -504,18 +505,20 @@ check_recipe <- function(recipe) { error_status <- TRUE } } - + # Skill - AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rpss", - "frps", "frpss", "crps", "crpss", "bss10", "bss90", + AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", + "rpss", "frps", "frpss", "crps", "crps_syear", + "crpss", "bss10", "bss90", "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", - "rpss_clim", "enscorr_specs", "frps_specs", "rpss_specs", + "rps_clim_syear", "crps_clim", "crps_clim_syear", + "enscorr_specs", "frps_specs", "rpss_specs", "frpss_specs", "bss10_specs", "bss90_specs") if ("Skill" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Skill$metric)) { - error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Skill'.") - error_status <- TRUE + error(recipe$Run$logger, + "Parameter 'metric' must be defined under 'Skill'.") + error_status <- T } else { requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, ", | |,")[[1]] @@ -526,6 +529,14 @@ check_recipe <- function(recipe) { "full list of accepted skill metrics.")) error_status <- TRUE } + if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + if (any(grepl('_syear', requested_metrics))) { + recipe$Analysis$Output_format <- 'scorecards' + warn(recipe$Run$logger, + paste0("'_syear' metrics can only be saved as 'scorecards' ", + "output format. 
The output format is now 'scorecards'.")) + } + } } # Saving checks SAVING_OPTIONS_SKILL <- c("all", "none") @@ -538,7 +549,7 @@ check_recipe <- function(recipe) { error_status <- TRUE } } - + # Probabilities if ("Probabilities" %in% names(recipe$Analysis$Workflow)) { if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { @@ -563,11 +574,11 @@ check_recipe <- function(recipe) { error_status <- TRUE } } - + # Visualization if ("Visualization" %in% names(recipe$Analysis$Workflow)) { PLOT_OPTIONS <- c("skill_metrics", "forecast_ensemble_mean", - "most_likely_terciles") + "most_likely_terciles", "statistics") # Separate plots parameter and check if all elements are in PLOT_OPTIONS if (is.null(recipe$Analysis$Workflow$Visualization$plots)) { error(recipe$Run$logger, @@ -628,29 +639,56 @@ check_recipe <- function(recipe) { } # Scorecards if ("Scorecards" %in% names(recipe$Analysis$Workflow)) { - if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { - error(recipe$Run$logger, - "Parameter 'metric' must be defined under 'Scorecards'.") - error_status <- TRUE - } else { - sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, - ", | |,")[[1]] - if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { + if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){ + if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) { error(recipe$Run$logger, - paste0("All of the metrics requested under 'Scorecards' must ", - "be requested in the 'Skill' section.")) - error_status <- TRUE + "Parameter 'metric' must be defined under 'Scorecards'.") + error_status <- T + } else { + sc_metrics <- strsplit(recipe$Analysis$Workflow$Scorecards$metric, + ", | |,")[[1]] + if (recipe$Analysis$Workflow$Scorecards$metric_aggregation == 'score') { + if ('rpss' %in% tolower(sc_metrics)) { + if (!('rps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_clim_syear') + } + if (!('rps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rps_syear') + } + } + if ('crpss' %in% tolower(sc_metrics)) { + if (!('crps_clim_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_clim_syear') + } + if (!('crps_syear' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'crps_syear') + } + } + if ('enscorr' %in% tolower(sc_metrics)) { + recipe$Analysis$Workflow$Statistics <- c('std', 'cov', 'n_eff') + } + recipe$Analysis$Workflow$Skill$metric <- requested_metrics + } + if (tolower(recipe$Analysis$Output_format) != 'scorecards') { + recipe$Analysis$Output_format <- 'scorecards' + } + if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) { + error(recipe$Run$logger, + paste0("All of the metrics requested under 'Scorecards' must ", + "be requested in the 'Skill' section.")) + error_status <- T + } } - } + } } # --------------------------------------------------------------------- # RUN CHECKS # --------------------------------------------------------------------- - + ## TODO: These checks should probably go first RUN_FIELDS = c("Loglevel", "Terminal", "output_dir", "code_dir") LOG_LEVELS = c("INFO", "DEBUG", "WARN", "ERROR", "FATAL") - + if (!("Run" %in% names(recipe))) { stop("The recipe must contain an element named 'Run'.") } @@ -690,11 +728,11 @@ check_recipe <- function(recipe) { paste0(LOG_LEVELS, collapse='/'))) error_status <- TRUE } - + # --------------------------------------------------------------------- # AUTOSUBMIT CHECKS # 
--------------------------------------------------------------------- - + AUTO_PARAMS <- c("script", "expid", "hpc_user", "wallclock", "processors_per_job", "platform", "email_notifications", "email_address", "notify_completed", "notify_failed") @@ -788,7 +826,7 @@ check_recipe <- function(recipe) { } } } - + # --------------------------------------------------------------------- # WORKFLOW CHECKS # --------------------------------------------------------------------- @@ -798,7 +836,7 @@ check_recipe <- function(recipe) { #nverifications <- check_number_of_dependent_verifications(recipe) # info(recipe$Run$logger, paste("Start Dates:", # paste(fcst.sdate, collapse = " "))) - + # Return error if any check has failed if (error_status) { error(recipe$Run$logger, "RECIPE CHECK FAILED.") @@ -807,4 +845,5 @@ check_recipe <- function(recipe) { } else { info(recipe$Run$logger, "##### RECIPE CHECK SUCCESSFULL #####") } + return(recipe) } diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R index f994252506a6b3edfe644100deb94b35c7a8a4b7..aeb9e2f8bd2f685db73fcbec1f94e3934c5e91ec 100644 --- a/tools/prepare_outputs.R +++ b/tools/prepare_outputs.R @@ -98,7 +98,7 @@ prepare_outputs <- function(recipe_file, warn(recipe$Run$logger, "Recipe checks disabled. The recipe will not be checked for errors.") } else { - check_recipe(recipe) + recipe <- check_recipe(recipe) } return(recipe) }
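
Reviewer note: check_recipe() can now modify the recipe (it coerces Output_format to 'scorecards' when '_syear' metrics or Scorecards are requested, appends the '_syear' metrics needed for 'score' aggregation, and injects a Statistics block for enscorr) and returns it, so any caller that previously invoked it only for its side effects must capture the return value, as prepare_outputs() now does. The sketch below is a minimal, non-authoritative usage example of the new Statistics path; it mirrors tests/testthat/test-seasonal_monthly_statistics.R added in this patch and assumes the repository root as the working directory, with prepare_outputs() available on the search path as in the test suite.

source("modules/Loading/Loading.R")
source("modules/Statistics/Statistics.R")
source("modules/Saving/Saving.R")
source("modules/Visualization/Visualization.R")

# prepare_outputs() now returns the recipe as (possibly) modified by check_recipe()
recipe <- prepare_outputs("tests/recipes/recipe-seasonal_monthly_1_statistics.yml")

data <- Loading(recipe)                 # hcst, fcst and obs as s2dv_cube objects
statistics <- Statistics(recipe, data)  # cov, std_hcst, std_obs, var_hcst, var_obs, n_eff

# With Output_format 'scorecards', save_metrics() writes one netCDF file per
# metric, named:
# scorecards_<system>_<reference>_<var>_<metric>_<hcst_start>-<hcst_end>_s<MM>.nc
Saving(recipe = recipe, data = data, skill_metrics = statistics)

# Statistics are passed through the new 'statistics' argument and plotted by
# plot_metrics(), which supersedes plot_skill_metrics()
Visualization(recipe = recipe, data = data,
              statistics = statistics, significance = TRUE)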