diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R
index 84e44028901a715023f71d0662a75b588e98a7e3..2a70eff9c20e33d7241fc1ad13fd1c789e5dd916 100644
--- a/modules/Scorecards/R/tmp/LoadMetrics.R
+++ b/modules/Scorecards/R/tmp/LoadMetrics.R
@@ -28,18 +28,17 @@
 #'  reference = 'ERA5',
 #'  var = 'tas',
-#'  period = '1993-2016'
-#'  metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'),
+#'  period = '1993-2016',
+#'  data_type = 'aggr_metrics',
 #'  start_months = sprintf("%02d", 1:12),
 #'  calib_method = 'raw',
-#'  input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data')
+#'  input_path = './scorecards_data/input_data')
 #'}
 #'@import easyNCDF
 #'@import multiApply
 #'@export
-LoadMetrics <- function(input_path, system, reference, var, period,
-                        metrics, start_months, calib_method = NULL,
-                        inf_to_na = FALSE) {
+LoadMetrics <- function(input_path, system, reference, var, period, data_type,
+                        start_months, calib_method = NULL) {
   # Initial checks
   ## system
@@ -61,10 +59,6 @@ LoadMetrics <- function(input_path, system, reference, var, period,
             "will be used.")
     var <- var[1]
   }
-  ## metrics
-  if (!is.character(metrics)) {
-    stop("Parameter 'metrics' cannot be NULL.")
-  }
   ## start_months
   if (is.character(start_months)) {
     warning("Parameter 'start_months' must be a numeric vector indicating ",
@@ -96,18 +90,13 @@ LoadMetrics <- function(input_path, system, reference, var, period,
   system <- gsub('.','', system, fixed = T)
   reference <- gsub('.','', reference, fixed = T)
-  all_metrics <- sapply(system, function(x) NULL)
-  names(all_metrics) <- system
   ## Load data for each system
+  all_metrics <- NULL
   for (sys in 1:length(system)) {
-    ## Define empty list to saved data
-    by_reference <- sapply(reference, function(x) NULL)
-    names(by_reference) <- reference
     ## Load data for each reference
+    by_reference <- NULL
    for (ref in 1:length(reference)) {
      ## Call function to load metrics data
-      met_by_smonth <- NULL
-      for (met in metrics) {
      result <- .loadmetrics(input_path = input_path,
                             system = system[sys],
                             reference = reference[ref],
                             var = var,
                             period = period,
                             start_months = start_months,
                             calib_method = calib_method,
-                            metric = met)
+                            data_type = data_type)
      result_attr <- attributes(result)
-      met_by_smonth <- abind::abind(met_by_smonth, result, along = length(dim(result)) + 1)
-      }
-      attributes(met_by_smonth) <- result_attr[-1]
-      # names(dim(met_by_smonth)) <- c(names(result_attr$dim), 'metric')
-
-      dim(met_by_smonth) <- c(dim(result), metric = length(metrics))
-      ## Save metric data as array in reference list
-      by_reference[[reference[ref]]] <- met_by_smonth
-      ## Remove -Inf from crpss data if variable is precipitation
-      if (inf_to_na) {
-        by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA
-      }
+
+      by_reference <- abind::abind(by_reference, result,
+                                   along = length(dim(result)) + 1)
+      dim(by_reference) <- c(dim(result), reference = length(reference))
    } ## close loop on reference
-    ## Save reference data in list of system
-    all_metrics[[system[sys]]] <- by_reference
+    all_metrics <- abind::abind(all_metrics, by_reference,
+                                along = length(dim(by_reference)) + 1)
+    dim(all_metrics) <- c(dim(by_reference), system = length(system))
  } ## close loop on system
-  attributes(all_metrics)$metrics <- metrics
  attributes(all_metrics)$start_months <- start_months
  return(all_metrics)
@@ -145,13 +126,13 @@ LoadMetrics <- function(input_path, system, reference, var, period,
 .loadmetrics <- function(input_path, system, reference, var, period,
                          start_months,
-                         calib_method, metric) {
+                         calib_method, data_type) {
   ## Load data for each start date
   allfiles <- sapply(start_months, function(m) {
     paste0(input_path, "/", system, "/", reference, "/", calib_method,
            "/", var, "/scorecards_", system, "_", reference, "_",
-           var, "_", metric, "_", period, "_s", m, # mod.pressure,
+           var, "_", data_type, "_", period, "_s", m,
            ".nc")})
   allfiles_exist <- sapply(allfiles, file.exists)
@@ -170,40 +151,23 @@ LoadMetrics <- function(input_path, system, reference, var, period,
     }
     num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension
   }
-  # dims: [metric, longitude, latitude, time, smonth]
-  # or [metric, region, time, smonth]
-
+  # Loop over files
   dim(allfiles) <- c(dat = 1, sdate = length(allfiles))
   array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat',
                               fun = function(x) {
     if (file.exists(x)) {
-      res <- easyNCDF::NcToArray(x, vars_to_read = metric, unlist = T,
+      res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T,
                                  drop_var_dim = T)
       names(dim(res)) <- NULL
     } else {
-      res <- array(dim = c(length(metrics), allfiledims[-1,1]))
+      res <- array(dim = c(length(data_type), allfiledims[-1,1]))
       names(dim(res)) <- NULL
     }
     res})$output1
   dim(array_met_by_sdate) <- c(allfiledims[-1,1], sdate = length(allfiles))
-  # Attributes
-  # Read attributes from the first existing file
-  if ("region" %in% rownames(allfiledims)) {
-    file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]])
-    region <- ncdf4::ncatt_get(file_for_att, 'region')
-    ncdf4::nc_close(file_for_att)
-    attributes(array_met_by_sdate)$region <- region
-  } else {
-    lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude',
-                               unlist = T, drop_var_dim = T)
-    lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude',
-                               unlist = T, drop_var_dim = T)
-    attributes(array_met_by_sdate)$lon <- lon
-    attributes(array_met_by_sdate)$lat <- lat
-  }
   return(array_met_by_sdate)
 }
diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R
index 7278eb16a2663ac9f723c68e375522439b046237..1633eb868881b769fd8a429a63a513f5d26e4796 100644
--- a/modules/Scorecards/R/tmp/ScorecardsMulti.R
+++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R
@@ -7,11 +7,11 @@
 #'@param sign is an array with the same dimensions as data indicating the
 #'  significance of the metrics, with either true, false or null.
 #'@param system a vector of character strings defining the systems following the
-#'  archive.yml format from verification suite
+#'  archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#'  following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#'  variable-dictionary.yml from verification suite
+#'  following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
 #'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -56,20 +56,21 @@
 #'  system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #'  reference.name = 'ERA5',
 #'  var = 'tas',
+#'  var.units = 'C',
 #'  start.year = 1993,
 #'  end.year = 2016,
 #'  start.months = 1:12,
 #'  forecast.months = 1:6,
 #'  region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #'  metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#'  table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'  table.label = '(Interpolation = to system, Cross-validation = True)',
 #'  fileout.label = '_crossval-terciles_agg-skill',
-#'  output.path = '/esarchive/scratch/nmilders/scorecards_images/testing'
+#'  output.path = './scorecards'
 #'  )
-ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
-                            end.year, start.months, forecast.months,
+ScorecardsMulti <- function(data, sign, system, reference, var, var.units,
+                            start.year, end.year, start.months, forecast.months,
                             region.names, metrics, plot.legend = TRUE,
                             legend.breaks = NULL, legend.white.space = NULL,
                             legend.width = 555, legend.height = 50,
@@ -108,7 +109,6 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
     attributes(sign)$metrics <- metrics
   }
-
   ## Transform data for scorecards by forecast month (types 11 & 12)
   if(length(start.months) >= length(forecast.months)){
@@ -133,25 +133,14 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
   }
   sys_dict <- read_yaml("conf/archive.yml")[[filesystem]]
   var_dict <- read_yaml("conf/variable-dictionary.yml")$vars
-
+
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
-
+
   system.name <- NULL
   reference.name <- NULL
diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R
index 9fd445451dcd4bb54d3aeb7c7eab43075434e6eb..26e2eeb2974d33ccf3b4b4b581d444b178d1cf0c 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSingle.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R
@@ -7,11 +7,11 @@
 #'@param sign is an array with the same dimensions as data indicating the
 #'  significance of the metrics, with either true, false or null.
 #'@param system a vector of character strings defining the systems following the
-#'  archive.yml format from verification suite
+#'  archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#'  following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#'  variable-dictionary.yml from verification suite
+#'  following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
 #'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -54,19 +54,20 @@
 #'  system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #'  reference.name = 'ERA5',
 #'  var = 'tas',
+#'  var.units = 'C',
 #'  start.year = 1993,
 #'  end.year = 2016,
 #'  start.months = 1:12,
 #'  forecast.months = 1:6,
 #'  region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #'  metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#'  table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'  table.label = '(Interpolation = to system, Cross-validation = True)',
 #'  fileout.label = '_crossval-terciles_agg-skill',
-#'  output.path = '/esarchive/scratch/nmilders/scorecards_images/test'
+#'  output.path = './scorecards'
 #'  )
 #'@export
-ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
-                             end.year, start.months, forecast.months,
+ScorecardsSingle <- function(data, sign, system, reference, var, var.units,
+                             start.year, end.year, start.months, forecast.months,
                              region.names, metrics, plot.legend = TRUE,
                              legend.breaks = NULL, legend.white.space = NULL,
                              legend.width = 550, legend.height = 50,
@@ -149,17 +150,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
@@ -205,9 +195,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
       stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region')))
       temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected')
       pos_bias <- which(metrics == 'mean_bias')
-      if(var == 'psl'){
-        temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100
-      }
       breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric',
                                           indices = pos_bias))
     }
diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
index 88e9de1530b68be962773f9f836386947ebc0c79..2d17f94c06caafccbcd67c8e035ec064d2fabb4c 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
@@ -5,11 +5,11 @@
 #'@param input_data is an array of spatially aggregated metrics containing the
 #'  following dimensions: system, reference, metric, time, sdate, region.
 #'@param system a vector of character strings defining the systems following the
-#'  archive.yml format from verification suite
+#'  archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#'  following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#'  variable-dictionary.yml from verification suite
+#'  following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
#'@param start.year a numeric indicating the start year of the reference period #'@param end.year a numeric indicating the end year of the reference period #'@param start.date a vector of character strings indicating the start months @@ -32,6 +32,7 @@ #' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference.name = 'ERA5', #' var = 'tas', +#' var.units = 'C', #' start.year = 1993, #' end.year = 2016, #' start.months = 1:12, @@ -44,22 +45,12 @@ #' ) -ScorecardsSystemDiff <- function(data, - system, - reference, - var, - start.year, - end.year, - start.months, - forecast.months, - region.names, - metrics, - table.label = NULL, - fileout.label = NULL, - legend.white.space = NULL, - col1.width = NULL, - col2.width = NULL, - output.path){ +ScorecardsSystemDiff <- function(data, system, reference, var, var.units, + start.year, end.year, start.months, + forecast.months, region.names, metrics, + table.label = NULL, fileout.label = NULL, + legend.white.space = NULL, col1.width = NULL, + col2.width = NULL, output.path){ ## Checks to apply: # first dimension in aggregated_metrics is system and second dimension is reference @@ -121,7 +112,10 @@ ScorecardsSystemDiff <- function(data, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units ## TODO: Get units from recipe or elsewhere + + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } system.name <- NULL reference.name <- NULL diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R deleted file mode 100644 index 08267db4cce8c5590598acc6f3bd4ab93af15a47..0000000000000000000000000000000000000000 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ /dev/null @@ -1,110 +0,0 @@ -#' Scorecards spatial aggregation of loaded metrics -#' -#'@description Scorecards function to perform the spatial aggregation of the -#' loaded metrics for the specified regions. -#' -#'@param loaded_metrics is a list of arrays containing the metrics loaded by the -#' function SC_load_metrics. -#'@param region is a named list of vectors containing the desired regions to -#' analyze. For each region the following should be specified in this order: -#' lon_min, lon_max, lat_min, lat_max. -#'@param metric.aggregation a character indicating whether the skill score RPS -#' and CRPSS are calculated from aggregated scores or aggregated skill score -#' directly, either 'score' or 'skill' respectively -#'@param ncores is the number of cores to use for the calculation. -#' -#'@return An array with the following dimensions: system, reference, metrics, -#' time, sdate, region. 
-#' -#'@examples -#'regions <- list('global' = c(lon.min = 0, lon.max = 360, lat.min = -90, lat.max = 90), -#' 'europe' = c(lon.min = -10, lon.max = 40, lat.min = 30, lat.max = 70)) -#'aggregated_metrics <- WeightedMetrics(loaded_metrics, -#' regions = regions, -#' metric.aggregation = 'skill', -#' ncores = 4) -#'@import multiApply -#'@importFrom ClimProjDiags WeightedMean -#'@importFrom s2dv Reorder -#'@export -WeightedMetrics <- function(loaded_metrics, regions, forecast.months, - metric.aggregation, ncores = NULL, na.rm = TRUE) { - ## Initial checks - # loaded_metrics - if (any(sapply(loaded_metrics, function(x) { - sapply(x, function(y) {is.null(dim(y))}) - }))) { - stop(paste0("Parameter 'loaded_metrics' must be a list of lists of arrays ", - "with named dimensions.")) - } - # regions - if (!all(sapply(regions, is.numeric))) { - stop(paste0("Parameter 'regions' must be a named list of vectors ", - "containing the desired regions to analyze.")) - } - # metric.aggregation - if (!is.character(metric.aggregation)) { - stop("Parameter 'metric.aggregation' must be a character indicating.") - } - # ncores - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be an integer.") - } - - ## Get metric names - ## TO DO: check all metric are in the same order for all sys - metrics <- attributes(loaded_metrics)$metrics - start.months <- attributes(loaded_metrics)$start_months - - all_metric_means <- array(dim = c(metric = length(metrics), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - reference = length(loaded_metrics[[1]]), - system = length(loaded_metrics))) - - ## Loop over system - for (sys in 1:length(loaded_metrics)) { - ## Loop over reference - for (ref in 1:length(loaded_metrics[[sys]])) { - dimnames <- names(dim(loaded_metrics[[sys]][[ref]])) - lon_dim_name <- dimnames[which(dimnames %in% .KnownLonNames())] - lat_dim_name <- dimnames[which(dimnames %in% .KnownLatNames())] - ## Get latitude and longitude from attributes of loaded metrics - ## Loop over region - for (reg in 1:length(regions)) { - ## Calculate weighted means for defined regions for each system and reference - weighted.mean <- WeightedMean(data = loaded_metrics[[sys]][[ref]], - lon = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lon), - lat = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lat), - region = regions[[reg]], - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = na.rm, - ncores = ncores) - - if (!all(names(dim(weighted.mean)) == c('metric', 'time', 'sdate'))) { - weighted.mean <- Reorder(weighted.mean, c('metric', 'time', 'sdate')) - } - - all_metric_means[, , , reg, ref, sys] <- weighted.mean - - } ## close loop on region - } ## close loop on reference - } ## close loop on system - - ## reorder dimensions in array - all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) - - ## Add attributes - attributes(all_metric_means)$metrics <- metrics - attributes(all_metric_means)$start.months <- start.months - attributes(all_metric_means)$forecast.months <- forecast.months - attributes(all_metric_means)$regions <- regions - attributes(all_metric_means)$system.name <- names(loaded_metrics) - attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) - - return(all_metric_means) - -} ## close function - diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R deleted file mode 100644 index 
37aa421c978d8aad8479b9b972684222207ddd5f..0000000000000000000000000000000000000000 --- a/modules/Scorecards/Scorecards.R +++ /dev/null @@ -1,610 +0,0 @@ -############################################################################### -##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### -############################################################################### - -##### Load source functions ##### -source('modules/Scorecards/R/tmp/LoadMetrics.R') -source('modules/Scorecards/R/tmp/WeightedMetrics.R') -source('modules/Scorecards/R/tmp/Utils.R') -source('modules/Scorecards/R/tmp/SCTransform.R') -source('modules/Scorecards/R/tmp/ScorecardsSingle.R') -source('modules/Scorecards/R/tmp/ScorecardsMulti.R') -source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') -source('modules/Scorecards/R/tmp/VizScorecard.R') - -## Temporary for new ESviz function -source('modules/Scorecards/R/tmp/ColorBarContinuous.R') -source('modules/Scorecards/R/tmp/ClimPalette.R') -.IsColor <- s2dv:::.IsColor -.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs - -## Define function -Scorecards <- function(recipe) { - - ## Parameters for loading data files - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") - output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") - dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name - var <- recipe$Analysis$Variables$name - start.year <- as.numeric(recipe$Analysis$Time$hcst_start) - end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) - } else { - start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, - split = ", | |,")[[1]]) - if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ - error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") - } - } - - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) - - ## Parameters for data aggregation - regions <- recipe$Analysis$Workflow$Scorecards$regions - for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} - - metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation - metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - ncores <- 1 # recipe$Analysis$ncores - - if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ - alpha <- 0.05 - } else { - alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha - } - - if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ - inf.to.na <- FALSE - } else { - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na - } - - if(is.null(recipe$Analysis$remove_NAs)){ - na.rm <- FALSE - } else { - na.rm <- recipe$Analysis$remove_NAs - } - - ## Parameters for scorecard layout - table.label <- recipe$Analysis$Workflow$Scorecards$table_label - fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - 
col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width - col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks - legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ - plot.legend <- TRUE - } else { - plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ - columns.width <- 1.2 - } else { - columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ - legend.white.space <- 6 - } else { - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ - legend.height <- 50 - } else { - legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ - label.scale <- 1.4 - } else { - label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ - round.decimal <- 2 - } else { - round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ - font.size <- 1.1 - } else { - font.size <- recipe$Analysis$Workflow$Scorecards$font_size - } - - ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ - calculate.diff <- FALSE - } else { - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - } - - ####### SKILL AGGREGATION ####### - if(metric.aggregation == 'skill'){ - - ## Load data files - loaded_metrics <- LoadMetrics(input_path = skill.input.path, - system = system, - reference = reference, - var = var, - metrics = metrics.visualize, - period = period, - start_months = start.months, - calib_method = calib.method, - inf_to_na = inf.to.na - ) - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics)$metrics - forecast.months.dim <- forecast.months - start.months.dim <- attributes(loaded_metrics)$start_months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } - - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics)$start_months - attributes(aggregated_metrics)$forecast.months <- forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - - - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - 
regions = regions, - forecast.months = forecast.months, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region - metrics_significance <- NULL - - } ## close if on skill - - ###### SCORE AGGREGATION ###### - if(metric.aggregation == 'score'){ - - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - - ## Define arrays to filled with data - aggregated_metrics <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - metrics_significance <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - ## Load and aggregated data for each metric - for (sys in 1:length(system)){ - for (ref in 1:length(reference)){ - for (met in metrics.visualize) { - - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_syear') - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_clim_syear') - - ## Spatially aggregate data - rps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$lon), - lat = as.vector(attributes(rps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$lon), - lat = as.vector(attributes(rps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' - names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - rps_syear <- Apply(data = rps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_syear <- Apply(data = rps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_syear / rps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'sdate','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 
'crps_syear') - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'crps_clim_syear') - - ## Spatially aggregate data - crps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$lon), - lat = as.vector(attributes(crps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$lon), - lat = as.vector(attributes(crps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' - names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - crps_syear <- Apply(data = crps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_syear <- Apply(data = crps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_syear / crps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'sdate','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'cov') - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_hcst') - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_obs') - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'n_eff') - - ## Calculate spatial aggregation - cov <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$lon), - lat = as.vector(attributes(cov)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - - - std_hcst <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$lon), - lat = as.vector(attributes(std_hcst)$lat), - 
londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - - std_obs <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$lon), - lat = as.vector(attributes(std_obs)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs))[length(dim(std_obs))] <- 'region' - - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(n_eff)$lon), - lat = as.vector(attributes(n_eff)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(n_eff))[length(dim(n_eff))] <- 'region' - - ## Calculate correlation - enscorr <- cov / (std_hcst * std_obs) - - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - sdate = length(start.months), - region = length(regions))) - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['sdate']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- TRUE - } else { - sign_corr[time, mon, reg] <- FALSE - } - } - } - } - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'sdate','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'mean_bias') - - ## Calculate spatial aggregation - mean_bias <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$lon), - lat = as.vector(attributes(mean_bias)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'sdate','region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'enssprerr') - - ## Calculate spatial aggregation - enssprerr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$lon), - lat = as.vector(attributes(enssprerr)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr))[length(dim(enssprerr))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = 
enssprerr, order = c('time', 'sdate','region')) - - } ## close on enssprerr - - } ## close loop on metric - } ## close if on reference - } ## close if on system - - ## Include metric attributes - attributes(aggregated_metrics)$metrics <- metrics.visualize - - ## Set NAs to False - metrics_significance[is.na(metrics_significance)] <- FALSE - - } ## close if on score - - - ####### PLOT SCORECARDS ########## - - ## Create simple scorecard tables - ## (one system only) - ## Metrics input must be in the same order as function SC_spatial_aggregation - scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - - ## Create multi system/reference scorecard tables - ## (multiple systems with one reference or one system with multiple references) - ## Metrics input must be in the same order as function SC_spatial_aggregation - if(length(system) > 1 || length(reference) > 1){ - scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - } ## close if - - - if(calculate.diff == TRUE){ - if(length(system) == 2 || length(reference) == 2){ - scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = attributes(regions)$names, - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - output.path = output.path) - } else {stop ("Difference scorecard can only be computed with two systems or two references.")} - } ## close if on calculate.diff - -} - diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R new file mode 100644 index 0000000000000000000000000000000000000000..ed785ae71356e7bce335522cad4a0102a0138a67 --- /dev/null +++ b/modules/Scorecards/Scorecards_calculations.R @@ -0,0 +1,448 @@ +############################################################################### +############# Spatial aggregation calculations for scorecards 
#################
+###############################################################################
+
+## This function spatially aggregates the skill metrics and statistics for the
+## scorecards. The aggregated statistical significance is also calculated for
+## the metrics when the 'score' aggregation is requested. The aggregation
+## regions are defined in the recipe under the scorecards section. This
+## function reads from the atomic recipes.
+
+## Define function
+Scorecards_calculations <- function(recipe, data, skill_metrics,
+                                    statistics = NULL) {
+
+  # Check that skill_metrics is not NULL
+  if (is.null(skill_metrics)) {
+    stop("Scorecards calculations requested but 'skill_metrics' not provided.")
+  } else if (!is.list(skill_metrics)) {
+    stop("'skill_metrics' should be a named list with metric arrays")
+  }
+  # Assign parameters
+  ncores <- recipe$Analysis$ncores
+  if (is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)) {
+    alpha <- 0.05
+  } else {
+    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
+  }
+
+  if (is.null(recipe$Analysis$remove_NAs)) {
+    na.rm <- FALSE
+  } else {
+    na.rm <- recipe$Analysis$remove_NAs
+  }
+
+  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
+    inf.to.na <- FALSE
+  } else {
+    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
+  }
+
+  ## Parameters for saving output data files
+  output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
+  dir.create(output.path, recursive = T, showWarnings = F)
+
+  system <- recipe$Analysis$Datasets$System$name
+  reference <- recipe$Analysis$Datasets$Reference$name
+  var <- recipe$Analysis$Variables$name
+  start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
+  end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
+  forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max
+  start.months <- substr(recipe$Analysis$Time$sdate, 1,2)
+  period <- paste0(start.year, "-", end.year)
+
+  ## Parameters for data aggregation
+  regions <- recipe$Analysis$Workflow$Scorecards$regions
+  for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])}
+
+  metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
+                             ", | |,"))
+
+  ## Define array to be filled with data
+  aggr_significance <- array(data = NA,
+                             dim = c(time = length(forecast.months),
+                                     region = length(regions),
+                                     metric = length(metrics)))
+
+  ## For data that is already aggregated by region
+  if ("region" %in% names(dim(skill_metrics[[1]]))) {
+    aggr_metrics <- NULL
+    for (met in metrics) {
+      skill_metrics[[met]] <- Reorder(skill_metrics[[met]],
+                                      c("time", "region", "var"))
+      aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3)
+    }
+    names(dim(aggr_metrics)) <- c("time", "region", "metric")
+  } else {
+    ## Define arrays to be filled with data
+    aggr_metrics <- array(data = NA,
+                          dim = c(time = length(forecast.months),
+                                  region = length(regions),
+                                  metric = length(metrics)))
+
+    lon_dim <- 'longitude'
+    lat_dim <- 'latitude'
+    time_dim <- 'syear'
+    memb_dim <- 'ensemble'
+    lon <- as.numeric(data$hcst$coords$longitude)
+    lat <- as.numeric(data$hcst$coords$latitude)
+
+    ## Skill aggregation
+    if (metric.aggregation == 'skill') {
+      ## Calculate weighted mean of spatial aggregation
+      for (met in metrics) {
+        result <- sapply(X = 1:length(regions),
+                         FUN = function(X) {
+                           WeightedMean(data = skill_metrics[[met]],
+                                        region = regions[[X]],
+                                        lon = lon, londim = lon_dim,
+                                        lat = lat, latdim = lat_dim,
+                                        na.rm =
na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + result <-Subset(result, 'var', 1, drop = 'selected') + + if (met =='crpss' && inf.to.na == TRUE) { + result[result == -Inf] <- NA + } + aggr_metrics[,,which(metrics == met)] <- Reorder(data = result, + order = c('time', 'region')) + } ## close on met + } ## close if on skill + + ## Score Aggregation + if (metric.aggregation == 'score') { + ## Spatially aggregate data for each metric + for (met in metrics) { + if (met == 'rpss') { + rps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + rps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' + names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, + time_dim = time_dim, + test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + rps_syear <- Apply(data = rps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_syear <- Apply(data = rps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_syear / rps_clim_syear + + ## Save metric result in arrays + aggr_metrics[,,which(metrics == met)] <- Reorder(data = rpss, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss, + order = c('time', 'region')) + + } else if (met == 'crpss') { + crps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + crps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, + test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + crps_syear <- Apply(data = crps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and 
CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- Reorder(data = crpss, + order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_crpss, + order = c('time', 'region')) + } else if (met == 'enscorr') { + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$cov, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + std_hcst <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_hcst, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_obs, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Remove 'var' dimension + cov <- Subset(cov, 'var', 1, drop = 'selected') + std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') + std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]) { + for (reg in 1:dim(sign_corr)[['region']]) { + if ((anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE) && + (t[time, reg] >= t_alpha2_n2[time, reg])) { + sign_corr[time, reg] <- TRUE + } else { + sign_corr[time, reg] <- FALSE + } + } + } + + ## Save metric result in arrays + aggr_metrics[,,which(metrics == met)] <- Reorder(data = enscorr, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + order = c('time', 'region')) + + } else if (met == 'mean_bias') { + ## Calculate ensemble mean + hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + + ## Aggregate data over regions + hcst_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = hcst_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + obs_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = obs_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + + ## Remove unnecessary dimension + 
hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') + + ## Calculate significance + pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + target_dims = c('syear'), ncores = ncores, + fun = function(x,y) + {t.test(as.vector(x),as.vector(y))})$p.value + + sign_mean_bias <- pval_mean_bias <= alpha + + ## Calculate aggregated mean bias metric + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$mean_bias, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Remove 'var' dimension + mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias, + order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias, + order = c('time', 'region')) + + } else if (met == 'enssprerr') { + ## Calculate metric + spread <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$spread, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + error <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rms, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + + ## Remove 'var' dimension + spread <- Subset(spread, 'var', 1, drop = 'selected') + error <- Subset(error, 'var', 1, drop = 'selected') + + enssprerr <- spread / error + + # ## Significance calculation + # + # # Effective sample size + # # exp <- data$hcst$data + # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') + # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) + # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) + # + # # Removing eno at the moment + # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + # F <- spread^2 / error^2 + # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + # # p.val <- pf(F, enospr - 1, enodif - 1) + # + # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK + # target_dims = c('time'), + # fun = function(x, y) + # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) + # + # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) + # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) + # + # sign_enssprerr <- pval_enssprerr <= alpha + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr, + order = c('time', 'region')) + # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + # order = c('time', 'region')) + } + } ## close loop on metric + } ## close if on score + } ## close if on region + + ## Set NAs to False + aggr_significance[is.na(aggr_significance)] <- FALSE + + ## Include 'var' dimension to be able to save array + if (!'var' 
%in% names(dim(aggr_metrics))) {
+    aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var')
+    aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var')
+  }
+
+  ## Include attributes
+  attributes(aggr_metrics)$metrics <- metrics
+  attributes(aggr_metrics)$forecast.months <- forecast.months
+  attributes(aggr_metrics)$regions <- regions
+  attributes(aggr_metrics)$system.name <- system
+  attributes(aggr_metrics)$reference.name <- reference
+
+  aggr_scorecards <- list(aggr_metrics = aggr_metrics,
+                          aggr_significance = aggr_significance)
+
+  ## Save metric data arrays
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir,
+                                     "outputs", "Scorecards/")
+  save_metrics(recipe = recipe, metrics = aggr_scorecards,
+               data_cube = data$hcst, agg = 'region',
+               module = "scorecards")
+}
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
new file mode 100644
index 0000000000000000000000000000000000000000..ff1ba18671bc3f33a652d3832fdefd36a5eb9f13
--- /dev/null
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -0,0 +1,242 @@
+###############################################################################
+##################### Scorecard visualization plotting #######################
+###############################################################################
+
+## This function loads the saved netcdf files containing the spatially
+## aggregated skill metrics for the scorecards and plots the scorecard
+## visualizations. The function reads from the general recipe.
+
+##### Load source functions #####
+source('modules/Scorecards/R/tmp/LoadMetrics.R')
+source('modules/Scorecards/R/tmp/Utils.R')
+source('modules/Scorecards/R/tmp/SCTransform.R')
+source('modules/Scorecards/R/tmp/ScorecardsSingle.R')
+source('modules/Scorecards/R/tmp/ScorecardsMulti.R')
+source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R')
+source('modules/Scorecards/R/tmp/VizScorecard.R')
+
+## Temporary for new ESviz function
+source('modules/Scorecards/R/tmp/ColorBarContinuous.R')
+source('modules/Scorecards/R/tmp/ClimPalette.R')
+.IsColor <- s2dv:::.IsColor
+.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs
+
+## Define function
+Scorecards_plotting <- function(recipe) {
+
+  ## Parameters for loading data files
+  input.path <- paste0(recipe$Run$output_dir, "/outputs/Scorecards/")
+  output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
+  dir.create(output.path, recursive = T, showWarnings = F)
+
+  system <- as.vector(unlist(recipe$Analysis$Datasets$System$name))
+  reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference$name))
+
+  var <- recipe$Analysis$Variables$name
+  start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
+  end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
+  forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max
+  calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
+                             ", | |,"))
+
+  if (is.null(recipe$Analysis$Workflow$Scorecards$start_months) ||
+      recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
+    start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2))
+  } else {
+    start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
+                                        split = ", | |,")[[1]])
+    if (!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) %in% start.months)) {
+      error(recipe$Run$logger, "Requested start dates for scorecards must be loaded")
+    }
+  }
+  start.months <-
sprintf("%02d", start.months) + period <- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + ## Parameters for scorecard layout + table.label <- recipe$Analysis$Workflow$Scorecards$table_label + fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width + col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width + + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)) { + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$columns_width)) { + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)) { + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_height)) { + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$label_scale)) { + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)) { + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$font_size)) { + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)) { + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + + + if ('name' %in% names(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } + + ####### Load data files ####### + aggregated_metrics <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_metrics", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + attributes(aggregated_metrics)$metrics <- metrics + + aggregated_significance <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_significance", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + aggregated_significance <- aggregated_significance == 1 + + + ####### PLOT SCORECARDS ########## + + ## Create simple scorecard tables + ## (one system only) + ## Metrics input must be in the same order as function SC_spatial_aggregation + scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + sign = aggregated_significance, + system = system, + reference = reference, + var = var, + var.units = var.units, + start.year = start.year, + end.year = end.year, + start.months = start.months, 
+                                       forecast.months = forecast.months,
+                                       region.names = names(regions),
+                                       metrics = metrics,
+                                       table.label = table.label,
+                                       fileout.label = fileout.label,
+                                       plot.legend = plot.legend,
+                                       legend.breaks = legend.breaks,
+                                       legend.white.space = legend.white.space,
+                                       legend.width = legend.width,
+                                       legend.height = legend.height,
+                                       label.scale = label.scale,
+                                       col1.width = col1.width,
+                                       col2.width = col2.width,
+                                       columns.width = columns.width,
+                                       font.size = font.size,
+                                       round.decimal = round.decimal,
+                                       output.path = output.path)
+
+  ## Create multi system/reference scorecard tables
+  ## (multiple systems with one reference or one system with multiple references)
+  ## Metrics input must be in the same order as function SC_spatial_aggregation
+  if (length(system) > 1 || length(reference) > 1) {
+    scorecard_multi <- ScorecardsMulti(data = aggregated_metrics,
+                                       sign = aggregated_significance,
+                                       system = system,
+                                       reference = reference,
+                                       var.units = var.units,
+                                       start.year = start.year,
+                                       end.year = end.year,
+                                       start.months = start.months,
+                                       forecast.months = forecast.months,
+                                       region.names = names(regions),
+                                       metrics = metrics,
+                                       table.label = table.label,
+                                       fileout.label = fileout.label,
+                                       plot.legend = plot.legend,
+                                       legend.breaks = legend.breaks,
+                                       legend.white.space = legend.white.space,
+                                       legend.width = legend.width,
+                                       legend.height = legend.height,
+                                       label.scale = label.scale,
+                                       col1.width = col1.width,
+                                       col2.width = col2.width,
+                                       columns.width = columns.width,
+                                       font.size = font.size,
+                                       round.decimal = round.decimal,
+                                       output.path = output.path)
+  } ## close if
+
+  if (calculate.diff == TRUE) {
+    if (length(system) == 2 || length(reference) == 2) {
+      scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics,
+                                             system = system,
+                                             reference = reference,
+                                             var = var,
+                                             var.units = var.units,
+                                             start.year = start.year,
+                                             end.year = end.year,
+                                             start.months = start.months,
+                                             forecast.months = forecast.months,
+                                             region.names = attributes(regions)$names,
+                                             metrics = metrics,
+                                             table.label = table.label,
+                                             fileout.label = fileout.label,
+                                             legend.white.space = legend.white.space,
+                                             col1.width = col1.width,
+                                             col2.width = col2.width,
+                                             output.path = output.path)
+    } else {
+      stop("Difference scorecard can only be computed with two systems or ",
+           "two references.")
+    }
+  } ## close if on calculate.diff
+
+}
diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index c10def7abcbe329508fcee0705d4cd62095ff3d9..2fa27549a20f414ae20d3db188b7918bd5ec26b0 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -1,11 +1,10 @@
 source('tools/libs.R')
-source('modules/Scorecards/Scorecards.R')
+source('modules/Scorecards/Scorecards_plotting.R')
 
 args = commandArgs(trailingOnly = TRUE)
 recipe_file <- args[1]
 output_dir <- args[2]
 
-## TODO: Replace with function
 # Read recipe and set outdir
 recipe <- read_yaml(recipe_file)
 recipe$Run$output_dir <- output_dir
@@ -21,8 +20,8 @@ for (variable in 1:length(recipe$Analysis$Variables)) {
     list(name = as.vector(unlist(recipe$Analysis$Datasets$System)))
   # Include multimodel in systems
   if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) {
-    scorecard_recipe$Analysis$Datasets$System$name <-
-      c(scorecard_recipe$Analysis$Datasets$System$name, 'Multimodel')
+    scorecard_recipe$Analysis$Datasets$System <-
+      c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel')
   }
   # Collect all reference names
   scorecard_recipe$Analysis$Datasets$Reference <-
@@ -31,7 +30,7 @@ for (variable in 1:length(recipe$Analysis$Variables)) {
   scorecard_recipe$Analysis$Variables <-
     recipe$Analysis$Variables[[variable]]
   # Plot Scorecards
-  Scorecards(scorecard_recipe)
+  Scorecards_plotting(scorecard_recipe)
 }
 print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####")
diff --git a/modules/Skill/R/tmp/Bias.R b/modules/Skill/R/tmp/Bias.R
new file mode 100644
index 0000000000000000000000000000000000000000..d4887189220daaad310cf7794870ffb108c6435d
--- /dev/null
+++ b/modules/Skill/R/tmp/Bias.R
@@ -0,0 +1,235 @@
+#'Compute the Mean Bias
+#'
+#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference
+#'between the ensemble mean forecast and the observations. It is a deterministic
+#'metric. Positive values indicate that the forecasts are on average too high
+#'and negative values indicate that the forecasts are on average too low.
+#'It can also compute the Absolute Mean Bias or the bias without temporal
+#'mean. If there is more than one dataset, the result will be computed for each
+#'pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#'  dimension.
+#'@param obs A named numerical array of the observation with at least time
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#'  'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean; it should be set to NULL if the parameter
+#'  'exp' is already the ensemble mean. The default value is NULL.
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
+#'  kept (FALSE) for computation. The default value is FALSE.
+#'@param absolute A logical value indicating whether to compute the absolute
+#'  bias. The default value is FALSE.
+#'@param time_mean A logical value indicating whether to compute the temporal
+#'  mean of the bias. The default value is TRUE.
+#'@param alpha A numeric value or NULL to indicate the significance level of
+#'  Welch's t-test. Only available when absolute is FALSE. The default value
+#'  is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number
+#'of experiments (i.e., 'dat_dim' in exp), and nobs is the number of observations
+#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If
+#'alpha is specified and absolute is FALSE, the result is a list with two
+#'elements: the bias as described above and the significance as a logical array
+#'with the same dimensions.
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'
+#'@examples
+#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
+#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member')
+#'bias2 <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.01)
+#'abs_bias <- Bias(exp = exp, obs = obs, memb_dim = 'member', absolute = TRUE, alpha = NULL)
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
+                 na.rm = FALSE, absolute = FALSE, time_mean = TRUE,
+                 alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop("Parameter 'exp' must be a numeric array.")
+  if (!is.array(obs) | !is.numeric(obs))
+    stop("Parameter 'obs' must be a numeric array.")
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop("Parameter 'time_dim' must be a character string.")
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+    }
+    if (memb_dim %in% names(dim(obs))) {
+      if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
+        obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
+      } else {
+        stop("Not implemented for observations with members ('obs' can have 'memb_dim', ",
+             "but it should be of length = 1).")
+      }
+    }
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+  }
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop("Parameter 'exp' and 'obs' must have same length of ",
+         "all dimensions except 'memb_dim' and 'dat_dim'.")
+  }
+  ## na.rm
+  if (!is.logical(na.rm) | length(na.rm) > 1) {
+    stop("Parameter 'na.rm' must be one logical value.")
+  }
+  ## absolute
+  if (!is.logical(absolute) | length(absolute) > 1) {
+    stop("Parameter 'absolute' must be one logical value.")
+  }
+  ## time_mean
+  if (!is.logical(time_mean) | length(time_mean) > 1) {
+    stop("Parameter 'time_mean' must be one logical value.")
+  }
+  ## alpha
+  if (!is.null(alpha)) {
+    if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
+      stop("Parameter 'alpha' must be NULL or a numeric value between 0 and 1.")
+    }
+    if (absolute) {
+      alpha <- NULL
.warning("Parameter 'absolute' is TRUE, so 'alpha' has been set to", + "false and significance will not be returned.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + ## Ensemble mean + if (!is.null(memb_dim)) { + exp <- MeanDims(exp, memb_dim, na.rm = na.rm) + } + + ## (Mean) Bias + bias <- Apply(data = list(exp, obs), + target_dims = c(time_dim, dat_dim), + fun = .Bias, + time_dim = time_dim, + dat_dim = dat_dim, + na.rm = na.rm, + absolute = absolute, + time_mean = time_mean, + alpha = alpha, + ncores = ncores) + + if (is.null(alpha)) { + bias <- bias$output1 + } + return(bias) +} + + +.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL) { + # exp and obs: [sdate, (dat)] + if (is.null(dat_dim)) { + bias <- exp - obs + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- mean(bias, na.rm = na.rm) + } + + if (!is.null(alpha)) { + if (!absolute) { + if (all(is.na(bias))) { + sign <- NA + } else { + pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value + sign <- pval <= alpha + } + } + } + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + pval <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + bias[, i, j] <- exp[, i] - obs[, j] + if (!is.null(alpha)) { + if (!absolute) { + pval[i, j] <- t.test(x = obs[, j], y = exp[, i], + alternative = "two.sided")$p.value + sign[i, j] <- pval[i, j] <= alpha + } + } + } + } + + if (isTRUE(absolute)) { + bias <- abs(bias) + } + + if (isTRUE(time_mean)) { + bias <- MeanDims(bias, time_dim, na.rm = na.rm) + if (!is.null(sign)) { + sign[which(is.na(bias))] <- NA + } + } + } + if (!is.null(alpha) && !absolute) { + return(list(bias = bias, sign = sign)) + } else { + return(bias) + } +} diff --git a/modules/Skill/R/tmp/Eno.R b/modules/Skill/R/tmp/Eno.R new file mode 100644 index 0000000000000000000000000000000000000000..cb927602221e10f6d3c0853bf0935aad93834099 --- /dev/null +++ b/modules/Skill/R/tmp/Eno.R @@ -0,0 +1,103 @@ +#'Compute effective sample size with classical method +#' +#'Compute the number of effective samples along one dimension of an array. This +#'effective number of independent observations can be used in +#'statistical/inference tests.\cr +#'The calculation is based on eno function from Caio Coelho from rclim.txt. +#' +#'@param data A numeric array with named dimensions. +#'@param time_dim A function indicating the dimension along which to compute +#' the effective sample size. The default value is 'sdate'. +#'@param na.action A function. It can be na.pass (missing values are allowed) +#' or na.fail (no missing values are allowed). See details in stats::acf(). +#' The default value is na.pass. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return An array with the same dimension as parameter 'data' except the +#' time_dim dimension, which is removed after the computation. The array +#' indicates the number of effective sample along time_dim. 
+#'
+#'@examples
+#'set.seed(1)
+#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4,
+#'                                  ftime = 4, lat = 10, lon = 10))
+#'na <- floor(runif(40, min = 1, max = 800))
+#'data[na] <- NA
+#'res <- Eno(data)
+#'
+#'@importFrom stats acf na.pass na.fail
+#'@import multiApply
+#'@export
+Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) {
+
+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  if (is.null(dim(data))) { # is vector
+    dim(data) <- c(length(data))
+    names(dim(data)) <- time_dim
+  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimension.")
+  }
+  ## na.action
+  if (as.character(substitute(na.action)) != "na.pass" &
+      as.character(substitute(na.action)) != "na.fail") {
+    stop("Parameter 'na.action' must be a function either na.pass or na.fail.")
+  }
+  if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) {
+    stop("Calculation fails because NA is found in parameter 'data', ",
+         "which is not accepted when ",
+         "parameter 'na.action' = na.fail.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Eno
+
+  eno <- Apply(data = list(data),
+               target_dims = time_dim,
+               output_dims = NULL,
+               fun = .Eno,
+               na.action = na.action,
+               ncores = ncores)$output1
+
+  return(eno)
+}
+
+.Eno <- function(x, na.action) {
+  # sort() drops NAs, so 'n' is the number of non-missing values
+  n <- length(sort(x))
+  if (n > 1) {
+    a <- acf(x, lag.max = n - 1, plot = FALSE,
+             na.action = na.action)$acf[2:n, 1, 1]
+    s <- 0
+    for (k in 1:(n - 1)) {
+      s <- s + (((n - k) / n) * a[k])
+    }
+    eno <- min(n / (1 + (2 * s)), n)
+  } else {
+    eno <- NA
+  }
+
+  return(eno)
+}
diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R
new file mode 100644
index 0000000000000000000000000000000000000000..a22bdf0d6610ab181a2e3c0f060408dc5ffd0238
--- /dev/null
+++ b/modules/Skill/R/tmp/SprErr.R
@@ -0,0 +1,219 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the spread of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value and/or the statistical
+#'significance is provided by a two-sided Fisher's test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#'  dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#'  is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#'  which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of the
+#'  test Ho: SD/RMSE = 1. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default
+#'  value is FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The
+#'  default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of
+#'  dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#'  the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'.
+#'  If dat_dim is NULL, nexp and nobs are omitted. \cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p_val}{
+#'  The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only
+#'  present if \code{pval = TRUE}.
+#'}
+#'
+#'@examples
+#'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5))
+#'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3))
+#'sprerr1 <- SprErr(exp, obs)
+#'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE)
+#'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE)
+#'
+#'@import multiApply
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ",
", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + na.rm = na.rm, + fun = .SprErr, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) + enodif <- 
+      if (pval | sign) {
+        # 'spread', 'enospr' and 'enodif' are scalars here, so no indexing is needed
+        F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1))
+        if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & enospr > 2 & enodif > 2) {
+          p.val[jexp, jobs] <- pf(F, enospr - 1, enodif - 1)
+          p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs])
+        } else {
+          p.val[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+
+  res <- list(ratio = ratio)
+  if (pval) {res$p.val <- p.val}
+  if (sign) {res$sign <- p.val <= alpha}
+
+  return(res)
+}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index c03841b828ea70746f6a62a2db3afa703e745c3c..7c829c253512e11437bf7f983b99cb04d842af48 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -12,16 +12,18 @@ source("modules/Skill/R/compute_probs.R")
 source("modules/Skill/R/s2s.metrics.R")
 source("modules/Saving/R/drop_dims.R")
 ## TODO: Remove when they are included in s2dv
+## TODO: Add pending functions
 source("modules/Skill/R/RPS_clim.R")
 source("modules/Skill/R/CRPS_clim.R")
 source("modules/Skill/R/tmp/GetProbs.R")
-
-## Temporary
 source("modules/Skill/R/tmp/RPS.R")
 source("modules/Skill/R/tmp/CRPS.R")
+source("modules/Skill/R/tmp/SprErr.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/115
+source("modules/Skill/R/tmp/Bias.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/118
+source("modules/Skill/R/tmp/Eno.R")
 
 Skill <- function(recipe, data, agg = 'global') {
-  
+
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
   # recipe: auto-s2s recipe as provided by read_yaml
@@ -224,15 +226,19 @@ Skill <- function(recipe, data, agg = 'global') {
         skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data,
                       time_dim = time_dim,
                       memb_dim = memb_dim,
+                      alpha = 0.05,
                       ncores = ncores)
       } else {
         skill <- Bias(data$hcst$data, data$obs$data,
                       time_dim = time_dim,
                       memb_dim = memb_dim,
+                      alpha = 0.05,
                       ncores = ncores)
       }
-      skill <- .drop_dims(skill)
-      skill_metrics[[ metric ]] <- skill
+      skill <- lapply(skill, function(x) {
+                      .drop_dims(x)})
+      skill_metrics[[ metric ]] <- skill$bias
+      skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
       # Mean bias skill score
     } else if (metric == 'mean_bias_ss') {
       if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) &&
@@ -309,21 +315,20 @@ Skill <- function(recipe, data, agg = 'global') {
       skill_metrics[[ metric ]] <- skill$msss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
     } else if (metric == 'enssprerr') {
-      # Remove ensemble dim from obs to avoid veriApply warning
-      obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1,
-                                            drop = "selected")
-      capture.output(
-        skill <- easyVerification::veriApply(verifun = 'EnsSprErr',
-                                             fcst = data$hcst$data,
-                                             obs = obs_noensdim,
-                                             tdim = which(names(dim(data$hcst$data))==time_dim),
-                                             ensdim = which(names(dim(data$hcst$data))==memb_dim),
-                                             na.rm = na.rm,
-                                             ncpus = ncores)
-      )
-      remove(obs_noensdim)
-      skill <- .drop_dims(skill)
-      skill_metrics[[ metric ]] <- skill
+      .Eno <- s2dv:::.Eno
+      skill <- SprErr(data$hcst$data, data$obs$data,
+                      time_dim = time_dim,
+                      memb_dim = memb_dim,
+                      dat_dim = 'dat',
+                      pval = FALSE,
+                      sign = TRUE,
+                      alpha = 0.05,
+                      na.rm = na.rm,
+                      ncores = ncores)
+      skill <- lapply(skill, function(x) {
+                      .drop_dims(x)})
+      skill_metrics[[ metric ]] <- skill$ratio
+      skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
       # SpecsVerification metrics
     } else if (grepl("specs", metric, fixed = TRUE)) {
       # Compute SpecsVerification version of the metrics
diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R
index 085bcdc58a162133b1316bf1187ae3974f69234d..e3c27e9769877140a5e959b6461b9b8bb88d9b16 100644
--- a/modules/Statistics/Statistics.R
+++ b/modules/Statistics/Statistics.R
@@ -3,31 +3,18 @@ Statistics <- function(recipe, data, agg = 'global') {
   # data$obs: s2dv_cube containing the observations
   # recipe: auto-s2s recipe as provided by read_yaml
   # agg: data aggregation
-
+  time_dim <- 'syear'
+  memb_dim <- 'ensemble'
   ncores <- recipe$Analysis$ncores
 
   ## Calculate ensemble mean
-  hcst_ensmean <- Apply(data$hcst$data,
-                        target_dims = 'ensemble',
-                        fun = 'mean')$output1
-  obs_ensmean <- Apply(data$obs$data,
-                       target_dims = 'ensemble',
-                       fun = 'mean')$output1
-
-  ## Remove unwanted dimensions
-  ## TODO: Apply .drop_dims() function instead?
-  hcst_ensmean <- Subset(hcst_ensmean,
-                         along = c('dat', 'sday', 'sweek'),
-                         indices = list(1, 1, 1),
-                         drop = 'selected')
-  obs_ensmean <- Subset(obs_ensmean,
-                        along = c('dat', 'sday', 'sweek'),
-                        indices = list(1, 1, 1),
-                        drop = 'selected')
+  hcst_ensmean <- MeanDims(data$hcst$data, dims = memb_dim)
+  obs_ensmean <- MeanDims(data$obs$data, dims = memb_dim)
 
   statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric)
   statistics <- list()
+  # Compute each of the requested statistics
   for (stat in strsplit(statistics_list, ", | |,")[[1]]) {
     if (stat %in% c('cov', 'covariance')) {
@@ -37,45 +24,50 @@ Statistics <- function(recipe, data, agg = 'global') {
                             fun = function(x,y) {cov(as.vector(x),as.vector(y),
                                                      use = "everything",
                                                      method = "pearson")})$output1
-
+      covariance <- .drop_dims(covariance)
       statistics[[ stat ]] <- covariance
-    } ## close if on covariance
-    if (stat %in% c('std', 'standard_deviation')) {
+    } else if (stat %in% c('std', 'standard_deviation')) {
       ## Calculate standard deviation
       std_hcst <- Apply(data = hcst_ensmean,
                         target_dims = c(time_dim),
                         fun = 'sd')$output1
-
       std_obs <- Apply(data = obs_ensmean,
                        target_dims = c(time_dim),
                        fun = 'sd')$output1
-
+      std_hcst <- .drop_dims(std_hcst)
+      std_obs <- .drop_dims(std_obs)
       statistics[['std_hcst']] <- std_hcst
       statistics[['std_obs']] <- std_obs
-
-    } ## close if on std
-    if (stat %in% c('var', 'variance')) {
+    } else if (stat %in% c('var', 'variance')) {
       ## Calculate variance
       var_hcst <- (Apply(data = hcst_ensmean,
                          target_dims = c(time_dim),
                          fun = 'sd')$output1)^2
-
      var_obs <- (Apply(data = obs_ensmean,
                        target_dims = c(time_dim),
                        fun = 'sd')$output1)^2
-
+      var_hcst <- .drop_dims(var_hcst)
+      var_obs <- .drop_dims(var_obs)
       statistics[['var_hcst']] <- var_hcst
       statistics[['var_obs']] <- var_obs
-    } ## close if on variance
-    if (stat == 'n_eff') {
+    } else if (stat == 'n_eff') {
       ## Calculate degrees of freedom
       n_eff <- s2dv::Eno(data = obs_ensmean,
                          time_dim = time_dim,
                          na.action = na.pass,
                          ncores = ncores)
+      n_eff <- .drop_dims(n_eff)
       statistics[['n_eff']] <- n_eff
-    } ## close on n_eff
-  }
+    } else if (stat == 'spread') {
+      C_cov <- stats:::C_cov
+      spread <- sqrt(Apply(Apply(data = data$hcst$data,
+                                 target_dims = c(memb_dim),
+                                 fun = stats::var)$output1,
+                           fun = 'mean', target_dims = time_dim)$output1)
+      spread <- .drop_dims(spread)
+      statistics[['spread']] <- spread
+    }
+  } ## close loop on stat
 
   info(recipe$Run$logger,
        "##### STATISTICS COMPUTATION COMPLETE #####")
   .log_memory_usage(recipe$Run$logger, when = "After statistics computation")
diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R
index f70eefc755ef256c856ad9a123de336c577fedb1..f5a77af2569e2b66ecaa2591fcbdba0dd9dbd149 100644
--- a/tests/testthat/test-seasonal_NAO.R
+++ b/tests/testthat/test-seasonal_NAO.R
@@ -213,9 +213,9 @@ TRUE
 )
 expect_equal(
 names(skill_metrics),
-c("mean_bias", "enscorr",
+c("mean_bias", "mean_bias_significance", "enscorr",
   "enscorr_significance", "rps", "rpss", "rpss_significance",
-  "crps", "crpss", "crpss_significance", "enssprerr")
+  "crps", "crpss", "crpss_significance", "enssprerr", "enssprerr_significance")
 )
 expect_equal(
 class(skill_metrics$rpss),
@@ -253,7 +253,9 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")),
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc",
+  "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc",
+  "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_significance_1993-2000_s03.nc"
@@ -262,7 +264,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")),
 expect_equal(
 length(list.files(outputs, recursive = T)),
-26
+28
 )
 })
diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R
index 8a52657a8b4dfd3665d82b27af46af6a60375b63..1ba0e3dfc0a9189b47d1e891b6bb57f413e6ae82 100644
--- a/tests/testthat/test-seasonal_downscaling.R
+++ b/tests/testthat/test-seasonal_downscaling.R
@@ -214,7 +214,7 @@ TRUE
 )
 expect_equal(
 names(skill_metrics),
-c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias")
+c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias", "mean_bias_significance")
 )
 expect_equal(
 class(skill_metrics$rpss),
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index 245e4e3dc42501c310f57410420e419bcafc022a..e588683da50640712316a691f94acf1caf738f4a 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -133,7 +133,7 @@ check_recipe <- function(recipe) {
       }
     }
   } else {
-    recipe$Analysis$Datasets$Multimodel$execute <- FALSE
+    recipe$Analysis$Datasets$Multimodel$execute <- FALSE
   }
   # Check ftime_min and ftime_max
   if ((!(recipe$Analysis$Time$ftime_min > 0)) ||
@@ -216,12 +216,12 @@ check_recipe <- function(recipe) {
             "The 'Regrid' element must specify the 'method' and 'type'.")
       error_status <- TRUE
     }
-
+
     if (recipe$Analysis$Regrid$type == 'to_system' && multimodel) {
-      error(recipe$Run$logger,
-            paste0("The 'Regrid$type' cannot be 'to_system' if ",
-                   "'Multimodel$execute' is yes/true or both."))
-      error_status <- TRUE
+      error(recipe$Run$logger,
+            paste0("The 'Regrid$type' cannot be 'to_system' if ",
+                   "'Multimodel$execute' is yes/true or both."))
+      error_status <- TRUE
     }
   # TODO: Add Workflow checks?
   # ...
@@ -592,7 +592,7 @@ check_recipe <- function(recipe) {
   # Skill
   AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps",
                          "rps_syear", "rpss", "frps", "frpss", "crps", "crps_syear",
-                         "crpss", "bss10", "bss90",
+                         "crpss", "bss10", "bss90", "rms",
                          "mean_bias", "mean_bias_ss", "enssprerr", "rps_clim",
                          "rps_clim_syear", "crps_clim", "crps_clim_syear",
                          "enscorr_specs", "frps_specs", "rpss_specs",
@@ -772,6 +772,18 @@ check_recipe <- function(recipe) {
                    "'n_eff' are required."))
       error_status <- TRUE
     }
+    if ('enssprerr' %in% tolower(sc_metrics)) {
+      if (!('spread' %in% statistics)) {
+        error(recipe$Run$logger,
+              paste("For 'enssprerr' to be plotted in the Scorecards with",
+                    "the 'score' aggregation, the Statistics module must",
+                    "be called and the statistics 'spread' and 'rms'",
+                    "are required."))
+        error_status <- TRUE
+      } else if (!('rms' %in% requested_metrics)) {
+        requested_metrics <- c(requested_metrics, 'rms')
+      }
+    }
     recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics,
                                                     collapse = " ")
   }
@@ -921,7 +933,7 @@ check_recipe <- function(recipe) {
     warn(recipe$Run$logger,
          paste("No 'custom_directives_multimodel' specified, the",
                "single-model verification custom directives will be used."))
-    recipe$Run$auto_conf$custom_directives_multimodel <-
+    recipe$Run$auto_conf$custom_directives_multimodel <-
       recipe$Run$auto_conf$custom_directives
   }
   if (is.null(recipe$Run$auto_conf$processors_multimodel) &&
diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md
index aab5b87568374ad04f75477a42a2eedd4d71ddac..323d8430a50520d634644b4afd8e903fb14351b4 100644
--- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md
+++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md
@@ -82,6 +82,7 @@ source("modules/Loading/Loading.R")
 source("modules/Anomalies/Anomalies.R")
 source("modules/Skill/Skill.R")
 source("modules/Statistics/Statistics.R")
+source("modules/Scorecards/Scorecards_calculations.R")
 source("modules/Saving/Saving.R")
 
 # Read recipe
@@ -91,7 +92,7 @@ recipe_file <- args[1]
 recipe <- read_atomic_recipe(recipe_file)
 ```
 
-The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill scores and save the result as netCDF files for Scorecards.
+The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill metrics and statistics, and call `Scorecards_calculations()` to perform the Scorecards-specific computations and save the results as netCDF files for the Scorecards.
 
 ```R
 # Load data
@@ -102,6 +103,11 @@ data <- Anomalies(recipe, data)
 skill_metrics <- Skill(recipe, data)
 # Compute statistics
 statistics <- Statistics(recipe, data)
+# Pre-computations required for the Scorecards
+Scorecards_calculations(recipe, data = data,
+                        skill_metrics = skill_metrics,
+                        statistics = statistics)
+
 ```
 
 Check the example script at [ex1_2-script.R](use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R). You can execute it as-is or copy it and modify it according to your needs.
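(Editor's note, for orientation: in this use case the plotting stage runs separately from the calculations above. Autosubmit invokes `execute_scorecards.R`, which rebuilds a combined recipe and calls `Scorecards_plotting()` on the saved netCDF files. A simplified sketch of that follow-on step is shown below; the real script additionally merges the systems, references, and variables of the atomic recipes and takes the recipe path and output directory as command-line arguments, so treat the paths here as placeholders.)

```R
# Simplified sketch of the plotting stage (see execute_scorecards.R for the
# full logic; the output directory below is a hypothetical placeholder).
source("tools/libs.R")
source("modules/Scorecards/Scorecards_plotting.R")

recipe <- read_yaml("use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml")
recipe$Run$output_dir <- "/path/to/auto-s2s-outputs/"  # hypothetical path
Scorecards_plotting(recipe)
```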
diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
index 68a8dc627dbfdfffd3772aab55c2b1c80ba12881..3cd547ed0ee2fa412c82850caa39fcec2fe58592 100644
--- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
+++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
@@ -50,7 +50,7 @@ Analysis:
       cross_validation: yes
       save: 'all'
     Statistics:
-      metric: cov std n_eff
+      metric: cov std n_eff spread
       save: 'all'
     Probabilities:
       percentiles: [[1/3, 2/3]]
@@ -80,20 +80,20 @@ Run:
   Loglevel: INFO
   Terminal: yes
   filesystem: esarchive
-  output_dir: /esarchive/scratch/aho/auto-s2s-outputs/
-  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /home/Earth/vagudets/git/auto-s2s/
   autosubmit: yes
   # fill only if using autosubmit
   auto_conf:
-    script: /esarchive/scratch/aho/git/auto-s2s/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script
-    expid: a6pc # replace with your EXPID
-    hpc_user: bsc032734 # replace with your hpc username
+    script: use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032762 # replace with your hpc username
     wallclock: 03:00 # hh:mm
     processors_per_job: 8
     platform: nord3v2
     custom_directives: ['#SBATCH --exclusive']
     email_notifications: yes # enable/disable email notifications. Change it if you want to.
-    email_address: an.ho@bsc.es # replace with your email address
+    email_address: victoria.agudetse@bsc.es # replace with your email address
     notify_completed: yes # notify me by email when a job finishes
     notify_failed: yes # notify me by email when a job fails
diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R
index a39c1c39800d195dac59466d3af4e06c9caf02e2..a8ee7effca9684b9f4f13d874bc52f1eeac96daf 100644
--- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R
+++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R
@@ -11,6 +11,7 @@ source("modules/Loading/Loading.R")
 source("modules/Anomalies/Anomalies.R")
 source("modules/Skill/Skill.R")
 source("modules/Statistics/Statistics.R")
+source("modules/Scorecards/Scorecards_calculations.R")
 source("modules/Saving/Saving.R")
 
 # Read recipe
@@ -26,3 +27,7 @@ data <- Anomalies(recipe, data)
 skill_metrics <- Skill(recipe, data)
 # Compute statistics
 statistics <- Statistics(recipe, data)
+# Pre-computations required for the scorecards
+Scorecards_calculations(recipe, data = data,
+                        skill_metrics = skill_metrics,
+                        statistics = statistics)
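(Editor's note, closing illustration: the mean-bias significance added throughout this patch reduces to a two-sided Welch's t-test between the observations and the ensemble-mean hindcast, flagged at the `alpha` level, 0.05 throughout. A self-contained toy sketch with synthetic data, not the module API:)

```R
# Toy data standing in for one grid point / region
set.seed(42)
obs <- rnorm(30)                       # synthetic observations
hcst_ensmean <- rnorm(30, mean = 0.4)  # synthetic ensemble-mean hindcast

bias <- mean(hcst_ensmean - obs)       # mean bias (mean error)
pval <- t.test(x = obs, y = hcst_ensmean, alternative = "two.sided")$p.value
sign <- pval <= 0.05                   # TRUE if the mean bias is significant
```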