From 786480ec9d503681d27cb5b8bef4fd94e84e81a9 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 22 Apr 2024 15:41:13 +0200 Subject: [PATCH 01/24] Scorecards development in progress --- modules/Scorecards/R/tmp/LoadMetrics.R | 72 +-- modules/Scorecards/Scorecards.R | 610 ------------------- modules/Scorecards/Scorecards_calculations.R | 484 +++++++++++++++ modules/Scorecards/Scorecards_plotting.R | 222 +++++++ modules/Statistics/Statistics.R | 48 +- tools/check_recipe.R | 6 +- 6 files changed, 751 insertions(+), 691 deletions(-) delete mode 100644 modules/Scorecards/Scorecards.R create mode 100644 modules/Scorecards/Scorecards_calculations.R create mode 100644 modules/Scorecards/Scorecards_plotting.R diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 84e44028..155a569b 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -28,7 +28,6 @@ #' reference. = 'ERA5', #' var = 'tas', #' period = '1993-2016' -#' metrics = c('mean_bias', 'enscorr', 'rpss', 'crpss', 'enssprerr'), #' start_months = sprintf("%02d", 1:12), #' calib_method = 'raw', #' input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data') @@ -37,9 +36,9 @@ #'@import multiApply #'@export -LoadMetrics <- function(input_path, system, reference, var, period, - metrics, start_months, calib_method = NULL, - inf_to_na = FALSE) { +LoadMetrics <- function(input_path, system, reference, var, period, data_type, + # metrics, + start_months, calib_method = NULL) { # Initial checks ## system @@ -62,9 +61,9 @@ LoadMetrics <- function(input_path, system, reference, var, period, var <- var[1] } ## metrics - if (!is.character(metrics)) { - stop("Parameter 'metrics' cannot be NULL.") - } + # if (!is.character(metrics)) { + # stop("Parameter 'metrics' cannot be NULL.") + # } ## start_months if (is.character(start_months)) { warning("Parameter 'start_months' must be a numeric vector indicating ", @@ -96,18 +95,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - all_metrics <- sapply(system, function(x) NULL) - names(all_metrics) <- system ## Load data for each system + all_metrics <- NULL for (sys in 1:length(system)) { - ## Define empty list to saved data - by_reference <- sapply(reference, function(x) NULL) - names(by_reference) <- reference ## Load data for each reference + by_reference <- NULL for (ref in 1:length(reference)) { ## Call function to load metrics data - met_by_smonth <- NULL - for (met in metrics) { result <- .loadmetrics(input_path = input_path, system = system[sys], reference = reference[ref], @@ -115,27 +109,18 @@ LoadMetrics <- function(input_path, system, reference, var, period, period = period, start_months = start_months, calib_method = calib_method, - metric = met) + data_type = data_type) result_attr <- attributes(result) - met_by_smonth <- abind::abind(met_by_smonth, result, along = length(dim(result)) + 1) - } - attributes(met_by_smonth) <- result_attr[-1] - # names(dim(met_by_smonth)) <- c(names(result_attr$dim), 'metric') - - dim(met_by_smonth) <- c(dim(result), metric = length(metrics)) - ## Save metric data as array in reference list - by_reference[[reference[ref]]] <- met_by_smonth - ## Remove -Inf from crpss data if variable is precipitation - if (inf_to_na) { - by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - } + + by_reference <- abind::abind(by_reference, result, along = 
length(dim(result)) + 1) + dim(by_reference) <- c(dim(result), reference = length(reference)) } ## close loop on reference - ## Save reference data in list of system - all_metrics[[system[sys]]] <- by_reference + all_metrics <- abind::abind(all_metrics, by_reference, along = length(dim(by_reference)) + 1) + dim(all_metrics) <- c(dim(by_reference), system = length(system)) + # attributes(all_metrics) <- result_attr[-1] } ## close loop on system - attributes(all_metrics)$metrics <- metrics attributes(all_metrics)$start_months <- start_months return(all_metrics) @@ -145,13 +130,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, .loadmetrics <- function(input_path, system, reference, var, period, start_months, - calib_method, metric) { + calib_method, data_type) { ## Load data for each start date allfiles <- sapply(start_months, function(m) { paste0(input_path, "/", system, "/", reference, "/", calib_method, "/", var, "/scorecards_", system, "_", reference, "_", - var, "_", metric, "_", period, "_s", m, # mod.pressure, + var, "_", data_type, "_", period, "_s", m, ".nc")}) allfiles_exist <- sapply(allfiles, file.exists) @@ -170,40 +155,23 @@ LoadMetrics <- function(input_path, system, reference, var, period, } num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension } - # dims: [metric, longitude, latitude, time, smonth] - # or [metric, region, time, smonth] - + # Loop for file dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = metric, unlist = T, + res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, drop_var_dim = T) names(dim(res)) <- NULL } else { - res <- array(dim = c(length(metrics), allfiledims[-1,1])) + res <- array(dim = c(length(data_type), allfiledims[-1,1])) names(dim(res)) <- NULL } res})$output1 dim(array_met_by_sdate) <- c(allfiledims[-1,1], sdate = length(allfiles)) - # Attributes - # Read attributes from the first existing file - if ("region" %in% rownames(allfiledims)) { - file_for_att <- ncdf4::nc_open(allfiles[allfiles_exist[1]]) - region <- ncdf4::ncatt_get(file_for_att, 'region') - ncdf4::nc_close(file_for_att) - attributes(array_met_by_sdate)$region <- region - } else { - lon <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'longitude', - unlist = T, drop_var_dim = T) - lat <- easyNCDF::NcToArray(allfiles[allfiles_exist][1], vars_to_read = 'latitude', - unlist = T, drop_var_dim = T) - attributes(array_met_by_sdate)$lon <- lon - attributes(array_met_by_sdate)$lat <- lat - } return(array_met_by_sdate) } diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R deleted file mode 100644 index 37aa421c..00000000 --- a/modules/Scorecards/Scorecards.R +++ /dev/null @@ -1,610 +0,0 @@ -############################################################################### -##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### -############################################################################### - -##### Load source functions ##### -source('modules/Scorecards/R/tmp/LoadMetrics.R') -source('modules/Scorecards/R/tmp/WeightedMetrics.R') -source('modules/Scorecards/R/tmp/Utils.R') -source('modules/Scorecards/R/tmp/SCTransform.R') -source('modules/Scorecards/R/tmp/ScorecardsSingle.R') -source('modules/Scorecards/R/tmp/ScorecardsMulti.R') -source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') 
-source('modules/Scorecards/R/tmp/VizScorecard.R') - -## Temporary for new ESviz function -source('modules/Scorecards/R/tmp/ColorBarContinuous.R') -source('modules/Scorecards/R/tmp/ClimPalette.R') -.IsColor <- s2dv:::.IsColor -.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs - -## Define function -Scorecards <- function(recipe) { - - ## Parameters for loading data files - skill.input.path <- paste0(recipe$Run$output_dir, "/outputs/Skill/") - stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") - output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") - dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name - var <- recipe$Analysis$Variables$name - start.year <- as.numeric(recipe$Analysis$Time$hcst_start) - end.year <- as.numeric(recipe$Analysis$Time$hcst_end) - forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max - calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) - } else { - start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, - split = ", | |,")[[1]]) - if(!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))) %in% start.months){ - error(recipe$Run$logger,"Requested start dates for scorecards must be loaded") - } - } - - start.months <- sprintf("%02d", start.months) - period <- paste0(start.year, "-", end.year) - - ## Parameters for data aggregation - regions <- recipe$Analysis$Workflow$Scorecards$regions - for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} - - metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation - metrics.load <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Skill$metric), ", | |,")) - metrics.visualize <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) - ncores <- 1 # recipe$Analysis$ncores - - if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ - alpha <- 0.05 - } else { - alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha - } - - if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ - inf.to.na <- FALSE - } else { - inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na - } - - if(is.null(recipe$Analysis$remove_NAs)){ - na.rm <- FALSE - } else { - na.rm <- recipe$Analysis$remove_NAs - } - - ## Parameters for scorecard layout - table.label <- recipe$Analysis$Workflow$Scorecards$table_label - fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label - col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width - col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width - legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks - legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ - plot.legend <- TRUE - } else { - plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ - columns.width <- 1.2 - } else { - columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ - legend.white.space <- 6 - } else { - legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space - } - - 
if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ - legend.height <- 50 - } else { - legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ - label.scale <- 1.4 - } else { - label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ - round.decimal <- 2 - } else { - round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal - } - - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ - font.size <- 1.1 - } else { - font.size <- recipe$Analysis$Workflow$Scorecards$font_size - } - - ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ - calculate.diff <- FALSE - } else { - calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff - } - - ####### SKILL AGGREGATION ####### - if(metric.aggregation == 'skill'){ - - ## Load data files - loaded_metrics <- LoadMetrics(input_path = skill.input.path, - system = system, - reference = reference, - var = var, - metrics = metrics.visualize, - period = period, - start_months = start.months, - calib_method = calib.method, - inf_to_na = inf.to.na - ) - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics)$metrics - forecast.months.dim <- forecast.months - start.months.dim <- attributes(loaded_metrics)$start_months - regions.dim <- regions #list('NAO' = c(lon.min = -80, lon.max = 40, lat.min = 20, lat.max = 80)) - - aggregated_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggregated_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) - } - } - - ## Add attributes - attributes(aggregated_metrics)$metrics <- metrics.load - attributes(aggregated_metrics)$start.months <- attributes(loaded_metrics)$start_months - attributes(aggregated_metrics)$forecast.months <- forecast.months - attributes(aggregated_metrics)$regions <- regions - attributes(aggregated_metrics)$system.name <- names(loaded_metrics) - attributes(aggregated_metrics)$reference.name <- names(loaded_metrics[[1]]) - - - } else { - ## Calculate weighted mean of spatial aggregation - aggregated_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - forecast.months = forecast.months, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region - metrics_significance <- NULL - - } ## close if on skill - - ###### SCORE AGGREGATION ###### - if(metric.aggregation == 'score'){ - - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - - ## Define arrays to filled with data - aggregated_metrics <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - metrics_significance <- array(data = NA, - dim = c(system = length(system), - reference = length(reference), - time = length(forecast.months), - 
sdate = length(start.months), - region = length(regions), - metric = length(metrics.visualize))) - - ## Load and aggregated data for each metric - for (sys in 1:length(system)){ - for (ref in 1:length(reference)){ - for (met in metrics.visualize) { - - if(met == 'rpss'){ - ## Load data from saved files - rps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_syear') - - rps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'rps_clim_syear') - - ## Spatially aggregate data - rps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_syear)$lon), - lat = as.vector(attributes(rps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - rps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = rps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(rps_clim_syear)$lon), - lat = as.vector(attributes(rps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = F) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' - names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - rps_syear <- Apply(data = rps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - rps_clim_syear <- Apply(data = rps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_syear / rps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , ,which(metrics.visualize == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'sdate','region')) - - } ## close if on rpss - - if(met == 'crpss'){ - - ## Load data from saved files - crps_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'crps_syear') - - crps_clim_syear <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'crps_clim_syear') - - ## Spatially aggregate data - crps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_syear)$lon), - lat = as.vector(attributes(crps_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - crps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = crps_clim_syear, - region = regions[[X]], - lon = as.vector(attributes(crps_clim_syear)$lon), - lat = 
as.vector(attributes(crps_clim_syear)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' - names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Temporally aggregate data - crps_syear <- Apply(data = crps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_syear <- Apply(data = crps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_syear / crps_clim_syear - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'sdate','region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - ## Load data from saved files - cov <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'cov') - - std_hcst <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_hcst') - - std_obs <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'std_obs') - - - n_eff <- .loadmetrics(input_path = stats.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'n_eff') - - ## Calculate spatial aggregation - cov <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = cov, - region = regions[[X]], - lon = as.vector(attributes(cov)$lon), - lat = as.vector(attributes(cov)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - - - std_hcst <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_hcst, - region = regions[[X]], - lon = as.vector(attributes(std_hcst)$lon), - lat = as.vector(attributes(std_hcst)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - - std_obs <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = std_obs, - region = regions[[X]], - lon = as.vector(attributes(std_obs)$lon), - lat = as.vector(attributes(std_obs)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(std_obs))[length(dim(std_obs))] <- 'region' - - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = n_eff, - region = regions[[X]], - lon = as.vector(attributes(n_eff)$lon), - lat = as.vector(attributes(n_eff)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - 
names(dim(n_eff))[length(dim(n_eff))] <- 'region' - - ## Calculate correlation - enscorr <- cov / (std_hcst * std_obs) - - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - sdate = length(start.months), - region = length(regions))) - - for (time in 1:dim(sign_corr)[['time']]){ - for (mon in 1:dim(sign_corr)[['sdate']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, mon, reg], t_alpha2_n2[time, mon, reg])) == FALSE - && t[time, mon, reg] >= t_alpha2_n2[time, mon, reg]){ - sign_corr[time, mon, reg] <- TRUE - } else { - sign_corr[time, mon, reg] <- FALSE - } - } - } - } - - ## Save metric result in arrays - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'sdate','region')) - metrics_significance[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'sdate','region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - mean_bias <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'mean_bias') - - ## Calculate spatial aggregation - mean_bias <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = mean_bias, - region = regions[[X]], - lon = as.vector(attributes(mean_bias)$lon), - lat = as.vector(attributes(mean_bias)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'sdate','region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - enssprerr <- .loadmetrics(input_path = skill.input.path, system = system[sys], - reference = reference[ref], var = var, - period = period, start_months = start.months, - calib_method = calib.method, metric = 'enssprerr') - - ## Calculate spatial aggregation - enssprerr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = enssprerr, - region = regions[[X]], - lon = as.vector(attributes(enssprerr)$lon), - lat = as.vector(attributes(enssprerr)$lat), - londim = lon_dim, - latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(enssprerr))[length(dim(enssprerr))] <- 'region' - - ## Save metric result in array - aggregated_metrics[sys, ref, , , , which(metrics.visualize == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'sdate','region')) - - } ## close on enssprerr - - } ## close loop on metric - } ## close if on reference - } ## close if on system - - ## Include metric attributes - attributes(aggregated_metrics)$metrics <- metrics.visualize - - ## Set NAs to False - metrics_significance[is.na(metrics_significance)] <- FALSE - - } ## close if on score - - - ####### PLOT SCORECARDS ########## - - ## Create simple scorecard tables - ## (one system only) - ## Metrics input must be in the same order as function SC_spatial_aggregation - scorecard_single <- ScorecardsSingle(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - 
forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - - ## Create multi system/reference scorecard tables - ## (multiple systems with one reference or one system with multiple references) - ## Metrics input must be in the same order as function SC_spatial_aggregation - if(length(system) > 1 || length(reference) > 1){ - scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, - sign = metrics_significance, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = names(regions), - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - plot.legend = plot.legend, - legend.breaks = legend.breaks, - legend.white.space = legend.white.space, - legend.width = legend.width, - legend.height = legend.height, - label.scale = label.scale, - col1.width = col1.width, - col2.width = col2.width, - columns.width = columns.width, - font.size = font.size, - round.decimal = round.decimal, - output.path = output.path) - } ## close if - - - if(calculate.diff == TRUE){ - if(length(system) == 2 || length(reference) == 2){ - scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, - system = system, - reference = reference, - var = var, - start.year = start.year, - end.year = end.year, - start.months = start.months, - forecast.months = forecast.months, - region.names = attributes(regions)$names, - metrics = metrics.visualize, - table.label = table.label, - fileout.label = fileout.label, - legend.white.space = legend.white.space, - col1.width = col1.width, - col2.width = col2.width, - output.path = output.path) - } else {stop ("Difference scorecard can only be computed with two systems or two references.")} - } ## close if on calculate.diff - -} - diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R new file mode 100644 index 00000000..a5fe67ba --- /dev/null +++ b/modules/Scorecards/Scorecards_calculations.R @@ -0,0 +1,484 @@ +############################################################################### +##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### +############################################################################### + +##### Load source functions ##### +# source('modules/Scorecards/R/tmp/LoadMetrics.R') +# source('modules/Scorecards/R/tmp/WeightedMetrics.R') + +## Define function +Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) { + + ## Parameters for saving output data files + output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") + dir.create(output.path, recursive = T, showWarnings = F) + + system <- recipe$Analysis$Datasets$System$name + reference <- recipe$Analysis$Datasets$Reference$name + var <- recipe$Analysis$Variables$name + start.year <- as.numeric(recipe$Analysis$Time$hcst_start) + end.year <- as.numeric(recipe$Analysis$Time$hcst_end) + forecast.months <- recipe$Analysis$Time$ftime_min : 
recipe$Analysis$Time$ftime_max + start.months <- substr(recipe$Analysis$Time$sdate, 1,2) + period <- paste0(start.year, "-", end.year) + + file.name.metrics <- paste0('scorecards_aggr_skill_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') + file.name.sign <- paste0('scorecards_aggr_sign_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') + + ## Parameters for data aggregation + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation + metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,")) + ncores <- recipe$Analysis$ncores + + if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){ + alpha <- 0.05 + } else { + alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha + } + + if(is.null(recipe$Analysis$remove_NAs)){ + na.rm <- FALSE + } else { + na.rm <- recipe$Analysis$remove_NAs + } + + if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ + inf.to.na <- FALSE + } else { + inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na + } + + ##### TO DO: Need to include condition for removing -INF ##### + # if (inf_to_na) { + # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA + # } + + + ############################# SKILL AGGREGATION ############################# + if(metric.aggregation == 'skill'){ + + # ## Load data files + # loaded_metrics <- LoadMetrics(input_path = skill.input.path, + # system = system, + # reference = reference, + # var = var, + # metrics = metrics, + # period = period, + # start_months = start.months, + # calib_method = calib.method, + # inf_to_na = inf.to.na + # ) + + # ## Remove -Inf from crpss data if variable is precipitation ## from loadMetrics function + # if (inf_to_na) { + # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA + # } + + ## Spatial Aggregation of metrics + if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ + + ### Convert loaded metrics to array for already aggregated data + metrics.dim <- attributes(loaded_metrics)$metrics + forecast.months.dim <- forecast.months + start.months.dim <- attributes(loaded_metrics)$start_months + regions.dim <- regions + + aggr_metrics <- array(dim = c(system = length(loaded_metrics), + reference = length(loaded_metrics[[1]]), + metric = length(metrics.dim), + time = length(forecast.months.dim), + sdate = length(start.months.dim), + region = length(regions.dim))) + + + for (sys in 1:length(names(loaded_metrics))){ + for (ref in 1:length(names(loaded_metrics[[sys]]))){ + aggr_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + } + } + + ## Add attributes + attributes(aggr_metrics)$metrics <- metrics.load + attributes(aggr_metrics)$start.months <- attributes(loaded_metrics)$start_months + attributes(aggr_metrics)$forecast.months <- forecast.months + attributes(aggr_metrics)$regions <- regions + attributes(aggr_metrics)$system.name <- names(loaded_metrics) + attributes(aggr_metrics)$reference.name <- names(loaded_metrics[[1]]) + + + } else { + ## Calculate weighted mean of spatial aggregation + aggr_metrics <- WeightedMetrics(loaded_metrics, + regions = regions, + forecast.months = forecast.months, + metric.aggregation = metric.aggregation, + ncores = ncores) + } ## close if on region + aggr_significance <- NULL + + } ## close if on skill + + ############################# SCORE 
AGGREGATION ############################# + if(metric.aggregation == 'score'){ + + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + lon <- as.numeric(data$hcst$coords$longitude) + lat <- as.numeric(data$hcst$coords$latitude) + + ## Define arrays to filled with data + aggr_metrics <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) + + aggr_significance <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) + + ## Spatially aggregate data for each metric + for (met in metrics) { + + if(met == 'rpss'){ + + rps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + rps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' + names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + rps_syear <- Apply(data = rps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_syear <- Apply(data = rps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_syear / rps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + crps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + crps_syear <- Apply(data = crps_syear, + target_dims = time_dim, + fun = 'mean', ncores = 
ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$cov, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_hcst <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_hcst, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_obs, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Remove 'var' dimension + cov <- Subset(cov, 'var', 1, drop = 'selected') + std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') + std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE + && t[time, reg] >= t_alpha2_n2[time, reg]){ + sign_corr[time, reg] <- TRUE + } else { + sign_corr[time, reg] <- FALSE + } + } + } + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + ## Calculate ensemble mean + hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + + ## Aggregate data over regions + hcst_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = hcst_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + obs_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = obs_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm 
= na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + + ## Remove unnecessary dimension + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + + ## Calculate significance + pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + target_dims = c('syear'), ncores = ncores, + fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + + sign_mean_bias <- pval_mean_bias <= alpha + + ## Calculate aggregated mean bias metric + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$mean_bias, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Remove 'var' dimension + mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + ## Calculate metric + spread <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$spread, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + error <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rms, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + + ## Remove 'var' dimension + spread <- Subset(spread, 'var', 1, drop = 'selected') + error <- Subset(error, 'var', 1, drop = 'selected') + + enssprerr <- spread / error + + # ## Significance calculation + # + # # Effective sample size + # # exp <- data$hcst$data + # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') + # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) + # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) + # + # # Removing eno at the moment + # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + # F <- spread^2 / error^2 + # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + # # p.val <- pf(F, enospr - 1, enodif - 1) + # + # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK + # target_dims = c('time'), + # fun = function(x, y) + # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) + # + # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) + # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) + # + # sign_enssprerr <- pval_enssprerr <= alpha + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) + # aggr_significance[ , , which(metrics == met)] <- 
s2dv::Reorder(data = sign_corr, order = c('time', 'region'))
+      
+    } ## close on enssprerr
+    
+  } ## close loop on metric
+  
+  ## Set NAs to FALSE
+  aggr_significance[is.na(aggr_significance)] <- FALSE
+  
+  } ## close if on score
+  
+  ## Include 'var' dimension to be able to save array
+  if(!'var' %in% names(dim(aggr_metrics))){
+    aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var')
+    aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var')
+  }
+  
+  ## Include attributes
+  attributes(aggr_metrics)$metrics <- metrics
+  attributes(aggr_metrics)$forecast.months <- forecast.months
+  attributes(aggr_metrics)$regions <- regions
+  attributes(aggr_metrics)$system.name <- system
+  attributes(aggr_metrics)$reference.name <- reference
+  
+  aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance)
+  
+  ## Save metric data arrays
+  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                  "/outputs/Scorecards/")
+  save_metrics(recipe = recipe, metrics = aggr_scorecards,
+               data_cube = data$hcst, agg = 'region',
+               module = "scorecard")
+}
+
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
new file mode 100644
index 00000000..ff00e55c
--- /dev/null
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -0,0 +1,222 @@
+###############################################################################
+##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+###############################################################################
+
+##### Load source functions #####
+source('modules/Scorecards/R/tmp/LoadMetrics.R')
+source('modules/Scorecards/R/tmp/Utils.R')
+source('modules/Scorecards/R/tmp/SCTransform.R')
+source('modules/Scorecards/R/tmp/ScorecardsSingle.R')
+source('modules/Scorecards/R/tmp/ScorecardsMulti.R')
+source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R')
+source('modules/Scorecards/R/tmp/VizScorecard.R')
+
+## Temporary for new ESviz function
+source('modules/Scorecards/R/tmp/ColorBarContinuous.R')
+source('modules/Scorecards/R/tmp/ClimPalette.R')
+.IsColor <- s2dv:::.IsColor
+.FilterUserGraphicArgs <- s2dv:::.FilterUserGraphicArgs
+
+## Define function
+Scorecards_plotting <- function(recipe) {
+  
+  ## Parameters for loading data files
+  input.path <- paste0(recipe$Run$output_dir, "/outputs/Scorecards/")
+  output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
+  dir.create(output.path, recursive = T, showWarnings = F)
+
+  system <- recipe$Analysis$Datasets$System$name
+  reference <- recipe$Analysis$Datasets$Reference$name
+  var <- recipe$Analysis$Variables$name
+  start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
+  end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
+  forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
+  calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
+
+  ## Check is.null() first so that the 'all' comparison is never applied to NULL
+  if (is.null(recipe$Analysis$Workflow$Scorecards$start_months) ||
+      recipe$Analysis$Workflow$Scorecards$start_months == 'all') {
+    start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2))
+  } else {
+    start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
+                                        split = ", | |,")[[1]])
+    ## any() must wrap the %in% comparison, not the left-hand side alone
+    if (!any(as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) %in% start.months)) {
+      error(recipe$Run$logger, "Requested start dates for scorecards must be loaded")
+    }
+  }
+  start.months <- sprintf("%02d", start.months)
+  period 
<- paste0(start.year, "-", end.year) + + ## Parameters for data aggregation + regions <- recipe$Analysis$Workflow$Scorecards$regions + for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} + + ## Parameters for scorecard layout + table.label <- recipe$Analysis$Workflow$Scorecards$table_label + fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width + col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width + legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width + + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + plot.legend <- TRUE + } else { + plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + columns.width <- 1.2 + } else { + columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + legend.white.space <- 6 + } else { + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + legend.height <- 50 + } else { + legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + label.scale <- 1.4 + } else { + label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + round.decimal <- 2 + } else { + round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal + } + + if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + font.size <- 1.1 + } else { + font.size <- recipe$Analysis$Workflow$Scorecards$font_size + } + + ## Define if difference scorecard is to be plotted + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + calculate.diff <- FALSE + } else { + calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff + } + + ####### Load data files ####### + aggregated_metrics <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_metrics", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + attributes(aggregated_metrics)$metrics <- metrics + # attributes(aggregated_metrics)$forecast.months <- forecast.months + # attributes(aggregated_metrics)$regions <- regions + # attributes(aggregated_metrics)$system.name <- system + # attributes(aggregated_metrics)$reference.name <- reference + + aggregated_significance <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + data_type = "aggr_significance", + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + + aggregated_significance <- aggregated_significance == 1 + + ####### PLOT SCORECARDS ########## + + ## Create simple scorecard tables + ## (one system only) + ## Metrics input must be in the same order as function SC_spatial_aggregation + scorecard_single <- ScorecardsSingle(data = aggregated_metrics, + sign = aggregated_significance, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + plot.legend = 
plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, + output.path = output.path) + + ## Create multi system/reference scorecard tables + ## (multiple systems with one reference or one system with multiple references) + ## Metrics input must be in the same order as function SC_spatial_aggregation + if(length(system) > 1 || length(reference) > 1){ + scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, + sign = aggregated_significance, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = names(regions), + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + plot.legend = plot.legend, + legend.breaks = legend.breaks, + legend.white.space = legend.white.space, + legend.width = legend.width, + legend.height = legend.height, + label.scale = label.scale, + col1.width = col1.width, + col2.width = col2.width, + columns.width = columns.width, + font.size = font.size, + round.decimal = round.decimal, + output.path = output.path) + } ## close if + + + if(calculate.diff == TRUE){ + if(length(system) == 2 || length(reference) == 2){ + scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, + system = system, + reference = reference, + var = var, + start.year = start.year, + end.year = end.year, + start.months = start.months, + forecast.months = forecast.months, + region.names = attributes(regions)$names, + metrics = metrics, + table.label = table.label, + fileout.label = fileout.label, + legend.white.space = legend.white.space, + col1.width = col1.width, + col2.width = col2.width, + output.path = output.path) + } else {stop ("Difference scorecard can only be computed with two systems or two references.")} + } ## close if on calculate.diff + +} + diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 085bcdc5..ed0af72a 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -5,29 +5,16 @@ Statistics <- function(recipe, data, agg = 'global') { # agg: data aggregation time_dim <- 'syear' + memb_dim <- 'ensemble' ncores <- recipe$Analysis$ncores ## Calculate ensemble mean - hcst_ensmean <- Apply(data$hcst$data, - target_dims = 'ensemble', - fun = 'mean')$output1 - obs_ensmean <- Apply(data$obs$data, - target_dims = 'ensemble', - fun = 'mean')$output1 - - ## Remove unwanted dimensions - ## TODO: Apply .drop_dims() function instead? 
- hcst_ensmean <- Subset(hcst_ensmean, - along = c('dat', 'sday', 'sweek'), - indices = list(1, 1, 1), - drop = 'selected') - obs_ensmean <- Subset(obs_ensmean, - along = c('dat', 'sday', 'sweek'), - indices = list(1, 1, 1), - drop = 'selected') + hcst_ensmean <- MeanDims(data$hcst$data, dims = memb_dim) + obs_ensmean <- MeanDims(data$obs$data, dims = memb_dim) statistics_list <- tolower(recipe$Analysis$Workflow$Statistics$metric) statistics <- list() + # Compute statistics in the list for (stat in strsplit(statistics_list, ", | |,")[[1]]) { if (stat %in% c('cov', 'covariance')) { @@ -37,7 +24,7 @@ Statistics <- function(recipe, data, agg = 'global') { fun = function(x,y) {cov(as.vector(x),as.vector(y), use = "everything", method = "pearson")})$output1 - + covariance <- .drop_dims(covariance) statistics[[ stat ]] <- covariance } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { @@ -45,37 +32,46 @@ Statistics <- function(recipe, data, agg = 'global') { std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 - std_obs <- Apply(data = obs_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 - + std_hcst <- .drop_dims(std_hcst) + std_obs <- .drop_dims(std_obs) statistics[['std_hcst']] <- std_hcst statistics[['std_obs']] <- std_obs - } ## close if on std if (stat %in% c('var', 'variance')) { ## Calculate variance var_hcst <- (Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1)^2 - var_obs <- (Apply(data = obs_ensmean, target_dims = c(time_dim), fun = 'sd')$output1)^2 - + var_hcst <- .drop_dims(var_hcst) + var_obs <- .drop_dims(var_obs) statistics[['var_hcst']] <- var_hcst statistics[['var_obs']] <- var_obs } ## close if on variance - if (stat == 'n_eff') { + if (stat == 'n_eff') { ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_ensmean, time_dim = time_dim, na.action = na.pass, ncores = ncores) + n_eff <- .drop_dims(n_eff) statistics[['n_eff']] <- n_eff - } ## close on n_eff - } + } ## close on n_eff + if (stat == 'spread') { + C_cov <- stats:::C_cov + spread <- sqrt(Apply(Apply(data = data$hcst$data, + target_dims = c(memb_dim), + fun = 'var')$output1, + fun = 'mean', target_dims = time_dim)$output1) + spread <- .drop_dims(spread) + statistics[['spread']] <- spread + } ## close on spread + } ## close on stat info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") .log_memory_usage(recipe$Run$logger, when = "After statistics computation") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 01896877..ab77743b 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -100,7 +100,7 @@ check_recipe <- function(recipe) { if (is.null(recipe$Analysis$Datasets$Multimodel) || (is.logical(recipe$Analysis$Datasets$Multimodel) && !(recipe$Analysis$Datasets$Multimodel))) { - recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { multimodel <- FALSE @@ -134,7 +134,7 @@ check_recipe <- function(recipe) { } } } else { - recipe$Analysis$Datasets$Multimodel <- FALSE + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || @@ -509,7 +509,7 @@ check_recipe <- function(recipe) { # Skill AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear", "rpss", "frps", "frpss", "crps", "crps_syear", - "crpss", "bss10", "bss90", + "crpss", "bss10", "bss90", "rms", 
"mean_bias", "mean_bias_ss", "enssprerr", "rps_clim", "rps_clim_syear", "crps_clim", "crps_clim_syear", "enscorr_specs", "frps_specs", "rpss_specs", -- GitLab From 309a7d1c9b83e0fbcb3ce6cf6b78257e548ac0bf Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 26 Apr 2024 17:11:45 +0200 Subject: [PATCH 02/24] reading var.units from recipe --- modules/Scorecards/R/tmp/LoadMetrics.R | 4 -- modules/Scorecards/R/tmp/ScorecardsMulti.R | 33 +++++---------- modules/Scorecards/R/tmp/ScorecardsSingle.R | 31 +++++--------- .../Scorecards/R/tmp/ScorecardsSystemDiff.R | 36 +++++++---------- modules/Scorecards/Scorecards_calculations.R | 4 +- modules/Scorecards/Scorecards_plotting.R | 16 +++++++- modules/Scorecards/execute_scorecards.R | 2 +- modules/Skill/Skill.R | 2 +- modules/Statistics/Statistics.R | 5 ++- tools/check_recipe.R | 40 +++++++++++++------ 10 files changed, 84 insertions(+), 89 deletions(-) diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 155a569b..fe19dc85 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -60,10 +60,6 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, "will be used.") var <- var[1] } - ## metrics - # if (!is.character(metrics)) { - # stop("Parameter 'metrics' cannot be NULL.") - # } ## start_months if (is.character(start_months)) { warning("Parameter 'start_months' must be a numeric vector indicating ", diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 7278eb16..1633eb86 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -7,11 +7,11 @@ #'@param sign is an array with the same dimensions as data indicting the #' significance of the metrics, with either true, false or null. #'@param system a vector of character strings defining the systems following the -#' archive.yml format from verification suite +#' archive.yml format for SUNSET. #'@param reference a vector of character strings defining the references -#' following the archive.yml format from verification suite -#'@param var a character string following the format from -#' variable-dictionary.yml from verification suite +#' following the archive.yml format for SUNSET. +#'@param var a character string indicating the variable. +#'@param var.units a character sting indicating the the variable units. 
 #'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -56,20 +56,21 @@
 #'  system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #'  reference.name = 'ERA5',
 #'  var = 'tas',
+#'  var.units = 'C',
 #'  start.year = 1993,
 #'  end.year = 2016,
 #'  start.months = 1:12,
 #'  forecast.months = 1:6,
 #'  region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #'  metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#'  table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'  table.label = '(Interpolation = to system, Cross-validation = True)',
 #'  fileout.label = '_crossval-terciles_agg-skill',
-#'  output.path = '/esarchive/scratch/nmilders/scorecards_images/testing'
+#'  output.path = './scorecards'
 #'  )
 
-ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
-                            end.year, start.months, forecast.months,
+ScorecardsMulti <- function(data, sign, system, reference, var, var.units,
+                            start.year, end.year, start.months, forecast.months,
                             region.names, metrics, plot.legend = TRUE,
                             legend.breaks = NULL, legend.white.space = NULL,
                             legend.width = 555, legend.height = 50,
@@ -108,7 +109,6 @@ ScorecardsMulti <- function(data, sign, system, reference, var, start.year,
     attributes(sign)$metrics <- metrics
   }
   
-  
   ## Transform data for scorecards by forecast month (types 11 & 12)
   if(length(start.months) >= length(forecast.months)){
@@ -133,25 +133,14 @@
   }
   sys_dict <- read_yaml("conf/archive.yml")[[filesystem]]
   var_dict <- read_yaml("conf/variable-dictionary.yml")$vars
-  
+
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
-  
+
   system.name <- NULL
   reference.name <- NULL
diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R
index 9fd44545..26e2eeb2 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSingle.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R
@@ -7,11 +7,11 @@
 #'@param sign is an array with the same dimensions as data indicating the
 #'  significance of the metrics, with either true, false or null.
 #'@param system a vector of character strings defining the systems following the
-#'  archive.yml format from verification suite
+#'  archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#'  following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#'  variable-dictionary.yml from verification suite
+#'  following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
 #'@param start.year a numeric indicating the start year of the reference period
 #'@param end.year a numeric indicating the end year of the reference period
 #'@param start.date a vector of character strings indicating the start months
@@ -54,19 +54,20 @@
 #'  system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'),
 #'  reference.name = 'ERA5',
 #'  var = 'tas',
+#'  var.units = 'C',
 #'  start.year = 1993,
 #'  end.year = 2016,
 #'  start.months = 1:12,
 #'  forecast.months = 1:6,
 #'  region.names = c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH'),
 #'  metrics = c('mean_bias', 'enscorr', 'rpss','crpss', 'enssprerr'),
-#'  table.label = '(Interpolation = to system, Aggregation level = skill, Cross-validation = terciles)',
+#'  table.label = '(Interpolation = to system, Cross-validation = True)',
 #'  fileout.label = '_crossval-terciles_agg-skill',
-#'  output.path = '/esarchive/scratch/nmilders/scorecards_images/test'
+#'  output.path = './scorecards'
 #'  )
 #'@export
-ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
-                             end.year, start.months, forecast.months,
+ScorecardsSingle <- function(data, sign, system, reference, var, var.units,
+                             start.year, end.year, start.months, forecast.months,
                              region.names, metrics, plot.legend = TRUE,
                              legend.breaks = NULL, legend.white.space = NULL,
                              legend.width = 550, legend.height = 50,
@@ -149,17 +150,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
   ## Get scorecards table display names from configuration files
   var.name <- var_dict[[var]]$long_name
-  if ('name' %in% names(recipe$Analysis$Variables)){
-    if (recipe$Analysis$Variables$name == var) {
-      var.units <- recipe$Analysis$Variables$units
-    }
-  } else {
-    for (i in 1:length(recipe$Analysis$Variables)) {
-      if (recipe$Analysis$Variables[[i]]$name == var) {
-        var.units <- recipe$Analysis$Variables[[i]]$units
-      }
-    }
-  }
   if (is.null(var.units)) {
     var.units <- var_dict[[var]]$units
   }
@@ -205,9 +195,6 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year,
       stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region')))
       temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected')
       pos_bias <- which(metrics == 'mean_bias')
-      if(var == 'psl'){
-        temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100
-      }
      breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric',
                                           indices = pos_bias))
     }
diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
index 88e9de15..2d17f94c 100644
--- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
+++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R
@@ -5,11 +5,11 @@
 #'@param input_data is an array of spatially aggregated metrics containing the
 #'  following dimensions: system, reference, metric, time, sdate, region.
 #'@param system a vector of character strings defining the systems following the
-#'  archive.yml format from verification suite
+#'  archive.yml format for SUNSET.
 #'@param reference a vector of character strings defining the references
-#'  following the archive.yml format from verification suite
-#'@param var a character string following the format from
-#'  variable-dictionary.yml from verification suite
+#'  following the archive.yml format for SUNSET.
+#'@param var a character string indicating the variable.
+#'@param var.units a character string indicating the variable units.
#'@param start.year a numeric indicating the start year of the reference period #'@param end.year a numeric indicating the end year of the reference period #'@param start.date a vector of character strings indicating the start months @@ -32,6 +32,7 @@ #' system.name = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference.name = 'ERA5', #' var = 'tas', +#' var.units = 'C', #' start.year = 1993, #' end.year = 2016, #' start.months = 1:12, @@ -44,22 +45,12 @@ #' ) -ScorecardsSystemDiff <- function(data, - system, - reference, - var, - start.year, - end.year, - start.months, - forecast.months, - region.names, - metrics, - table.label = NULL, - fileout.label = NULL, - legend.white.space = NULL, - col1.width = NULL, - col2.width = NULL, - output.path){ +ScorecardsSystemDiff <- function(data, system, reference, var, var.units, + start.year, end.year, start.months, + forecast.months, region.names, metrics, + table.label = NULL, fileout.label = NULL, + legend.white.space = NULL, col1.width = NULL, + col2.width = NULL, output.path){ ## Checks to apply: # first dimension in aggregated_metrics is system and second dimension is reference @@ -121,7 +112,10 @@ ScorecardsSystemDiff <- function(data, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units ## TODO: Get units from recipe or elsewhere + + if (is.null(var.units)) { + var.units <- var_dict[[var]]$units + } system.name <- NULL reference.name <- NULL diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index a5fe67ba..f54cfbc2 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -464,13 +464,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } - ## Include attributes + ## Include attributes (necessary?) 
 attributes(aggr_metrics)$metrics <- metrics
 attributes(aggr_metrics)$forecast.months <- forecast.months
 attributes(aggr_metrics)$regions <- regions
 attributes(aggr_metrics)$system.name <- system
 attributes(aggr_metrics)$reference.name <- reference
-  
+
   aggr_scorecards <- list(aggr_metrics = aggr_metrics,
                           aggr_significance = aggr_significance)
   
   ## Save metric data arrays
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
index ff00e55c..c2793bf1 100644
--- a/modules/Scorecards/Scorecards_plotting.R
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -107,6 +107,18 @@ Scorecards_plotting <- function(recipe) {
     calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff
   }
   
+  if ('name' %in% names(recipe$Analysis$Variables)){
+    if (recipe$Analysis$Variables$name == var) {
+      var.units <- recipe$Analysis$Variables$units
+    }
+  } else {
+    for (i in 1:length(recipe$Analysis$Variables)) {
+      if (recipe$Analysis$Variables[[i]]$name == var) {
+        var.units <- recipe$Analysis$Variables[[i]]$units
+      }
+    }
+  }
+  
   ####### Load data files #######
   aggregated_metrics <- LoadMetrics(input_path = input.path,
                                     system = system,
@@ -144,6 +156,7 @@ Scorecards_plotting <- function(recipe) {
                                 system = system,
                                 reference = reference,
                                 var = var,
+                                var.units = var.units,
                                 start.year = start.year,
                                 end.year = end.year,
                                 start.months = start.months,
@@ -173,7 +186,8 @@ Scorecards_plotting <- function(recipe) {
                                sign = aggregated_significance,
                                system = system,
                                reference = reference,
                                var = var,
+                               var.units = var.units,
                                start.year = start.year,
                                end.year = end.year,
                                start.months = start.months,
@@ -203,6 +216,7 @@ Scorecards_plotting <- function(recipe) {
                                    system = system,
                                    reference = reference,
                                    var = var,
+                                   var.units = var.units,
                                    start.year = start.year,
                                    end.year = end.year,
                                    start.months = start.months,
diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index e4dcd4ec..6a8de1bc 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -32,6 +32,6 @@ for (variable in 1:length(recipe$Analysis$Variables)) {
   scorecard_recipe$Analysis$Variables <-
     recipe$Analysis$Variables[[variable]]
   # Plot Scorecards
-  Scorecards(scorecard_recipe)
+  Scorecards_plotting(scorecard_recipe)
 }
 print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####")
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index aa22838a..d59f29d7 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -205,7 +205,7 @@ Skill <- function(recipe, data, agg = 'global') {
       skill_metrics[[ metric ]] <- skill$crpss
       skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
     } else if (metric == 'rms') {
-      source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R")
+      # source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R")
       hcst_mean <- Apply(list(data$hcst$data), target_dims = memb_dim,
                          fun = mean, na.rm = na.rm, ncores = ncores)$output1
       hcst_mean <- InsertDim(hcst_mean, pos = 1, len = 1, name = memb_dim)
diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R
index ed0af72a..d3b805f8 100644
--- a/modules/Statistics/Statistics.R
+++ b/modules/Statistics/Statistics.R
@@ -3,7 +3,7 @@ Statistics <- function(recipe, data, agg = 'global') {
   # data$obs: s2dv_cube containing the observations
   # recipe: auto-s2s recipe as provided by read_yaml
   # agg: data aggregation
-  
+
   time_dim <- 'syear'
   memb_dim <- 'ensemble'
   ncores <- recipe$Analysis$ncores
@@ -29,6 +29,7 @@ Statistics <- function(recipe,
data, agg = 'global') { } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation + print() std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 @@ -66,7 +67,7 @@ Statistics <- function(recipe, data, agg = 'global') { C_cov <- stats:::C_cov spread <- sqrt(Apply(Apply(data = data$hcst$data, target_dims = c(memb_dim), - fun = 'var')$output1, + fun = stats::var)$output1, fun = 'mean', target_dims = time_dim)$output1) spread <- .drop_dims(spread) statistics[['spread']] <- spread diff --git a/tools/check_recipe.R b/tools/check_recipe.R index ab77743b..7e7f33cb 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -98,9 +98,8 @@ check_recipe <- function(recipe) { } # Check multimodel if (is.null(recipe$Analysis$Datasets$Multimodel) || - (is.logical(recipe$Analysis$Datasets$Multimodel) && - !(recipe$Analysis$Datasets$Multimodel))) { - recipe$Analysis$Datasets$Multimodel$execute <- FALSE + isFALSE(recipe$Analysis$Datasets$Multimodel)) { + recipe$Analysis$Datasets$Multimodel <- list(execute = FALSE) } if (tolower(recipe$Analysis$Datasets$Multimodel$execute) == 'false') { multimodel <- FALSE @@ -134,7 +133,7 @@ check_recipe <- function(recipe) { } } } else { - recipe$Analysis$Datasets$Multimodel$execute <- FALSE + recipe$Analysis$Datasets$Multimodel$execute <- FALSE } # Check ftime_min and ftime_max if ((!(recipe$Analysis$Time$ftime_min > 0)) || @@ -217,12 +216,12 @@ check_recipe <- function(recipe) { "The 'Regrid' element must specify the 'method' and 'type'.") error_status <- TRUE } - + if (recipe$Analysis$Regrid$type == 'to_system' && multimodel) { - error(recipe$Run$logger, - paste0("The 'Regrid$type' cannot be 'to_system' if ", - "'Multimodel$execute' is yes/true or both.")) - error_status <- TRUE + error(recipe$Run$logger, + paste0("The 'Regrid$type' cannot be 'to_system' if ", + "'Multimodel$execute' is yes/true or both.")) + error_status <- TRUE } # TODO: Add Workflow checks? # ... 
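A quick aside on the isFALSE() shorthand adopted in the Multimodel check above: it matches only a length-one, non-NA logical FALSE (available since base R 3.5.0), so recipe values of any other type or shape are left for the later tolower() comparison on the 'execute' field. A minimal illustration, with invented example values, not part of the patch:

    isFALSE(FALSE)              # TRUE: a single logical FALSE
    isFALSE("false")            # FALSE: character values are not coerced
    isFALSE(c(FALSE, FALSE))    # FALSE: must be length one
    isFALSE(NA)                 # FALSE: NA is not treated as FALSE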
@@ -648,8 +647,14 @@ check_recipe <- function(recipe) {
       }
     }
   # Scorecards
+  if (!is.null(recipe$Analysis$Workflow$Statistics)) {
+    statistics <- strsplit(recipe$Analysis$Workflow$Statistics$metric,
+                           ", | |,")[[1]]
+  } else {
+    statistics <- NULL
+  }
   if ("Scorecards" %in% names(recipe$Analysis$Workflow)) {
-    if(recipe$Analysis$Workflow$Scorecards$execute == TRUE){
+    if (recipe$Analysis$Workflow$Scorecards$execute == TRUE) {
       if (is.null(recipe$Analysis$Workflow$Scorecards$metric)) {
         error(recipe$Run$logger,
               "Parameter 'metric' must be defined under 'Scorecards'.")
@@ -674,12 +679,21 @@ check_recipe <- function(recipe) {
           requested_metrics <- c(requested_metrics, 'crps_syear')
         }
       }
-      if ('enscorr' %in% tolower(sc_metrics)) {
-        recipe$Analysis$Workflow$Statistics <- c('std', 'cov', 'n_eff')
+      if ('enscorr' %in% tolower(sc_metrics) &&
+          !all(c('std', 'cov', 'n_eff') %in% statistics)) {
+        error(recipe$Run$logger,
+              paste("For 'enscorr' to be plotted in the Scorecards with",
+                    "the 'score' aggregation, the Statistics module must",
+                    "be called and the statistics 'std', 'cov' and",
+                    "'n_eff' are required."))
+        error_status <- TRUE
       }
-      recipe$Analysis$Workflow$Skill$metric <- requested_metrics
+      recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics,
+                                                      collapse = " ")
     }
     if (tolower(recipe$Analysis$Output_format) != 'scorecards') {
+      warn(recipe$Run$logger,
+           "Scorecards requested: setting output format as 'Scorecards'")
       recipe$Analysis$Output_format <- 'scorecards'
     }
     if (!all(tolower(sc_metrics) %in% tolower(requested_metrics))) {
--
GitLab


From c47fb7179056b3498a46dd9da879ecfa7adb6841 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Tue, 7 May 2024 17:06:11 +0200
Subject: [PATCH 03/24] Adjustments for scorecards with launcher

---
 modules/Scorecards/Scorecards_calculations.R | 129 +++++++------------
 modules/Scorecards/Scorecards_plotting.R     |  13 +-
 modules/Scorecards/execute_scorecards.R      |   4 +-
 modules/Statistics/Statistics.R              |   1 -
 4 files changed, 52 insertions(+), 95 deletions(-)

diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R
index f54cfbc2..468a72d9 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -1,14 +1,17 @@
 ###############################################################################
-##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+############# Spatial aggregation calculations for scorecards #################
 ###############################################################################
 
-##### Load source functions #####
-# source('modules/Scorecards/R/tmp/LoadMetrics.R')
-# source('modules/Scorecards/R/tmp/WeightedMetrics.R')
+## This function spatially aggregates the skill metrics and statistics for the
+## scorecards. The aggregated statistical significance is also calculated for the
+## metrics when the 'score' aggregation is requested. The aggregation regions are
+## defined in the general recipe under the scorecards section.
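A minimal sketch (not part of the patch) of the 'regions' input this comment refers to: once read from the recipe and passed through the unlist() step in the function body, 'regions' is a named list of lon/lat boundaries, as in the example documented in WeightedMetrics.R:

    regions <- list('global' = c(lon.min = 0, lon.max = 360,
                                 lat.min = -90, lat.max = 90),
                    'europe' = c(lon.min = -10, lon.max = 40,
                                 lat.min = 30, lat.max = 70))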
## Define function Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) { + ## TO DO: need to test with NAO data + ## Parameters for saving output data files output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) @@ -21,9 +24,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max start.months <- substr(recipe$Analysis$Time$sdate, 1,2) period <- paste0(start.year, "-", end.year) - - file.name.metrics <- paste0('scorecards_aggr_skill_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') - file.name.sign <- paste0('scorecards_aggr_sign_',system,'_',reference,'_',var,'_',period,'_s',start.months,'.RDS') ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions @@ -51,72 +51,49 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - ##### TO DO: Need to include condition for removing -INF ##### - # if (inf_to_na) { - # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - # } + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + lon <- as.numeric(data$hcst$coords$longitude) + lat <- as.numeric(data$hcst$coords$latitude) + ## Define arrays to filled with data + aggr_metrics <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) + + aggr_significance <- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions), + metric = length(metrics))) ############################# SKILL AGGREGATION ############################# if(metric.aggregation == 'skill'){ - - # ## Load data files - # loaded_metrics <- LoadMetrics(input_path = skill.input.path, - # system = system, - # reference = reference, - # var = var, - # metrics = metrics, - # period = period, - # start_months = start.months, - # calib_method = calib.method, - # inf_to_na = inf.to.na - # ) - - # ## Remove -Inf from crpss data if variable is precipitation ## from loadMetrics function - # if (inf_to_na) { - # by_reference[[reference]][by_reference[[reference]]==-Inf] <- NA - # } - - ## Spatial Aggregation of metrics - if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){ - - ### Convert loaded metrics to array for already aggregated data - metrics.dim <- attributes(loaded_metrics)$metrics - forecast.months.dim <- forecast.months - start.months.dim <- attributes(loaded_metrics)$start_months - regions.dim <- regions - aggr_metrics <- array(dim = c(system = length(loaded_metrics), - reference = length(loaded_metrics[[1]]), - metric = length(metrics.dim), - time = length(forecast.months.dim), - sdate = length(start.months.dim), - region = length(regions.dim))) - - - for (sys in 1:length(names(loaded_metrics))){ - for (ref in 1:length(names(loaded_metrics[[sys]]))){ - aggr_metrics[sys, ref, , , , ] <- s2dv::Reorder(data = loaded_metrics[[sys]][[ref]], order = c('metric','time','sdate','region')) + ## Calculate weighted mean of spatial aggregation + for(met in metrics){ + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + 
result <-Subset(result, 'var', 1, drop = 'selected') + + if(met =='crpss' && inf_to_na == TRUE){ + result[result == -Inf] <- NA } - } - - ## Add attributes - attributes(aggr_metrics)$metrics <- metrics.load - attributes(aggr_metrics)$start.months <- attributes(loaded_metrics)$start_months - attributes(aggr_metrics)$forecast.months <- forecast.months - attributes(aggr_metrics)$regions <- regions - attributes(aggr_metrics)$system.name <- names(loaded_metrics) - attributes(aggr_metrics)$reference.name <- names(loaded_metrics[[1]]) - + + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) - } else { - ## Calculate weighted mean of spatial aggregation - aggr_metrics <- WeightedMetrics(loaded_metrics, - regions = regions, - forecast.months = forecast.months, - metric.aggregation = metric.aggregation, - ncores = ncores) - } ## close if on region + } ##close on met + aggr_significance <- NULL } ## close if on skill @@ -124,24 +101,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci ############################# SCORE AGGREGATION ############################# if(metric.aggregation == 'score'){ - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - lon <- as.numeric(data$hcst$coords$longitude) - lat <- as.numeric(data$hcst$coords$latitude) - - ## Define arrays to filled with data - aggr_metrics <- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - - aggr_significance <- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - ## Spatially aggregate data for each metric for (met in metrics) { @@ -464,7 +423,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } - ## Include attributes (necessary?) 
+ ## Include attributes attributes(aggr_metrics)$metrics <- metrics attributes(aggr_metrics)$forecast.months <- forecast.months attributes(aggr_metrics)$regions <- regions diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index c2793bf1..ead6294e 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -25,8 +25,9 @@ Scorecards_plotting <- function(recipe) { output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name + system <- as.vector(unlist(recipe$Analysis$Datasets$System)) + reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference)) + var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) @@ -107,6 +108,7 @@ Scorecards_plotting <- function(recipe) { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } + if ('name' %in% names(recipe$Analysis$Variables)){ if (recipe$Analysis$Variables$name == var) { var.units <- recipe$Analysis$Variables$units @@ -129,11 +131,7 @@ Scorecards_plotting <- function(recipe) { start_months = as.numeric(start.months), calib_method = calib.method) - attributes(aggregated_metrics)$metrics <- metrics - # attributes(aggregated_metrics)$forecast.months <- forecast.months - # attributes(aggregated_metrics)$regions <- regions - # attributes(aggregated_metrics)$system.name <- system - # attributes(aggregated_metrics)$reference.name <- reference + attributes(aggregated_metrics)$metrics <- metrics aggregated_significance <- LoadMetrics(input_path = input.path, system = system, @@ -146,6 +144,7 @@ Scorecards_plotting <- function(recipe) { aggregated_significance <- aggregated_significance == 1 + ####### PLOT SCORECARDS ########## ## Create simple scorecard tables diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 6a8de1bc..0999b8ec 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -1,5 +1,5 @@ source('tools/libs.R') -source('modules/Scorecards/Scorecards.R') +source('modules/Scorecards/Scorecards_plotting.R') args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] @@ -31,7 +31,7 @@ for (variable in 1:length(recipe$Analysis$Variables)) { as.vector(unlist(recipe$Analysis$Datasets$Reference)) scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] - # Plot Scorecards + ## Plot Scorecards Scorecards_plotting(scorecard_recipe) } print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####") diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index d3b805f8..6fa5d03c 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -29,7 +29,6 @@ Statistics <- function(recipe, data, agg = 'global') { } ## close if on covariance if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation - print() std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), fun = 'sd')$output1 -- GitLab From e105a27c90e233585f3b7f4fa84bf9cf751bcdac Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 10 May 2024 16:38:23 +0200 Subject: [PATCH 04/24] include indices condition for scorecards --- modules/Scorecards/Scorecards_calculations.R | 662 ++++++++++--------- 1 file changed, 339 insertions(+), 323 deletions(-) 
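The core of this patch, before the diff itself: when the requested metrics already carry a 'region' dimension (index data such as the NAO case flagged in the TO DO above), the spatial weighting is skipped and the per-metric arrays are reordered and stacked along a new 'metric' dimension instead. A self-contained sketch of that stacking step, with toy dimensions and random values for illustration only:

    library(abind)
    library(s2dv)   # for Reorder()
    rpss  <- array(rnorm(6), dim = c(time = 3, region = 2, var = 1))
    crpss <- array(rnorm(6), dim = c(time = 3, region = 2, var = 1))
    aggr_metrics <- NULL
    for (met in list(rpss, crpss)) {
      met <- Reorder(met, c("time", "region", "var"))
      # bind along the third ('var') axis, which becomes 'metric';
      # abind() ignores the initial NULL, as in the module code
      aggr_metrics <- abind(aggr_metrics, met, along = 3)
    }
    names(dim(aggr_metrics)) <- c("time", "region", "metric")
    dim(aggr_metrics)   # time 3, region 2, metric 2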
diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 468a72d9..5ba0a569 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -51,12 +51,6 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - lon_dim <- 'longitude' - lat_dim <- 'latitude' - time_dim <- 'syear' - memb_dim <- 'ensemble' - lon <- as.numeric(data$hcst$coords$longitude) - lat <- as.numeric(data$hcst$coords$latitude) ## Define arrays to filled with data aggr_metrics <- array(data = NA, @@ -69,353 +63,375 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci region = length(regions), metric = length(metrics))) - ############################# SKILL AGGREGATION ############################# - if(metric.aggregation == 'skill'){ - - ## Calculate weighted mean of spatial aggregation - for(met in metrics){ - result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(result))[length(dim(result))] <- 'region' - result <-Subset(result, 'var', 1, drop = 'selected') - - if(met =='crpss' && inf_to_na == TRUE){ - result[result == -Inf] <- NA - } - - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) + + ## For data that is already aggregated by region + if ("region" %in% names(dim(skill_metrics[[1]]))) { + + aggr_metrics <- NULL + + for(met in metrics){ + skill_metrics[[met]] <- Reorder(skill_metrics[[met]], c("time", "region", "var")) + aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3) + } - } ##close on met - - aggr_significance <- NULL + names(dim(aggr_metrics)) <- c("time", "region", "metric") - } ## close if on skill + } else { - ############################# SCORE AGGREGATION ############################# - if(metric.aggregation == 'score'){ + lon_dim <- 'longitude' + lat_dim <- 'latitude' + time_dim <- 'syear' + memb_dim <- 'ensemble' + lon <- as.numeric(data$hcst$coords$longitude) + lat <- as.numeric(data$hcst$coords$latitude) - ## Spatially aggregate data for each metric - for (met in metrics) { - - if(met == 'rpss'){ - - rps_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rps_syear, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - rps_clim_syear <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rps_clim_syear, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' - names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') - - ## Calculate significance - sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Average over 'syear' dimension - rps_syear <- Apply(data = rps_syear, - target_dims = time_dim, - fun = 
'mean', ncores = ncores)$output1 - - rps_clim_syear <- Apply(data = rps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate RPSS from aggregated RPS and RPS_clim - rpss <- 1 - rps_syear / rps_clim_syear - - ## Save metric result in arrays - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + ############################# SKILL AGGREGATION ############################# + if(metric.aggregation == 'skill'){ + + ## Calculate weighted mean of spatial aggregation + for(met in metrics){ + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + result <-Subset(result, 'var', 1, drop = 'selected') + + if(met =='crpss' && inf_to_na == TRUE){ + result[result == -Inf] <- NA + } + + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) - } ## close if on rpss + } ##close on met + + } ## close if on skill + + ############################# SCORE AGGREGATION ############################# + if(metric.aggregation == 'score'){ - if(met == 'crpss'){ - - crps_syear <- sapply(X = 1:length(regions), + ## Spatially aggregate data for each metric + for (met in metrics) { + + if(met == 'rpss'){ + + rps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + rps_clim_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$rps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' + names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + rps_syear <- Apply(data = rps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + rps_clim_syear <- Apply(data = rps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate RPSS from aggregated RPS and RPS_clim + rpss <- 1 - rps_syear / rps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + + } ## close if on rpss + + if(met == 'crpss'){ + + crps_syear <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + crps_clim_syear <- sapply(X = 
1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$crps_clim_syear, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' + names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' + + ## Remove 'var' dimension + crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') + + ## Calculate significance + sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, + time_dim = time_dim, test.type = 'two.sided', + alpha = alpha, pval = FALSE, sign = TRUE, + ncores = NULL)$sign + + ## Average over 'syear' dimension + crps_syear <- Apply(data = crps_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + crps_clim_syear <- Apply(data = crps_clim_syear, + target_dims = time_dim, + fun = 'mean', ncores = ncores)$output1 + + ## Calculate CRPSS from aggregated CRPS and CRPS_clim + crpss <- 1 - crps_syear / crps_clim_syear + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + + } ## close if on crpss + + if(met == 'enscorr'){ + + cov <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$cov, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + std_hcst <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = skill_metrics$crps_syear, + WeightedMean(data = statistics$std_hcst, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - crps_clim_syear <- sapply(X = 1:length(regions), + + std_obs <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$std_obs, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include name of region dimension + names(dim(cov))[length(dim(cov))] <- 'region' + names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' + names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(n_eff))[length(dim(n_eff))] <- 'region' + + ## Remove 'var' dimension + cov <- Subset(cov, 'var', 1, drop = 'selected') + std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') + std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + + ## Calculate correlation + enscorr <- cov / (std_hcst * std_obs) + + ## Calculate significance of corr + t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) + t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) + + sign_corr<- array(data = NA, + dim = c(time = length(forecast.months), + region = length(regions))) + + for (time in 1:dim(sign_corr)[['time']]){ + for (reg in 1:dim(sign_corr)[['region']]){ + + if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE + && t[time, reg] >= t_alpha2_n2[time, reg]){ + sign_corr[time, reg] <- TRUE + } else { 
+ sign_corr[time, reg] <- FALSE + } + } + } + + ## Save metric result in arrays + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close if on enscorr + + if(met == 'mean_bias'){ + + ## Calculate ensemble mean + hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') + obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') + + ## Aggregate data over regions + hcst_data_aggr <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = hcst_data_ens, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + obs_data_aggr <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = skill_metrics$crps_clim_syear, + WeightedMean(data = obs_data_ens, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - - ## Include name of region dimension - names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' - names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') - - ## Calculate significance - sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', - alpha = alpha, pval = FALSE, sign = TRUE, - ncores = NULL)$sign - - ## Average over 'syear' dimension - crps_syear <- Apply(data = crps_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - crps_clim_syear <- Apply(data = crps_clim_syear, - target_dims = time_dim, - fun = 'mean', ncores = ncores)$output1 - - ## Calculate CRPSS from aggregated CRPS and CRPS_clim - crpss <- 1 - crps_syear / crps_clim_syear - - ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) - - } ## close if on crpss - - if(met == 'enscorr'){ - - cov <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$cov, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - std_hcst <- sapply(X = 1:length(regions), + + ## Include name of region dimension + names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] <- 'region' + names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' + + ## Remove unnecessary dimension + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + + ## Calculate significance + pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), + target_dims = c('syear'), ncores = ncores, + fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + + sign_mean_bias <- pval_mean_bias <= alpha + + ## Calculate aggregated mean bias metric + mean_bias <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics$mean_bias, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Include 
name of region dimension + names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' + + ## Remove 'var' dimension + mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) + aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + + } ## close on mean_bias + + if(met == 'enssprerr'){ + + ## Calculate metric + spread <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_hcst, + WeightedMean(data = statistics$spread, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) - }, simplify = 'array') - - std_obs <- sapply(X = 1:length(regions), + }, simplify = 'array') + + error <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_obs, + WeightedMean(data = skill_metrics$rms, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) - }, simplify = 'array') - - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$n_eff, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(cov))[length(dim(cov))] <- 'region' - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - names(dim(std_obs))[length(dim(std_obs))] <- 'region' - names(dim(n_eff))[length(dim(n_eff))] <- 'region' - - ## Remove 'var' dimension - cov <- Subset(cov, 'var', 1, drop = 'selected') - std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') - std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') - n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') - - ## Calculate correlation - enscorr <- cov / (std_hcst * std_obs) - - ## Calculate significance of corr - t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) - t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) - - sign_corr<- array(data = NA, - dim = c(time = length(forecast.months), - region = length(regions))) - - for (time in 1:dim(sign_corr)[['time']]){ - for (reg in 1:dim(sign_corr)[['region']]){ - - if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE - && t[time, reg] >= t_alpha2_n2[time, reg]){ - sign_corr[time, reg] <- TRUE - } else { - sign_corr[time, reg] <- FALSE - } - } - } - - ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) - - } ## close if on enscorr - - if(met == 'mean_bias'){ - - ## Calculate ensemble mean - hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble') - obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble') - - ## Aggregate data over regions - hcst_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = hcst_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - obs_data_aggr <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = obs_data_ens, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(hcst_data_aggr))[length(dim(hcst_data_aggr))] 
<- 'region' - names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' - - ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - - ## Calculate significance - pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), - target_dims = c('syear'), ncores = ncores, - fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value - - sign_mean_bias <- pval_mean_bias <= alpha - - ## Calculate aggregated mean bias metric - mean_bias <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$mean_bias, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(mean_bias))[length(dim(mean_bias))] <- 'region' - - ## Remove 'var' dimension - mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') - - ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) - - } ## close on mean_bias - - if(met == 'enssprerr'){ - - ## Calculate metric - spread <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$spread, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - error <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics$rms, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - ## Include name of region dimension - names(dim(spread))[length(dim(spread))] <- 'region' - names(dim(error))[length(dim(error))] <- 'region' - - ## Remove 'var' dimension - spread <- Subset(spread, 'var', 1, drop = 'selected') - error <- Subset(error, 'var', 1, drop = 'selected') - - enssprerr <- spread / error - - # ## Significance calculation - # - # # Effective sample size - # # exp <- data$hcst$data - # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') - # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) - # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) - # - # # Removing eno at the moment - # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) - # F <- spread^2 / error^2 - # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { - # # p.val <- pf(F, enospr - 1, enodif - 1) - # - # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK - # target_dims = c('time'), - # fun = function(x, y) - # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) - # - # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) - # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) - # - # sign_enssprerr <- pval_enssprerr <= alpha - - ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) - # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + }, simplify = 'array') + + ## Include name of region dimension + 
names(dim(spread))[length(dim(spread))] <- 'region' + names(dim(error))[length(dim(error))] <- 'region' + + ## Remove 'var' dimension + spread <- Subset(spread, 'var', 1, drop = 'selected') + error <- Subset(error, 'var', 1, drop = 'selected') + + enssprerr <- spread / error + + # ## Significance calculation + # + # # Effective sample size + # # exp <- data$hcst$data + # # ens_exp <- MeanDims(data$hcst$data, dims = 'ensemble') + # # enospr <- sum(Eno(exp - InsertDim(ens_exp, length(dim(exp)), dim(exp)['ensemble']), "ensemble")) + # # enodif <- .Eno(ens_exp - ens_obs, na.action = na.pass) + # + # # Removing eno at the moment + # # F <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + # F <- spread^2 / error^2 + # # if (!is.na(F) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + # # p.val <- pf(F, enospr - 1, enodif - 1) + # + # pval_enssprerr <- Apply(data = list(x = F, y = data$hcst$data), ### DOES NOT WORK + # target_dims = c('time'), + # fun = function(x, y) + # {pf(x, dim(y)['syear'] -1 ,dim(y)['syear'] -1)}) + # + # pval_enssprerr <- pf(F,dim(data$hcst$data)['syear'] -1 ,dim(data$hcst$data)['syear'] -1) + # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) + # + # sign_enssprerr <- pval_enssprerr <= alpha + + ## Save metric result in array + aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) + # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + + } ## close on enssprerr - } ## close on enssprerr + } ## close loop on metric - } ## close loop on metric - - ## Set NAs to False - aggr_significance[is.na(aggr_significance)] <- FALSE - - } ## close if on score + } ## close if on score + + } ## close if on region + + ## Set NAs to False + aggr_significance[is.na(aggr_significance)] <- FALSE ## Include 'var' dimension to be able to save array if(!'var' %in% names(dim(aggr_metrics))){ -- GitLab From 4f75c9a4cd88256ee82fb1b98d8c4f429f1a3cad Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Mon, 13 May 2024 11:34:21 +0200 Subject: [PATCH 05/24] cleaning code --- modules/Scorecards/R/tmp/WeightedMetrics.R | 110 ------------------- modules/Scorecards/Scorecards_calculations.R | 97 +++++++++------- modules/Scorecards/Scorecards_plotting.R | 17 ++- 3 files changed, 70 insertions(+), 154 deletions(-) delete mode 100644 modules/Scorecards/R/tmp/WeightedMetrics.R diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R deleted file mode 100644 index 08267db4..00000000 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ /dev/null @@ -1,110 +0,0 @@ -#' Scorecards spatial aggregation of loaded metrics -#' -#'@description Scorecards function to perform the spatial aggregation of the -#' loaded metrics for the specified regions. -#' -#'@param loaded_metrics is a list of arrays containing the metrics loaded by the -#' function SC_load_metrics. -#'@param region is a named list of vectors containing the desired regions to -#' analyze. For each region the following should be specified in this order: -#' lon_min, lon_max, lat_min, lat_max. -#'@param metric.aggregation a character indicating whether the skill score RPS -#' and CRPSS are calculated from aggregated scores or aggregated skill score -#' directly, either 'score' or 'skill' respectively -#'@param ncores is the number of cores to use for the calculation. 
-#' -#'@return An array with the following dimensions: system, reference, metrics, -#' time, sdate, region. -#' -#'@examples -#'regions <- list('global' = c(lon.min = 0, lon.max = 360, lat.min = -90, lat.max = 90), -#' 'europe' = c(lon.min = -10, lon.max = 40, lat.min = 30, lat.max = 70)) -#'aggregated_metrics <- WeightedMetrics(loaded_metrics, -#' regions = regions, -#' metric.aggregation = 'skill', -#' ncores = 4) -#'@import multiApply -#'@importFrom ClimProjDiags WeightedMean -#'@importFrom s2dv Reorder -#'@export -WeightedMetrics <- function(loaded_metrics, regions, forecast.months, - metric.aggregation, ncores = NULL, na.rm = TRUE) { - ## Initial checks - # loaded_metrics - if (any(sapply(loaded_metrics, function(x) { - sapply(x, function(y) {is.null(dim(y))}) - }))) { - stop(paste0("Parameter 'loaded_metrics' must be a list of lists of arrays ", - "with named dimensions.")) - } - # regions - if (!all(sapply(regions, is.numeric))) { - stop(paste0("Parameter 'regions' must be a named list of vectors ", - "containing the desired regions to analyze.")) - } - # metric.aggregation - if (!is.character(metric.aggregation)) { - stop("Parameter 'metric.aggregation' must be a character indicating.") - } - # ncores - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be an integer.") - } - - ## Get metric names - ## TO DO: check all metric are in the same order for all sys - metrics <- attributes(loaded_metrics)$metrics - start.months <- attributes(loaded_metrics)$start_months - - all_metric_means <- array(dim = c(metric = length(metrics), - time = length(forecast.months), - sdate = length(start.months), - region = length(regions), - reference = length(loaded_metrics[[1]]), - system = length(loaded_metrics))) - - ## Loop over system - for (sys in 1:length(loaded_metrics)) { - ## Loop over reference - for (ref in 1:length(loaded_metrics[[sys]])) { - dimnames <- names(dim(loaded_metrics[[sys]][[ref]])) - lon_dim_name <- dimnames[which(dimnames %in% .KnownLonNames())] - lat_dim_name <- dimnames[which(dimnames %in% .KnownLatNames())] - ## Get latitude and longitude from attributes of loaded metrics - ## Loop over region - for (reg in 1:length(regions)) { - ## Calculate weighted means for defined regions for each system and reference - weighted.mean <- WeightedMean(data = loaded_metrics[[sys]][[ref]], - lon = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lon), - lat = as.vector(attributes(loaded_metrics[[sys]][[ref]])$lat), - region = regions[[reg]], - londim = lon_dim_name, - latdim = lat_dim_name, - na.rm = na.rm, - ncores = ncores) - - if (!all(names(dim(weighted.mean)) == c('metric', 'time', 'sdate'))) { - weighted.mean <- Reorder(weighted.mean, c('metric', 'time', 'sdate')) - } - - all_metric_means[, , , reg, ref, sys] <- weighted.mean - - } ## close loop on region - } ## close loop on reference - } ## close loop on system - - ## reorder dimensions in array - all_metric_means <- s2dv::Reorder(all_metric_means, c('system','reference','metric','time','sdate','region')) - - ## Add attributes - attributes(all_metric_means)$metrics <- metrics - attributes(all_metric_means)$start.months <- start.months - attributes(all_metric_means)$forecast.months <- forecast.months - attributes(all_metric_means)$regions <- regions - attributes(all_metric_means)$system.name <- names(loaded_metrics) - attributes(all_metric_means)$reference.name <- names(loaded_metrics[[1]]) - - return(all_metric_means) - -} ## close function - diff --git a/modules/Scorecards/Scorecards_calculations.R 
b/modules/Scorecards/Scorecards_calculations.R
index 5ba0a569..ca39dd01 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -5,12 +5,12 @@
 ## This function spatially aggregates the skill metrics and statistics for the
 ## scorecards. The aggregated statistical significance is also calculated for the
 ## metrics when the 'score' aggregation is requested. The aggregation regions are
-## defined in the general recipe under the scorecards section.
+## defined in the recipe under the scorecards section. This function reads from
+## the atomic recipes.
 
 ## Define function
-Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, recipe) {
-  
-  ## TO DO: need to test with NAO data
+Scorecards_calculations <- function(data, skill_metrics,
+                                    statistics = NULL, recipe) {
   
   ## Parameters for saving output data files
   output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
@@ -21,7 +21,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
   var <- recipe$Analysis$Variables$name
   start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
   end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
-  forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
+  forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max
   start.months <- substr(recipe$Analysis$Time$sdate, 1,2)
   period <- paste0(start.year, "-", end.year)
 
@@ -30,7 +30,8 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
   for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])}
   
   metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation
-  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
+                             ", | |,"))
   ncores <- recipe$Analysis$ncores
   
   if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
@@ -51,13 +52,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
     inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
   }
-  
-  ## Define arrays to filled with data
-  aggr_metrics <- array(data = NA,
-                        dim = c(time = length(forecast.months),
-                                region = length(regions),
-                                metric = length(metrics)))
-  
+  ## Define array to filled with data
   aggr_significance <- array(data = NA,
                              dim = c(time = length(forecast.months),
                                      region = length(regions),
                                      metric = length(metrics)))
@@ -70,13 +65,20 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
     aggr_metrics <- NULL
     
     for(met in metrics){
-      skill_metrics[[met]] <- Reorder(skill_metrics[[met]], c("time", "region", "var"))
+      skill_metrics[[met]] <- Reorder(skill_metrics[[met]],
+                                      c("time", "region", "var"))
       aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3)
     }
     
     names(dim(aggr_metrics)) <- c("time", "region", "metric")
     
   } else {
+    
+    ## Define arrays to filled with data
+    aggr_metrics <- array(data = NA,
+                          dim = c(time = length(forecast.months),
+                                  region = length(regions),
+                                  metric = length(metrics)))
     
     lon_dim <- 'longitude'
     lat_dim <- 'latitude'
@@ -85,7 +87,7 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci
     lon <- as.numeric(data$hcst$coords$longitude)
     lat <- as.numeric(data$hcst$coords$latitude)
     
-    ############################# SKILL AGGREGATION #############################
+    ## Skill aggregation
     if(metric.aggregation == 'skill'){
       
       ## Calculate weighted mean of spatial aggregation
@@ -102,17 +104,18 @@
Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(result))[length(dim(result))] <- 'region' result <-Subset(result, 'var', 1, drop = 'selected') - if(met =='crpss' && inf_to_na == TRUE){ + if(met =='crpss' && inf.to.na == TRUE){ result[result == -Inf] <- NA } - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = result, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = result, + order = c('time', 'region')) - } ##close on met + } ## close on met } ## close if on skill - ############################# SCORE AGGREGATION ############################# + ## Score Aggregation if(metric.aggregation == 'score'){ ## Spatially aggregate data for each metric @@ -143,12 +146,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' ## Remove 'var' dimension - rps_syear <-Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <-Subset(rps_clim_syear, 'var', 1, drop = 'selected') + rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected') + rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected') ## Calculate significance sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', + time_dim = time_dim, + test.type = 'two.sided', alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign @@ -165,8 +169,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci rpss <- 1 - rps_syear / rps_clim_syear ## Save metric result in arrays - aggr_metrics[ , ,which(metrics == met)] <- s2dv::Reorder(data = rpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_rpss, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = rpss, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss, + order = c('time', 'region')) } ## close if on rpss @@ -195,12 +201,13 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' ## Remove 'var' dimension - crps_syear <-Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <-Subset(crps_clim_syear, 'var', 1, drop = 'selected') + crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected') + crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected') ## Calculate significance sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, - time_dim = time_dim, test.type = 'two.sided', + time_dim = time_dim, + test.type = 'two.sided', alpha = alpha, pval = FALSE, sign = TRUE, ncores = NULL)$sign @@ -217,8 +224,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci crpss <- 1 - crps_syear / crps_clim_syear ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = crpss, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_crpss, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = crpss, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_crpss, + order = c('time', 'region')) } ## close if on crpss @@ -296,8 +305,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci } ## Save metric result in arrays - aggr_metrics[ , , 
which(metrics == met)] <- s2dv::Reorder(data = enscorr, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = enscorr, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + order = c('time', 'region')) } ## close if on enscorr @@ -331,13 +342,16 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region' ## Remove unnecessary dimension - hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') - obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), list(1,1,1,1) , drop = 'selected') + hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') + obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'), + list(1,1,1,1) , drop = 'selected') ## Calculate significance pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), target_dims = c('syear'), ncores = ncores, - fun = function(x,y){t.test(as.vector(x),as.vector(y))})$p.value + fun = function(x,y) + {t.test(as.vector(x),as.vector(y))})$p.value sign_mean_bias <- pval_mean_bias <= alpha @@ -358,8 +372,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected') ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = mean_bias, order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_mean_bias, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = mean_bias, + order = c('time', 'region')) + aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_mean_bias, + order = c('time', 'region')) } ## close on mean_bias @@ -419,8 +435,10 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci # sign_enssprerr <- pval_enssprerr <= alpha ## Save metric result in array - aggr_metrics[ , , which(metrics == met)] <- s2dv::Reorder(data = enssprerr, order = c('time', 'region')) - # aggr_significance[ , , which(metrics == met)] <- s2dv::Reorder(data = sign_corr, order = c('time', 'region')) + aggr_metrics[,,which(metrics == met)] <- Reorder(data = enssprerr, + order = c('time', 'region')) + # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr, + # order = c('time', 'region')) } ## close on enssprerr @@ -446,7 +464,8 @@ Scorecards_calculations <- function(data, skill_metrics, statistics = NULL, reci attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) + aggr_scorecards <- list(aggr_metrics = aggr_metrics, + aggr_significance = aggr_significance) ## Save metric data arrays recipe$Run$output_dir <- paste0(recipe$Run$output_dir, diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index ead6294e..fd59b36f 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -1,7 +1,11 @@ ############################################################################### -##################### SCORECARDS MODULE FOR SUNSET SUITE ###################### 
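[Editor's note: illustrative sketch, not part of the patch series] The 'score' aggregation above follows one pattern throughout: spatially aggregate the year-by-year scores first, build the skill score from the aggregated values, and test it with s2dv::RandomWalkTest(). A minimal, self-contained sketch of the RPSS case, with hypothetical toy arrays standing in for the regionally averaged RPS values (dimension names follow the calls in this module; the year aggregation is written here with MeanDims, which gives the same RPSS as summing):

```R
library(s2dv)
set.seed(1)
# Toy year-by-year scores: [time = lead month, syear = hindcast year, region]
rps_syear      <- array(abs(rnorm(3 * 10 * 2)), dim = c(time = 3, syear = 10, region = 2))
rps_clim_syear <- array(abs(rnorm(3 * 10 * 2)), dim = c(time = 3, syear = 10, region = 2))

# RPSS = 1 - RPS / RPS_clim, with the scores aggregated over the hindcast years
rpss <- 1 - MeanDims(rps_syear, 'syear') / MeanDims(rps_clim_syear, 'syear')

# Significance from the Random Walk test on the paired yearly scores,
# mirroring the RandomWalkTest() call in the patch
sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, time_dim = 'syear',
                            test.type = 'two.sided', alpha = 0.05,
                            pval = FALSE, sign = TRUE)$sign
```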
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
index ead6294e..fd59b36f 100644
--- a/modules/Scorecards/Scorecards_plotting.R
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -1,7 +1,11 @@
 ###############################################################################
-##################### SCORECARDS MODULE FOR SUNSET SUITE ######################
+##################### Scorecard visualization plotting #######################
 ###############################################################################
 
+## This function loads the saved netCDF files containing the spatially
+## aggregated skill metrics for the scorecards and plots the scorecard
+## visualizations. The function reads from the general recipe.
+
 ##### Load source functions #####
 source('modules/Scorecards/R/tmp/LoadMetrics.R')
 source('modules/Scorecards/R/tmp/Utils.R')
@@ -31,11 +35,13 @@ Scorecards_plotting <- function(recipe) {
   var <- recipe$Analysis$Variables$name
   start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
   end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
-  forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max
+  forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max
   calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method)
-  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric), ", | |,"))
+  metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
+                             ", | |,"))
 
-  if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) {
+  if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' ||
+      is.null(recipe$Analysis$Workflow$Scorecards$start_months)) {
     start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))
   } else {
     start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months,
@@ -228,7 +234,8 @@ Scorecards_plotting <- function(recipe) {
                                              col1.width = col1.width,
                                              col2.width = col2.width,
                                              output.path = output.path)
-    } else {stop ("Difference scorecard can only be computed with two systems or two references.")}
+    } else {stop (
+      "Difference scorecard can only be computed with two systems or two references.")}
   } ## close if on calculate.diff
 }
--
GitLab

From ea8dc32ed903abd4711d12b9af02c5365c01b07f Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Mon, 13 May 2024 11:38:14 +0200
Subject: [PATCH 06/24] Cleaning LoadMetrics function

---
 modules/Scorecards/R/tmp/LoadMetrics.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R
index fe19dc85..2a70eff9 100644
--- a/modules/Scorecards/R/tmp/LoadMetrics.R
+++ b/modules/Scorecards/R/tmp/LoadMetrics.R
@@ -30,14 +30,13 @@
 #'  period = '1993-2016'
 #'  start_months = sprintf("%02d", 1:12),
 #'  calib_method = 'raw',
-#'  input_path = '/esarchive/scratch/nmilders/scorecards_data/input_data')
+#'  input_path = './scorecards_data/input_data')
 #'}
 #'@import easyNCDF
 #'@import multiApply
 #'@export
 
 LoadMetrics <- function(input_path, system, reference, var, period, data_type,
-                        # metrics,
                         start_months, calib_method = NULL) {
 
   # Initial checks
@@ -109,12 +108,13 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type,
 
       result_attr <- attributes(result)
 
-      by_reference <- abind::abind(by_reference, result, along = length(dim(result)) + 1)
+      by_reference <- abind::abind(by_reference, result,
+                                   along = length(dim(result)) + 1)
       dim(by_reference) <- c(dim(result), reference = length(reference))
     } ## close loop on reference
 
-    all_metrics <- abind::abind(all_metrics, by_reference, along = length(dim(by_reference)) + 1)
+    all_metrics <- abind::abind(all_metrics, by_reference,
+                                along = length(dim(by_reference)) + 1)
     dim(all_metrics) <- c(dim(by_reference), system = length(system))
-    # attributes(all_metrics) <- result_attr[-1]
   } ## close loop on system
 
   attributes(all_metrics)$start_months <- start_months
--
GitLab

From 75fe3e5981b0e113e6aeca9614284dc9e418b061 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Fri, 31 May 2024 16:50:01 +0200
Subject: [PATCH 07/24] Included mean_bias and spread-to-error functions which
 compute significance

---
 modules/Skill/R/tmp/Bias.R   | 216 +++++++++++++++++++++++++++++++++
 modules/Skill/R/tmp/SprErr.R | 226 +++++++++++++++++++++++++++++++++++
 modules/Skill/Skill.R        |  40 ++++---
 3 files changed, 464 insertions(+), 18 deletions(-)
 create mode 100644 modules/Skill/R/tmp/Bias.R
 create mode 100644 modules/Skill/R/tmp/SprErr.R

diff --git a/modules/Skill/R/tmp/Bias.R b/modules/Skill/R/tmp/Bias.R
new file mode 100644
index 00000000..098a678e
--- /dev/null
+++ b/modules/Skill/R/tmp/Bias.R
@@ -0,0 +1,216 @@
+#'Compute the Mean Bias
+#'
+#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference
+#'between the ensemble mean forecast and the observations. It is a deterministic
+#'metric. Positive values indicate that the forecasts are on average too high
+#'and negative values indicate that the forecasts are on average too low.
+#'It also allows to compute the Absolute Mean Bias or bias without temporal
+#'mean. If there is more than one dataset, the result will be computed for each
+#'pair of exp and obs data.
+#'
+#'@param exp A named numerical array of the forecast with at least time
+#'  dimension.
+#'@param obs A named numerical array of the observation with at least time
+#'  dimension. The dimensions must be the same as 'exp' except 'memb_dim' and
+#'  'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#'  The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset dimension.
+#'  The length of this dimension can be different between 'exp' and 'obs'.
+#'  The default value is NULL.
+#'@param memb_dim A character string indicating the name of the member dimension
+#'  to compute the ensemble mean; it should be set to NULL if the parameter
+#'  'exp' is already the ensemble mean. The default value is NULL.
+#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or
+#'  kept (FALSE) for computation. The default value is FALSE.
+#'@param absolute A logical value indicating whether to compute the absolute
+#'  bias. The default value is FALSE.
+#'@param time_mean A logical value indicating whether to compute the temporal
+#'  mean of the bias. The default value is TRUE.
+#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of
+#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number
+#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation
+#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions.
+#'
+#'@references
+#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
+#'
+#'@examples
+#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
+#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
+#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE,
+                 absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (!is.array(exp) | !is.numeric(exp))
+    stop("Parameter 'exp' must be a numeric array.")
+  if (!is.array(obs) | !is.numeric(obs))
+    stop("Parameter 'obs' must be a numeric array.")
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) != 1)
+    stop("Parameter 'time_dim' must be a character string.")
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+    }
+    if (memb_dim %in% names(dim(obs))) {
+      if (identical(as.numeric(dim(obs)[memb_dim]), 1)) {
+        obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected')
+      } else {
+        stop("Not implemented for observations with members ('obs' can have 'memb_dim', ",
+             "but it should be of length = 1).")
+      }
+    }
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+  }
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!identical(length(name_exp), length(name_obs)) |
+      !identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop("Parameter 'exp' and 'obs' must have same length of ",
+         "all dimensions except 'memb_dim' and 'dat_dim'.")
+  }
+  ## na.rm
+  if (!is.logical(na.rm) | length(na.rm) > 1) {
+    stop("Parameter 'na.rm' must be one logical value.")
+  }
+  ## absolute
+  if (!is.logical(absolute) | length(absolute) > 1) {
+    stop("Parameter 'absolute' must be one logical value.")
+  }
+  ## time_mean
+  if (!is.logical(time_mean) | length(time_mean) > 1) {
+    stop("Parameter 'time_mean' must be one logical value.")
+  }
+  ## alpha
+  if (!is.null(alpha)) {
+    if (!is.numeric(alpha) | length(alpha) > 1) {
+      stop("Parameter 'alpha' must be null or a numeric value.")
+    }
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ###############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                alpha = alpha,
+                ncores = ncores)
+
+  if (is.null(alpha)) {
+    bias <- bias$output1
+  }
+  return(bias)
+}
+
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE, alpha = NULL) {
+  # exp and obs: [sdate, (dat)]
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+
+    if (!is.null(alpha)) {
+      if (!absolute) {
+        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+        sig <- pval <= alpha
+      }
+    }
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    pval <- array(dim = c(nexp = nexp, nobs = nobs))
+    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+        if (!is.null(alpha)) {
+          if (!absolute) {
+            pval[i,j] <- t.test(x = obs[,j], y = exp[,i],
+                                alternative = "two.sided")$p.value
+            sig[i,j] <- pval <= alpha
+          }
+        }
+      }
+    }
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+  if (!is.null(alpha) && !absolute) {
+    return(list(bias = bias, sig = sig))
+  } else {
+    return(bias)
+  }
+}
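[Editor's note: illustrative sketch, not part of the patch series] In the simplest case (no dat_dim, absolute = FALSE, time_mean = TRUE), the significance computed by .Bias() above reduces to a two-sided t-test between the observations and the ensemble-mean hindcast. A self-contained sketch with hypothetical toy data:

```R
set.seed(42)
exp_ensmean <- rnorm(50, mean = 0.3)  # ensemble-mean hindcast at one point
obs         <- rnorm(50)              # matching observations

bias <- mean(exp_ensmean - obs)       # mean bias over the hindcast period
pval <- t.test(x = obs, y = exp_ensmean, alternative = "two.sided")$p.value
sign <- pval <= 0.05                  # TRUE if the mean bias is significant
```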
diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R
new file mode 100644
index 00000000..15bcba31
--- /dev/null
+++ b/modules/Skill/R/tmp/SprErr.R
@@ -0,0 +1,226 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the spread of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value and/or the statistical
+#'significance is provided by a one-sided Fisher's test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#'  dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#'  is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#'  which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value
+#'  of the test Ho: SD/RMSE = 1. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value is
+#'  FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#'  value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of
+#'  dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#'  the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'.
+#'  If dat_dim is NULL, nexp and nobs are omitted. \cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p_val}{
+#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
+#'  if \code{pval = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\dontrun{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
+#'}
+#'
+#'@import multiApply
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be array with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ",
+         "Set it as NULL if there is no member dimension.")
+  }
+  # Add [member = 1]
+  if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    dim(obs) <- c(dim(obs), 1)
+    names(dim(obs))[length(dim(obs))] <- memb_dim
+  }
+  if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
+    dim(exp) <- c(dim(exp), 1)
+    names(dim(exp))[length(dim(exp))] <- memb_dim
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  name_obs <- name_obs[-which(name_obs == memb_dim)]
+  if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
+                "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  # alpha
+  if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
+  }
+  # na.rm
+  if (!na.rm %in% c(TRUE, FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+
+  ###############################
+  # Calculate RatioSDRMS
+
+  # If dat_dim = NULL, insert dat dim
+  remove_dat_dim <- FALSE
+  if (is.null(dat_dim)) {
+    dat_dim <- 'dataset'
+    exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset')
+    remove_dat_dim <- TRUE
+  }
+
+  res <- Apply(list(exp, obs),
+               target_dims = list(c(dat_dim, memb_dim, time_dim),
+                                  c(dat_dim, memb_dim, time_dim)),
+               pval = pval,
+               sign = sign,
+               na.rm = na.rm,
+               fun = .SprErr,
+               ncores = ncores)
+
+  if (remove_dat_dim) {
+    if (length(dim(res[[1]])) > 2) {
+      res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected')
+    } else {
+      res <- lapply(res, as.numeric)
+    }
+  }
+
+  return(res)
+}
+
+.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) {
+
+  # exp: [dat_exp, member, sdate]
+  # obs: [dat_obs, member, sdate]
+  nexp <- dim(exp)[1]
+  nobs <- dim(obs)[1]
+
+  # ensemble mean
+  ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate]
+  ens_obs <- MeanDims(obs, 2, na.rm = na.rm)
+
+  # Create empty arrays
+  ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+  p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs]
+
+  for (jexp in 1:nexp) {
+    for (jobs in 1:nobs) {
+
+      # spread and error
+      spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm))
+      error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm))
+      ratio[jexp, jobs] <- spread/error
+
+      # effective sample size
+      enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3]))
+      enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass)
+      if (pval) {
+        F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1))
+        if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) {
+          p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1)
+          p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs])
+        } else {
+          ratio[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+
+  res <- list(ratio = ratio)
+  if (pval) {res$p.val <- p.val}
+  if (sign) {res$sign <- p.val <= alpha}
+
+  return(res)
+}
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index d59f29d7..2a803dc0 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -19,9 +19,11 @@ source("modules/Skill/R/tmp/GetProbs.R")
 ## Temporary
 source("modules/Skill/R/tmp/RPS.R")
 source("modules/Skill/R/tmp/CRPS.R")
+source("modules/Skill/R/tmp/SprErr.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/115
+source("modules/Skill/R/tmp/Bias.R") # https://earth.bsc.es/gitlab/es/s2dv/-/issues/118
 
 Skill <- function(recipe, data, agg = 'global') {
-  
+
   # data$hcst: s2dv_cube containing the hindcast
   # obs: s2dv_cube containing the observations
   # recipe: auto-s2s recipe as provided by read_yaml
@@ -225,15 +227,19 @@ Skill <- function(recipe, data, agg = 'global') {
       skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
+                    alpha = 0.05,
                     ncores = ncores)
     } else {
       skill <- Bias(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
+                    alpha = 0.05,
                     ncores = ncores)
     }
-    skill <- .drop_dims(skill)
-    skill_metrics[[ metric ]] <- skill
+    skill <- lapply(skill, function(x) {
+      .drop_dims(x)})
+    skill_metrics[[ metric ]] <- skill$bias
+    skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sig
     # Mean bias skill score
   } else if (metric == 'mean_bias_ss') {
     if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) &&
@@ -310,21 +316,19 @@ Skill <- function(recipe, data, agg = 'global') {
     skill_metrics[[ metric ]] <- skill$msss
     skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
   } else if (metric == 'enssprerr') {
-    # Remove ensemble dim from obs to avoid veriApply warning
-    obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1,
-                                          drop = "selected")
-    capture.output(
-      skill <- easyVerification::veriApply(verifun = 'EnsSprErr',
-                                           fcst = data$hcst$data,
-                                           obs = obs_noensdim,
-                                           tdim = which(names(dim(data$hcst$data))==time_dim),
-                                           ensdim = which(names(dim(data$hcst$data))==memb_dim),
-                                           na.rm = na.rm,
-                                           ncpus = ncores)
-    )
-    remove(obs_noensdim)
-    skill <- .drop_dims(skill)
-    skill_metrics[[ metric ]] <- skill
+    skill <- SprErr(data$hcst$data, data$obs$data,
+                    time_dim = time_dim,
+                    memb_dim = memb_dim,
+                    dat_dim = 'dat',
+                    pval = TRUE,
+                    sign = TRUE,
+                    alpha = 0.05,
+                    na.rm = na.rm,
+                    ncores = ncores)
+    skill <- lapply(skill, function(x) {
+      .drop_dims(x)})
+    skill_metrics[[ metric ]] <- skill$ratio
+    skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
     # SpecsVerification metrics
   } else if (grepl("specs", metric, fixed = TRUE)) {
     # Compute SpecsVerification version of the metrics
--
GitLab

From c5a6f9c7d8dd813a8d85a7d7484fb7f082a36e46 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Mon, 3 Jun 2024 12:58:38 +0200
Subject: [PATCH 08/24] Updating SprErr function

---
 modules/Skill/R/tmp/SprErr.R | 25 +++++++++----------------
 modules/Skill/Skill.R        |  3 ++-
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R
index 15bcba31..a22bdf0d 100644
--- a/modules/Skill/R/tmp/SprErr.R
+++ b/modules/Skill/R/tmp/SprErr.R
@@ -3,7 +3,7 @@
 #'Compute the ratio between the spread of the members around the
 #'ensemble mean in experimental data and the RMSE between the ensemble mean of
 #'experimental and observational data. The p-value and/or the statistical
-#'significance is provided by a one-sided Fisher's test.
+#'significance is provided by a two-sided Fisher's test.
 #'
 #'@param exp A named numeric array of experimental data with at least two
 #'  dimensions 'memb_dim' and 'time_dim'.
@@ -37,23 +37,16 @@
 #'  The ratio of the ensemble spread and RMSE.
 #'}
 #'\item{$p_val}{
-#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present
+#'  The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only present
 #'  if \code{pval = TRUE}.
 #'}
 #'
 #'@examples
-#'# Load sample data as in Load() example:
-#'example(Load)
-#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
-#'# Reorder the data in order to plot it with PlotVsLTime
-#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
-#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
-#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
-#'\dontrun{
-#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
-#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
-#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
-#'}
+#'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5))
+#'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3))
+#'sprerr1 <- SprErr(exp, obs)
+#'sprerr2 <- SprErr(exp, obs, pval=F, sign=T)
+#'sprerr3 <- SprErr(exp, obs, pval=T, sign=T)
 #'
 #'@import multiApply
 #'@export
@@ -206,13 +199,13 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
 
       # effective sample size
       enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3]))
      enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass)
-      if (pval) {
+      if (pval | sign) {
        F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1))
        if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) {
          p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1)
          p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs])
        } else {
-          ratio[jexp, jobs] <- NA
+          p.val[jexp, jobs] <- NA
        }
      }
    }

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 2a803dc0..47832bde 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -316,11 +316,12 @@ Skill <- function(recipe, data, agg = 'global') {
     skill_metrics[[ metric ]] <- skill$msss
     skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
   } else if (metric == 'enssprerr') {
+    .Eno <- s2dv:::.Eno
     skill <- SprErr(data$hcst$data, data$obs$data,
                     time_dim = time_dim,
                     memb_dim = memb_dim,
                     dat_dim = 'dat',
-                    pval = TRUE,
+                    pval = FALSE,
                     sign = TRUE,
                     alpha = 0.05,
                     na.rm = na.rm,
--
GitLab
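[Editor's note: illustrative sketch, not part of the patch series] The `p.val <- 2 * min(p.val, 1 - p.val)` line retained above is what makes the Fisher's test two-sided, consistent with the documentation change in this patch. A quick numeric check with hypothetical numbers:

```R
p_one_sided <- pf(2.1, df1 = 19, df2 = 19)            # approx. 0.94
p_two_sided <- 2 * min(p_one_sided, 1 - p_one_sided)  # approx. 0.11
# Both a very large and a very small spread/RMSE ratio now count as
# evidence against Ho: spread/RMSE = 1.
```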
From 3ecba8a61a94acac9dca848bbb8479f27ad8b4bc Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Mon, 3 Jun 2024 14:33:24 +0200
Subject: [PATCH 09/24] Updated unit tests to include mean_bias_significance
 metric

---
 tests/testthat/test-seasonal_NAO.R         | 3 ++-
 tests/testthat/test-seasonal_downscaling.R | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R
index f70eefc7..c2e13546 100644
--- a/tests/testthat/test-seasonal_NAO.R
+++ b/tests/testthat/test-seasonal_NAO.R
@@ -213,7 +213,7 @@ TRUE
 )
 expect_equal(
 names(skill_metrics),
-c("mean_bias", "enscorr",
+c("mean_bias", "mean_bias_significance", "enscorr",
   "enscorr_significance", "rps", "rpss", "rpss_significance",
   "crps", "crpss", "crpss_significance", "enssprerr")
 )
@@ -254,6 +254,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")),
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc",
+  "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rpss_significance_1993-2000_s03.nc"

diff --git a/tests/testthat/test-seasonal_downscaling.R b/tests/testthat/test-seasonal_downscaling.R
index 8a52657a..1ba0e3df 100644
--- a/tests/testthat/test-seasonal_downscaling.R
+++ b/tests/testthat/test-seasonal_downscaling.R
@@ -214,7 +214,7 @@ TRUE
 )
 expect_equal(
 names(skill_metrics),
-c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias")
+c("bss10", "bss10_significance", "crpss", "crpss_significance","rpss", "rpss_significance", "mean_bias", "mean_bias_significance")
 )
 expect_equal(
 class(skill_metrics$rpss),
--
GitLab

From b0331406cc015d67ae679715c6a4d247064b3127 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Mon, 3 Jun 2024 16:05:36 +0200
Subject: [PATCH 10/24] Updated unit test to include enssprerr_significance
 metric

---
 tests/testthat/test-seasonal_NAO.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-seasonal_NAO.R b/tests/testthat/test-seasonal_NAO.R
index c2e13546..f5a77af2 100644
--- a/tests/testthat/test-seasonal_NAO.R
+++ b/tests/testthat/test-seasonal_NAO.R
@@ -215,7 +215,7 @@ expect_equal(
 names(skill_metrics),
 c("mean_bias", "mean_bias_significance", "enscorr",
   "enscorr_significance", "rps", "rpss", "rpss_significance",
-  "crps", "crpss", "crpss_significance", "enssprerr")
+  "crps", "crpss", "crpss_significance", "enssprerr", "enssprerr_significance")
 )
 expect_equal(
 class(skill_metrics$rpss),
@@ -253,6 +253,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")),
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enscorr_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_1993-2000_s03.nc",
+  "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_enssprerr_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_mean_bias_significance_1993-2000_s03.nc",
   "Skill/ECMWF-SEAS5/ERA5/raw/nao/scorecards_ECMWF-SEAS5_ERA5_nao_rps_1993-2000_s03.nc",
@@ -263,7 +264,7 @@ c(paste0("Indices/ECMWF-SEAS5/nao/", paste0("nao_", 1993:2000, "0301.nc")),
 
 expect_equal(
 length(list.files(outputs, recursive = T)),
-26
+28
 )
 
 })
--
GitLab

From e873564b5bdef9b3816088b02efeecee62cfb422 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 25 Jun 2024 11:14:46 +0200
Subject: [PATCH 11/24] Fix format of recipe elements

---
 modules/Scorecards/execute_scorecards.R | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index f5cf0452..401e6b45 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -18,23 +18,19 @@ for (variable in 1:length(recipe$Analysis$Variables)) {
   scorecard_recipe <- recipe
   # Collect all system names
   scorecard_recipe$Analysis$Datasets$System <-
-    list(name = as.vector(unlist(recipe$Analysis$Datasets$System)))
+    as.vector(unlist(recipe$Analysis$Datasets$System))
   # Include multimodel in systems
   if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) {
-    scorecard_recipe$Analysis$Datasets$System$name <-
-      c(scorecard_recipe$Analysis$Datasets$System$name, 'Multimodel')
+    scorecard_recipe$Analysis$Datasets$System <-
+      c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel')
   }
   # Collect all reference names
   scorecard_recipe$Analysis$Datasets$Reference <-
-    list(name = as.vector(unlist(recipe$Analysis$Datasets$Reference)))
+    as.vector(unlist(recipe$Analysis$Datasets$Reference))
   # Assign variables
   scorecard_recipe$Analysis$Variables <-
     recipe$Analysis$Variables[[variable]]
-#  scorecard_recipe$Analysis$Datasets$Reference <-
-#    as.vector(unlist(recipe$Analysis$Datasets$Reference))
-  scorecard_recipe$Analysis$Variables <-
-    recipe$Analysis$Variables[[variable]]
-  ## Plot Scorecards
+  ## Plot Scorecards
   Scorecards_plotting(scorecard_recipe)
 }
--
GitLab
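[Editor's note: illustrative sketch, not part of the patch series] PATCH 11 flattens the System/Reference recipe entries from a named list to a plain character vector; PATCH 13 below restores the named-list form that the rest of the suite reads as `$Datasets$System$name`. A toy illustration of the two shapes (hypothetical dataset names):

```R
recipe_systems <- list(sys1 = 'ECMWF-SEAS5', sys2 = 'CMCC-SPS3.5')

# Named-list form, accessed elsewhere as ...$Datasets$System$name:
list(name = as.vector(unlist(recipe_systems)))

# Flattened form used briefly in this patch:
as.vector(unlist(recipe_systems))
```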
From 126479e5eeb8c92a1e4d0d22e0b386b8738d62a5 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 25 Jun 2024 11:15:54 +0200
Subject: [PATCH 12/24] Update Bias with latest changes

---
 modules/Skill/R/tmp/Bias.R | 53 ++++++++++++++++++++++++++------------
 modules/Skill/Skill.R      |  2 +-
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/modules/Skill/R/tmp/Bias.R b/modules/Skill/R/tmp/Bias.R
index 098a678e..d4887189 100644
--- a/modules/Skill/R/tmp/Bias.R
+++ b/modules/Skill/R/tmp/Bias.R
@@ -27,7 +27,8 @@
 #'  bias. The default value is FALSE.
 #'@param time_mean A logical value indicating whether to compute the temporal
 #'  mean of the bias. The default value is TRUE.
-#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE.
+#'@param alpha A numeric or NULL (default) to indicate the significance level
+#'  using Welch's t-test. Only available when absolute is FALSE.
 #'@param ncores An integer indicating the number of cores to use for parallel
 #'  computation. The default value is NULL.
 #'
@@ -35,7 +36,10 @@
 #'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of
 #''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number
 #'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation
-#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions.
+#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If
+#'alpha is specified, and absolute is FALSE, the result is a list with two
+#'elements: the bias as described above and the significance as a logical array
+#'with the same dimensions.
 #'
 #'@references
 #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
@@ -44,12 +48,15 @@
 #'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
 #'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))
 #'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member')
+#'bias2 <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.01)
+#'abs_bias <- Bias(exp = exp, obs = obs, memb_dim = 'member', absolute = TRUE, alpha = NULL)
 #'
 #'@import multiApply
 #'@importFrom ClimProjDiags Subset
 #'@export
-Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE,
-                 absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) {
+Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
+                 na.rm = FALSE, absolute = FALSE, time_mean = TRUE,
+                 alpha = 0.05, ncores = NULL) {
 
   # Check inputs
   ## exp and obs (1)
@@ -123,9 +130,14 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
   }
   ## alpha
   if (!is.null(alpha)) {
-    if (!is.numeric(alpha) | length(alpha) > 1) {
+    if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
       stop("Parameter 'alpha' must be null or a numeric value.")
     }
+    if (absolute) {
+      alpha <- NULL
+      .warning("Parameter 'absolute' is TRUE, so 'alpha' has been set to ",
+               "NULL and significance will not be returned.")
+    }
   }
   ## ncores
   if (!is.null(ncores)) {
@@ -134,9 +146,9 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
       stop("Parameter 'ncores' must be either NULL or a positive integer.")
     }
   }
-  
+
   ###############################
-  
+
   ## Ensemble mean
   if (!is.null(memb_dim)) {
     exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
@@ -153,7 +165,7 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
                 time_mean = time_mean,
                 alpha = alpha,
                 ncores = ncores)
-  
+
   if (is.null(alpha)) {
     bias <- bias$output1
   }
@@ -177,8 +189,12 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
 
     if (!is.null(alpha)) {
       if (!absolute) {
-        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
-        sig <- pval <= alpha
+        if (all(is.na(bias))) {
+          sign <- NA
+        } else {
+          pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+          sign <- pval <= alpha
+        }
       }
     }
   } else {
@@ -186,30 +202,33 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL,
     nobs <- as.numeric(dim(obs)[dat_dim])
     bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
     pval <- array(dim = c(nexp = nexp, nobs = nobs))
-    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    sign <- array(dim = c(nexp = nexp, nobs = nobs))
     for (i in 1:nexp) {
       for (j in 1:nobs) {
         bias[, i, j] <- exp[, i] - obs[, j]
         if (!is.null(alpha)) {
           if (!absolute) {
-            pval[i,j] <- t.test(x = obs[,j], y = exp[,i],
-                                alternative = "two.sided")$p.value
-            sig[i,j] <- pval <= alpha
+            pval[i, j] <- t.test(x = obs[, j], y = exp[, i],
+                                 alternative = "two.sided")$p.value
+            sign[i, j] <- pval[i, j] <= alpha
           }
         }
       }
     }
-    
+
     if (isTRUE(absolute)) {
       bias <- abs(bias)
    }
-    
+
     if (isTRUE(time_mean)) {
       bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+      if (!is.null(sign)) {
+        sign[which(is.na(bias))] <- NA
+      }
     }
   }
   if (!is.null(alpha) && !absolute) {
-    return(list(bias = bias, sig = sig))
+    return(list(bias = bias, sign = sign))
   } else {
     return(bias)
   }

diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index 4f1d8f31..7c829c25 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -238,7 +238,7 @@ Skill <- function(recipe, data, agg = 'global') {
     skill <- lapply(skill, function(x) {
       .drop_dims(x)})
     skill_metrics[[ metric ]] <- skill$bias
-    skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sig
+    skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign
     # Mean bias skill score
   } else if (metric == 'mean_bias_ss') {
     if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) &&
--
GitLab
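[Editor's note: illustrative sketch, not part of the patch series] With alpha now defaulting to 0.05, Bias() returns a list unless significance is switched off explicitly. A quick check of the two return shapes, reusing the toy arrays from the roxygen examples (sizes adjusted so the arrays are filled exactly):

```R
exp <- array(rnorm(7500), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50))
obs <- array(rnorm(750), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50))

b1 <- Bias(exp, obs, memb_dim = 'member', alpha = NULL)  # plain bias array
b2 <- Bias(exp, obs, memb_dim = 'member')                # list(bias, sign)
str(b2$sign)  # logical array matching the dimensions of b2$bias
```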
From 36b44ed4e523938f48e2d1cbb30560a8ad6c3f60 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Tue, 25 Jun 2024 14:31:09 +0200
Subject: [PATCH 13/24] Formatting and consistency improvements

---
 modules/Scorecards/Scorecards_calculations.R | 178 ++++++++-----------
 modules/Scorecards/Scorecards_plotting.R     |  38 ++--
 modules/Scorecards/execute_scorecards.R      |   7 +-
 3 files changed, 97 insertions(+), 126 deletions(-)

diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R
index ca39dd01..7b91e984 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -9,13 +9,39 @@
 ## the atomic recipes.
 
 ## Define function
-Scorecards_calculations <- function(data, skill_metrics,
-                                    statistics = NULL, recipe) {
+Scorecards_calculations <- function(recipe, data, skill_metrics,
+                                    statistics = NULL) {
+  # Check that skill_metrics is not NULL
+  if (is.null(skill_metrics)) {
+    stop("Scorecards calculations requested but 'skill_metrics' not provided.")
+  } else if (!is.list(skill_metrics)) {
+    stop("'skill_metrics' should be a named list with metric arrays")
+  }
+  # Assign parameters
+  ncores <- recipe$Analysis$ncores
+  if (is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)) {
+    alpha <- 0.05
+  } else {
+    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
+  }
+
+  if (is.null(recipe$Analysis$remove_NAs)) {
+    na.rm <- FALSE
+  } else {
+    na.rm <- recipe$Analysis$remove_NAs
+  }
+
+  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
+    inf.to.na <- FALSE
+  } else {
+    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
+  }
+
   ## Parameters for saving output data files
   output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/")
   dir.create(output.path, recursive = T, showWarnings = F)
-
+
   system <- recipe$Analysis$Datasets$System$name
   reference <- recipe$Analysis$Datasets$Reference$name
   var <- recipe$Analysis$Variables$name
@@ -30,7 +58,6 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
   metric.aggregation <- recipe$Analysis$Workflow$Scorecards$metric_aggregation
   metrics <- unlist(strsplit(tolower(recipe$Analysis$Workflow$Scorecards$metric),
                              ", | |,"))
-  ncores <- recipe$Analysis$ncores
-
-  if(is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)){
-    alpha <- 0.05
-  } else {
-    alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha
-  }
-
-  if(is.null(recipe$Analysis$remove_NAs)){
-    na.rm <- FALSE
-  } else {
-    na.rm <- recipe$Analysis$remove_NAs
-  }
-
-  if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){
-    inf.to.na <- FALSE
-  } else {
-    inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na
-  }
 
   ## Define array to be filled with data
   aggr_significance <- array(data = NA,
@@ -52,7 +65,16 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
                                      region = length(regions),
                                      metric = length(metrics)))
 
   ## For data that is already aggregated by region
   if ("region" %in% names(dim(skill_metrics[[1]]))) {
-
     aggr_metrics <- NULL
-
-    for(met in metrics){
+    for (met in metrics) {
       skill_metrics[[met]] <- Reorder(skill_metrics[[met]],
                                       c("time", "region", "var"))
       aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3)
-    }
-
+    }
     names(dim(aggr_metrics)) <- c("time", "region", "metric")
-
-  } else {
+  } else {
     ## Define arrays to be filled with data
     aggr_metrics <- array(data = NA,
                           dim = c(time = length(forecast.months),
                                   region = length(regions),
                                   metric = length(metrics)))
 
     lon_dim <- 'longitude'
     lat_dim <- 'latitude'
@@ -88,41 +89,34 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
     lon <- as.numeric(data$hcst$coords$longitude)
     lat <- as.numeric(data$hcst$coords$latitude)
 
     ## Skill aggregation
-    if(metric.aggregation == 'skill'){
+    if (metric.aggregation == 'skill') {
+      ## Calculate weighted mean of spatial aggregation
+      for (met in metrics) {
+        result <- sapply(X = 1:length(regions),
+                         FUN = function(X) {
+                           WeightedMean(data = skill_metrics[[met]],
+                                        region = regions[[X]],
+                                        lon = lon, londim = lon_dim,
+                                        lat = lat, latdim = lat_dim,
+                                        na.rm = na.rm)
+                         }, simplify = 'array')
 
-      ## Calculate weighted mean of spatial aggregation
-      for(met in metrics){
-        result <- sapply(X = 1:length(regions),
-                         FUN = function(X) {
-                           WeightedMean(data = skill_metrics[[met]],
-                                        region = regions[[X]],
-                                        lon = lon, londim = lon_dim,
-                                        lat = lat, latdim = lat_dim,
-                                        na.rm = na.rm)
-                         }, simplify = 'array')
-
-        names(dim(result))[length(dim(result))] <- 'region'
-        result <-Subset(result, 'var', 1, drop = 'selected')
+        names(dim(result))[length(dim(result))] <- 'region'
+        result <-Subset(result, 'var', 1, drop = 'selected')
 
-        if(met =='crpss' && inf.to.na == TRUE){
-          result[result == -Inf] <- NA
-        }
-
-        aggr_metrics[,,which(metrics == met)] <- Reorder(data = result,
-                                                         order = c('time', 'region'))
-
-      } ## close on met
+        if (met =='crpss' && inf.to.na == TRUE) {
+          result[result == -Inf] <- NA
+        }
+        aggr_metrics[,,which(metrics == met)] <- Reorder(data = result,
+                                                         order = c('time', 'region'))
+      } ## close on met
 
     } ## close if on skill
 
     ## Score Aggregation
-    if(metric.aggregation == 'score'){
-
+    if (metric.aggregation == 'score') {
       ## Spatially aggregate data for each metric
       for (met in metrics) {
-
-        if(met == 'rpss'){
-
+        if (met == 'rpss') {
           rps_syear <- sapply(X = 1:length(regions),
                               FUN = function(X) {
                                 WeightedMean(data = skill_metrics$rps_syear,
                                              region = regions[[X]],
                                              lon = lon, londim = lon_dim,
                                              lat = lat, latdim = lat_dim,
                                              na.rm = na.rm)
                               }, simplify = 'array')
@@ -174,10 +168,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
           aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss,
                                                                 order = c('time', 'region'))
 
-        } ## close if on rpss
-
-        if(met == 'crpss'){
-
+        } else if (met == 'crpss') {
           crps_syear <- sapply(X = 1:length(regions),
                                FUN = function(X) {
                                  WeightedMean(data = skill_metrics$crps_syear,
                                               region = regions[[X]],
                                               lon = lon, londim = lon_dim,
                                               lat = lat, latdim = lat_dim,
                                               na.rm = na.rm)
                                }, simplify = 'array')
-
           crps_clim_syear <- sapply(X = 1:length(regions),
                                     FUN = function(X) {
                                       WeightedMean(data = skill_metrics$crps_clim_syear,
                                                    region = regions[[X]],
                                                    lon = lon, londim = lon_dim,
                                                    lat = lat, latdim = lat_dim,
                                                    na.rm = na.rm)
                                     }, simplify = 'array')
@@ -224,11 +214,11 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
           crpss <- 1 - crps_syear / crps_clim_syear
 
           ## Save metric result in arrays
-          aggr_metrics[,,which(metrics == met)] <- Reorder(data = crpss,
-                                                           order = c('time', 'region'))
-          aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_crpss,
-                                                                order = c('time', 'region'))
-
-        } ## close if on crpss
-
-        if(met == 'enscorr'){
-
+          aggr_metrics[ , , which(metrics == met)] <- Reorder(data = crpss,
+                                                              order = c('time', 'region'))
+          aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_crpss,
+                                                                   order = c('time', 'region'))
+        } else if (met == 'enscorr') {
           cov <- sapply(X = 1:length(regions),
                         FUN = function(X) {
                           WeightedMean(data = statistics$cov,
                                        region = regions[[X]],
                                        lon = lon, londim = lon_dim,
                                        lat = lat, latdim = lat_dim,
                                        na.rm = na.rm)
                         }, simplify = 'array')
-
           std_hcst <- sapply(X = 1:length(regions),
                              FUN = function(X) {
                                WeightedMean(data = statistics$std_hcst,
                                             region = regions[[X]],
                                             lon = lon, londim = lon_dim,
                                             lat = lat, latdim = lat_dim,
                                             na.rm = na.rm)
                              }, simplify = 'array')
-
           std_obs <- sapply(X = 1:length(regions),
                             FUN = function(X) {
                               WeightedMean(data = statistics$std_obs,
                                            region = regions[[X]],
                                            lon = lon, londim = lon_dim,
                                            lat = lat, latdim = lat_dim,
                                            na.rm = na.rm)
                             }, simplify = 'array')
-
           n_eff <- sapply(X = 1:length(regions),
                           FUN = function(X) {
                             WeightedMean(data = statistics$n_eff,
                                          region = regions[[X]],
                                          lon = lon, londim = lon_dim,
                                          lat = lat, latdim = lat_dim,
                                          na.rm = na.rm)
                           }, simplify = 'array')
@@ -275,14 +268,13 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
                              dim = c(time = length(forecast.months),
                                      region = length(regions)))
 
-          for (time in 1:dim(sign_corr)[['time']]){
-            for (reg in 1:dim(sign_corr)[['region']]){
-
-              if (anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE
-                  && t[time, reg] >= t_alpha2_n2[time, reg]){
-                sign_corr[time, reg] <- TRUE
-              } else {
-                sign_corr[time, reg] <- FALSE
-              }
+          for (time in 1:dim(sign_corr)[['time']]) {
+            for (reg in 1:dim(sign_corr)[['region']]) {
+              if ((anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE) &&
+                  (t[time, reg] >= t_alpha2_n2[time, reg])) {
+                sign_corr[time, reg] <- TRUE
+              } else {
+                sign_corr[time, reg] <- FALSE
+              }
             }
           }
 
@@ -292,10 +284,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
           aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
                                                                 order = c('time', 'region'))
 
-        } ## close if on enscorr
-
-        if(met == 'mean_bias'){
-
+        } else if (met == 'mean_bias') {
           ## Calculate ensemble mean
           hcst_data_ens <- MeanDims(data$hcst$data, dims = 'ensemble')
           obs_data_ens <- MeanDims(data$obs$data, dims = 'ensemble')
@@ -351,12 +340,11 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
           mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected')
 
           ## Save metric result in array
-          aggr_metrics[,,which(metrics == met)] <- Reorder(data = mean_bias,
-                                                           order = c('time', 'region'))
-          aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_mean_bias,
-                                                                order = c('time', 'region'))
-
-        } ## close on mean_bias
-
-        if(met == 'enssprerr'){
-
+          aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias,
+                                                              order = c('time', 'region'))
+          aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias,
+                                                                   order = c('time', 'region'))
+        } else if (met == 'enssprerr') {
           ## Calculate metric
           spread <- sapply(X = 1:length(regions),
                            FUN = function(X) {
@@ -411,17 +399,13 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
           # sign_enssprerr <- pval_enssprerr <= alpha
 
           ## Save metric result in array
-          aggr_metrics[,,which(metrics == met)] <- Reorder(data = enssprerr,
-                                                           order = c('time', 'region'))
-          # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
-          #                                                       order = c('time', 'region'))
-
-        } ## close on enssprerr
-
+          aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr,
+                                                              order = c('time', 'region'))
+          # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
+          #                                                       order = c('time', 'region'))
+        }
       } ## close loop on metric
-
     } ## close if on score
-
   } ## close if on region
 
   ## Set NAs to False
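[Editor's note: illustrative sketch, not part of the patch series] The enscorr significance in the refactored block above compares a t statistic against the two-sided critical value t_alpha2_n2; the computation of t itself sits in context lines not shown in the hunk. Assuming the usual formula t = |r| * sqrt(n_eff - 2) / sqrt(1 - r^2), a one-point sketch with hypothetical values:

```R
r <- 0.55      # regionally aggregated ensemble-mean correlation (toy value)
n_eff <- 28    # effective number of independent years (toy value)
alpha <- 0.05

t_stat      <- abs(r) * sqrt(n_eff - 2) / sqrt(1 - r^2)
t_alpha2_n2 <- qt(p = alpha / 2, df = n_eff - 2, lower.tail = FALSE)
sign_corr   <- !anyNA(c(t_stat, t_alpha2_n2)) && t_stat >= t_alpha2_n2  # TRUE
```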
system <- as.vector(unlist(recipe$Analysis$Datasets$System)) - reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference)) + system <- as.vector(unlist(recipe$Analysis$Datasets$System$name)) + reference <- as.vector(unlist(recipe$Analysis$Datasets$Reference$name)) var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) @@ -42,7 +42,7 @@ Scorecards_plotting <- function(recipe) { if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { - start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1, 2)) } else { start.months <- as.numeric(strsplit(recipe$Analysis$Workflow$Scorecards$start_months, split = ", | |,")[[1]]) @@ -65,57 +65,57 @@ Scorecards_plotting <- function(recipe) { legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width - if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$plot_legend)) { plot.legend <- TRUE } else { plot.legend <- recipe$Analysis$Workflow$Scorecards$plot_legend } - if(is.null(recipe$Analysis$Workflow$Scorecards$columns_width)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$columns_width)) { columns.width <- 1.2 } else { columns.width <- recipe$Analysis$Workflow$Scorecards$columns_width } - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_white_space)) { legend.white.space <- 6 } else { legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space } - if(is.null(recipe$Analysis$Workflow$Scorecards$legend_height)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$legend_height)) { legend.height <- 50 } else { legend.height <- recipe$Analysis$Workflow$Scorecards$legend_height } - if(is.null(recipe$Analysis$Workflow$Scorecards$label_scale)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$label_scale)) { label.scale <- 1.4 } else { label.scale <- recipe$Analysis$Workflow$Scorecards$label_scale } - if(is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$round_decimal)) { round.decimal <- 2 } else { round.decimal <- recipe$Analysis$Workflow$Scorecards$round_decimal } - if(is.null(recipe$Analysis$Workflow$Scorecards$font_size)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$font_size)) { font.size <- 1.1 } else { font.size <- recipe$Analysis$Workflow$Scorecards$font_size } ## Define if difference scorecard is to be plotted - if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)){ + if (is.null(recipe$Analysis$Workflow$Scorecards$calculate_diff)) { calculate.diff <- FALSE } else { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } - if ('name' %in% names(recipe$Analysis$Variables)){ + if ('name' %in% names(recipe$Analysis$Variables)) { if (recipe$Analysis$Variables$name == var) { var.units <- recipe$Analysis$Variables$units } @@ -186,7 +186,7 @@ Scorecards_plotting <- function(recipe) { ## Create multi system/reference scorecard tables ## (multiple systems with one reference or one system with multiple references) ## Metrics input must be in the same order as function SC_spatial_aggregation - if(length(system) > 1 || length(reference) > 1){ + if (length(system) > 1 || length(reference) > 1) { scorecard_multi <- ScorecardsMulti(data = aggregated_metrics, sign = 
aggregated_significance, system = system, @@ -214,9 +214,8 @@ Scorecards_plotting <- function(recipe) { output.path = output.path) } ## close if - - if(calculate.diff == TRUE){ - if(length(system) == 2 || length(reference) == 2){ + if (calculate.diff == TRUE) { + if (length(system) == 2 || length(reference) == 2) { scorecard_diff <- ScorecardsSystemDiff(data = aggregated_metrics, system = system, reference = reference, @@ -234,9 +233,10 @@ Scorecards_plotting <- function(recipe) { col1.width = col1.width, col2.width = col2.width, output.path = output.path) - } else {stop ( - "Difference scorecard can only be computed with two systems or two references.")} + } else { + stop("Difference scorecard can only be computed with two systems or ", + "two references.")} } ## close if on calculate.diff - + } diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 401e6b45..2fa27549 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -5,7 +5,6 @@ args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] output_dir <- args[2] -## TODO: Replace with function # Read recipe and set outdir recipe <- read_yaml(recipe_file) recipe$Run$output_dir <- output_dir @@ -18,7 +17,7 @@ for (variable in 1:length(recipe$Analysis$Variables)) { scorecard_recipe <- recipe # Collect all system names scorecard_recipe$Analysis$Datasets$System <- - as.vector(unlist(recipe$Analysis$Datasets$System)) + list(name = as.vector(unlist(recipe$Analysis$Datasets$System))) # Include multimodel in systems if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) { scorecard_recipe$Analysis$Datasets$System <- @@ -26,11 +25,11 @@ for (variable in 1:length(recipe$Analysis$Variables)) { } # Collect all reference names scorecard_recipe$Analysis$Datasets$Reference <- - as.vector(unlist(recipe$Analysis$Datasets$Reference)) + list(name = as.vector(unlist(recipe$Analysis$Datasets$Reference))) # Assign variables scorecard_recipe$Analysis$Variables <- recipe$Analysis$Variables[[variable]] - ## Plot Scorecards + # Plot Scorecards Scorecards_plotting(scorecard_recipe) } -- GitLab From f220eb1ee966f9970b023062775e93c959d871ce Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 10:53:31 +0200 Subject: [PATCH 14/24] Update Scorecards use case (WIP); fix bug in saving directory for scorecards metrics --- modules/Scorecards/Scorecards_calculations.R | 5 +---- .../ex1_2_autosubmit_scorecards/ex1_2-handson.md | 8 +++++++- .../ex1_2_autosubmit_scorecards/ex1_2-recipe.yml | 12 ++++++------ use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R | 5 +++++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 7b91e984..1ea84c68 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -440,11 +440,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_significance = aggr_significance) ## Save metric data arrays - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, - "/outputs/Scorecards/") - save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', - module = "scorecard") + module = "scorecards") } diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md index aab5b875..323d8430 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md +++ 
b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md
@@ -82,6 +82,7 @@ source("modules/Loading/Loading.R")
 source("modules/Anomalies/Anomalies.R")
 source("modules/Skill/Skill.R")
 source("modules/Statistics/Statistics.R")
+source("modules/Scorecards/Scorecards_calculations.R")
 source("modules/Saving/Saving.R")
 
 # Read recipe
@@ -91,7 +92,7 @@ recipe_file <- args[1]
 recipe <- read_atomic_recipe(recipe_file)
 ```
 
-The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill scores and save the result as netCDF files for Scorecards.
+The rest of the user-defined script can be written in the same way as any other SUNSET script. We load the data, calculate the anomalies, then compute the skill metrics and statistics, and we call `Scorecards_calculations()` to perform the regional aggregation and significance computations required by the Scorecards and save the results as netCDF files.
 
 ```R
 # Load data
 data <- Loading(recipe)
@@ -102,6 +103,11 @@ data <- Anomalies(recipe, data)
 skill_metrics <- Skill(recipe, data)
 # Compute statistics
 statistics <- Statistics(recipe, data)
+# Pre-computations required for the Scorecards
+Scorecards_calculations(recipe, data = data,
+                        skill_metrics = skill_metrics,
+                        statistics = statistics)
+
 ```
 
 Check the example script at [ex1_2-script.R](use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R). You can execute it as-is or copy it and modify it according to your needs.
 
diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
index 68a8dc62..e63747d9 100644
--- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
+++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml
@@ -80,20 +80,20 @@ Run:
   Loglevel: INFO
   Terminal: yes
   filesystem: esarchive
-  output_dir: /esarchive/scratch/aho/auto-s2s-outputs/
-  code_dir: /esarchive/scratch/aho/git/auto-s2s/
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /esarchive/scratch/vagudets/git/auto-s2s/
   autosubmit: yes
   # fill only if using autosubmit
   auto_conf:
-    script: /esarchive/scratch/aho/git/auto-s2s/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script
-    expid: a6pc # replace with your EXPID
-    hpc_user: bsc032734 # replace with your hpc username
+    script: use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032762 # replace with your hpc username
     wallclock: 03:00 # hh:mm
     processors_per_job: 8
     platform: nord3v2
     custom_directives: ['#SBATCH --exclusive']
     email_notifications: yes # enable/disable email notifications. Change it if you want to.
- email_address: an.ho@bsc.es # replace with your email address + email_address: victoria.agudetse@bsc.es # replace with your email address notify_completed: yes # notify me by email when a job finishes notify_failed: yes # notify me by email when a job fails diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R index a39c1c39..a8ee7eff 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R @@ -11,6 +11,7 @@ source("modules/Loading/Loading.R") source("modules/Anomalies/Anomalies.R") source("modules/Skill/Skill.R") source("modules/Statistics/Statistics.R") +source("modules/Scorecards/Scorecards_calculations.R") source("modules/Saving/Saving.R") # Read recipe @@ -26,3 +27,7 @@ data <- Anomalies(recipe, data) skill_metrics <- Skill(recipe, data) # Compute statistics statistics <- Statistics(recipe, data) +# Pre-computations required for the scorecards +Scorecards_calculations(recipe, data = data, + skill_metrics = skill_metrics, + statistics = statistics) -- GitLab From 03f82c36e8e5d42a1cad117147bfcf78ce9bc1d3 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:54:41 +0200 Subject: [PATCH 15/24] Change series of ifs to if/ifelse --- modules/Statistics/Statistics.R | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/Statistics/Statistics.R b/modules/Statistics/Statistics.R index 6fa5d03c..e3c27e97 100644 --- a/modules/Statistics/Statistics.R +++ b/modules/Statistics/Statistics.R @@ -26,8 +26,7 @@ Statistics <- function(recipe, data, agg = 'global') { method = "pearson")})$output1 covariance <- .drop_dims(covariance) statistics[[ stat ]] <- covariance - } ## close if on covariance - if (stat %in% c('std', 'standard_deviation')) { + } else if (stat %in% c('std', 'standard_deviation')) { ## Calculate standard deviation std_hcst <- Apply(data = hcst_ensmean, target_dims = c(time_dim), @@ -39,8 +38,7 @@ Statistics <- function(recipe, data, agg = 'global') { std_obs <- .drop_dims(std_obs) statistics[['std_hcst']] <- std_hcst statistics[['std_obs']] <- std_obs - } ## close if on std - if (stat %in% c('var', 'variance')) { + } else if (stat %in% c('var', 'variance')) { ## Calculate variance var_hcst <- (Apply(data = hcst_ensmean, target_dims = c(time_dim), @@ -52,8 +50,7 @@ Statistics <- function(recipe, data, agg = 'global') { var_obs <- .drop_dims(var_obs) statistics[['var_hcst']] <- var_hcst statistics[['var_obs']] <- var_obs - } ## close if on variance - if (stat == 'n_eff') { + } else if (stat == 'n_eff') { ## Calculate degrees of freedom n_eff <- s2dv::Eno(data = obs_ensmean, time_dim = time_dim, @@ -61,8 +58,7 @@ Statistics <- function(recipe, data, agg = 'global') { ncores = ncores) n_eff <- .drop_dims(n_eff) statistics[['n_eff']] <- n_eff - } ## close on n_eff - if (stat == 'spread') { + } else if (stat == 'spread') { C_cov <- stats:::C_cov spread <- sqrt(Apply(Apply(data = data$hcst$data, target_dims = c(memb_dim), @@ -70,7 +66,7 @@ Statistics <- function(recipe, data, agg = 'global') { fun = 'mean', target_dims = time_dim)$output1) spread <- .drop_dims(spread) statistics[['spread']] <- spread - } ## close on spread + } } ## close on stat info(recipe$Run$logger, "##### STATISTICS COMPUTATION COMPLETE #####") -- GitLab From 5cb281b4188c7161f60ac00b436d76131d1580e5 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:55:00 +0200 Subject: [PATCH 16/24] Add checks for score-aggregated sprerr --- 
tools/check_recipe.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 78048e4a..290b20a6 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -772,6 +772,18 @@ check_recipe <- function(recipe) { "'n_eff' are required.")) error_status <- TRUE } + if ('ensprerr' %in% tolower(sc_metrics)) { + if (!('spread' %in% statistics)) { + error(recipe$Run$logger, + paste("For 'enssprerr' to be plotted in the Scorecards with", + "the 'score' aggregation, the Statistics module must", + "be called and the statistics 'spread' and 'rms'", + "are required.")) + error_status <- TRUE + } else if (!('rms' %in% requested_metrics)) { + requested_metrics <- c(requested_metrics, 'rms') + } + } recipe$Analysis$Workflow$Skill$metric <- paste0(requested_metrics, collapse = " ") } -- GitLab From 84ce2448fe55baf2b297677d743292299d19499c Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 26 Jun 2024 15:55:08 +0200 Subject: [PATCH 17/24] add missing metrics --- use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml index e63747d9..3cd547ed 100644 --- a/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml +++ b/use_cases/ex1_2_autosubmit_scorecards/ex1_2-recipe.yml @@ -50,7 +50,7 @@ Analysis: cross_validation: yes save: 'all' Statistics: - metric: cov std n_eff + metric: cov std n_eff spread save: 'all' Probabilities: percentiles: [[1/3, 2/3]] @@ -81,7 +81,7 @@ Run: Terminal: yes filesystem: esarchive output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ - code_dir: /esarchive/scratch/vagudets/git/auto-s2s/ + code_dir: /home/Earth/vagudets/git/auto-s2s/ autosubmit: yes # fill only if using autosubmit auto_conf: -- GitLab From 4073ed666c18a37c6e92d08caddaaec62ee6ec56 Mon Sep 17 00:00:00 2001 From: vagudets Date: Thu, 27 Jun 2024 16:00:49 +0200 Subject: [PATCH 18/24] Fix bug in check_recipe() and add correct output directory to Scorecards_calculations() --- modules/Scorecards/Scorecards_calculations.R | 4 +++- tools/check_recipe.R | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 1ea84c68..a48aa99b 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -438,8 +438,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) - + ## Save metric data arrays + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, + "outputs", "Scorecards") save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', module = "scorecards") diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 290b20a6..e588683d 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -772,7 +772,7 @@ check_recipe <- function(recipe) { "'n_eff' are required.")) error_status <- TRUE } - if ('ensprerr' %in% tolower(sc_metrics)) { + if ('enssprerr' %in% tolower(sc_metrics)) { if (!('spread' %in% statistics)) { error(recipe$Run$logger, paste("For 'enssprerr' to be plotted in the Scorecards with", @@ -933,7 +933,7 @@ check_recipe <- function(recipe) { warn(recipe$Run$logger, paste("No 'custom_directives_multimodel' specified, the", "single-model verification custom directives will be 
used.")) - recipe$Run$auto_conf$custom_directives_multimodel <- + recipe$Run$auto_conf$custom_directives_multimodel <- recipe$Run$auto_conf$custom_directives } if (is.null(recipe$Run$auto_conf$processors_multimodel) && -- GitLab From 0ba530c0ad0d15913e4c693bb3d87b6523fc2a4a Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 1 Jul 2024 09:06:20 +0200 Subject: [PATCH 19/24] Bugfix: fix Scorecards directory path --- modules/Scorecards/Scorecards_calculations.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index a48aa99b..ed785ae7 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -424,7 +424,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, aggr_significance[is.na(aggr_significance)] <- FALSE ## Include 'var' dimension to be able to save array - if(!'var' %in% names(dim(aggr_metrics))){ + if (!'var' %in% names(dim(aggr_metrics))) { aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var') aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var') } @@ -441,9 +441,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, - "outputs", "Scorecards") + "outputs", "Scorecards/") save_metrics(recipe = recipe, metrics = aggr_scorecards, data_cube = data$hcst, agg = 'region', module = "scorecards") - } - +} -- GitLab From fc83b456db6d62348243706b69c94168c451dbe8 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Fri, 19 Jul 2024 11:40:47 +0200 Subject: [PATCH 20/24] corrected correlation aggregation for scorecards calculations --- modules/Scorecards/Scorecards_calculations.R | 87 +++++++++++--------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index ed785ae7..e24ed79e 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -25,23 +25,23 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else { alpha <- recipe$Analysis$Workflow$Scorecards$signif_alpha } - + if (is.null(recipe$Analysis$remove_NAs)) { na.rm <- FALSE } else { na.rm <- recipe$Analysis$remove_NAs } - + if (is.null(recipe$Analysis$Workflow$Scorecards$inf_to_na)){ inf.to.na <- FALSE } else { inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na } - + ## Parameters for saving output data files output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - + system <- recipe$Analysis$Datasets$System$name reference <- recipe$Analysis$Datasets$Reference$name var <- recipe$Analysis$Variables$name @@ -50,7 +50,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max start.months <- substr(recipe$Analysis$Time$sdate, 1,2) period <- paste0(start.year, "-", end.year) - + ## Parameters for data aggregation regions <- recipe$Analysis$Workflow$Scorecards$regions for (i in names(regions)){regions[[i]] <- unlist(regions[[i]])} @@ -80,7 +80,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, dim = c(time = length(forecast.months), region = length(regions), metric = length(metrics))) - + lon_dim <- 'longitude' lat_dim <- 'latitude' time_dim <- 'syear' @@ -93,13 +93,13 @@ Scorecards_calculations 
<- function(recipe, data, skill_metrics, ## Calculate weighted mean of spatial aggregation for (met in metrics) { result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') names(dim(result))[length(dim(result))] <- 'region' result <-Subset(result, 'var', 1, drop = 'selected') @@ -227,46 +227,53 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_hcst <- sapply(X = 1:length(regions), + n_eff <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = statistics$n_eff, + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + ## Calculate variance from standard deviation + var_hcst <- (statistics$std_hcst)^2 + var_obs <- (statistics$std_obs)^2 + + var_hcst <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_hcst, + WeightedMean(data = var_hcst, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - std_obs <- sapply(X = 1:length(regions), + var_obs <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$std_obs, + WeightedMean(data = var_obs, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - n_eff <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = statistics$n_eff, - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') ## Include name of region dimension names(dim(cov))[length(dim(cov))] <- 'region' - names(dim(std_hcst))[length(dim(std_hcst))] <- 'region' - names(dim(std_obs))[length(dim(std_obs))] <- 'region' + names(dim(var_hcst))[length(dim(var_hcst))] <- 'region' + names(dim(var_obs))[length(dim(var_obs))] <- 'region' names(dim(n_eff))[length(dim(n_eff))] <- 'region' - ## Remove 'var' dimension - cov <- Subset(cov, 'var', 1, drop = 'selected') - std_hcst <- Subset(std_hcst, 'var', 1, drop = 'selected') - std_obs <- Subset(std_obs, 'var', 1, drop = 'selected') - n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + ## Convert aggregated variance back to standard deviation + std_hcst <- sqrt(var_hcst) + std_obs <- sqrt(var_obs) ## Calculate correlation enscorr <- cov / (std_hcst * std_obs) + ## Remove 'var' dimension + enscorr <- Subset(enscorr, 'var', 1, drop = 'selected') + n_eff <- Subset(n_eff, 'var', 1, drop = 'selected') + ## Calculate significance of corr t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE) t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2) @@ -306,7 +313,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, lat = lat, latdim = lat_dim, na.rm = na.rm) }, simplify = 'array') - + obs_data_aggr <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = obs_data_ens, @@ -330,7 +337,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, pval_mean_bias <- Apply(data = list(x = hcst_data_aggr, y = obs_data_aggr), target_dims = c('syear'), ncores = ncores, fun = 
function(x,y) - {t.test(as.vector(x),as.vector(y))})$p.value + {t.test(as.vector(x),as.vector(y))})$p.value sign_mean_bias <- pval_mean_bias <= alpha @@ -352,9 +359,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Save metric result in array aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias, - order = c('time', 'region')) + order = c('time', 'region')) aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias, - order = c('time', 'region')) + order = c('time', 'region')) } else if (met == 'enssprerr') { ## Calculate metric @@ -385,7 +392,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, error <- Subset(error, 'var', 1, drop = 'selected') enssprerr <- spread / error - + # ## Significance calculation # # # Effective sample size @@ -409,7 +416,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, # pval_enssprerr <- 2 * min(pval_enssprerr, 1 - pval_enssprerr) # # sign_enssprerr <- pval_enssprerr <= alpha - + ## Save metric result in array aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr, order = c('time', 'region')) @@ -435,10 +442,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, attributes(aggr_metrics)$regions <- regions attributes(aggr_metrics)$system.name <- system attributes(aggr_metrics)$reference.name <- reference - + aggr_scorecards <- list(aggr_metrics = aggr_metrics, aggr_significance = aggr_significance) - + ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Scorecards/") -- GitLab From 244602f9afd798863efd76f9335c9001da2471b9 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 29 Jul 2024 14:09:00 +0200 Subject: [PATCH 21/24] Replace SprErr() with final version --- modules/Skill/R/tmp/SprErr.R | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/modules/Skill/R/tmp/SprErr.R b/modules/Skill/R/tmp/SprErr.R index a22bdf0d..f89d37a1 100644 --- a/modules/Skill/R/tmp/SprErr.R +++ b/modules/Skill/R/tmp/SprErr.R @@ -17,15 +17,15 @@ #' is 'member'. #'@param time_dim A character string indicating the name of dimension along #' which the ratio is computed. The default value is 'sdate'. -#'@param pval A logical value indicating whether to compute or not the p-value +#'@param pval A logical value indicating whether to compute the p-value #' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical #' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is #' FALSE. #'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. -#'@param na.rm A logical value indicating whether to remove NA values. The default -#' value is TRUE. +#'@param na.rm A logical value indicating whether to remove NA values. The +#' default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -37,16 +37,16 @@ #' The ratio of the ensemble spread and RMSE. #'} #'\item{$p_val}{ -#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only present -#' if \code{pval = TRUE}. +#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only +#' present if \code{pval = TRUE}. 
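+#'}
+#'\item{$sign}{
+#'  The statistical significance of the test Ho: Spread/RMSE = 1, TRUE/FALSE.
+#'  Only present if \code{sign = TRUE}.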
#'} #' #'@examples #'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) #'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) #'sprerr1 <- SprErr(exp, obs) -#'sprerr2 <- SprErr(exp, obs, pval=F, sign=T) -#'sprerr3 <- SprErr(exp, obs, pval=T, sign=T) +#'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +#'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) #' #'@import multiApply #'@export @@ -83,16 +83,16 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } - if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ", - "Set it as NULL if there is no member dimension.") + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimensions. ", + "'exp' must have the member dimension to compute the spread.") } # Add [member = 1] if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { dim(obs) <- c(dim(obs), 1) names(dim(obs))[length(dim(obs))] <- memb_dim } - if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { ## check no longer needed? dim(exp) <- c(dim(exp), 1) names(dim(exp))[length(dim(exp))] <- memb_dim } @@ -158,6 +158,7 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', c(dat_dim, memb_dim, time_dim)), pval = pval, sign = sign, + alpha = alpha, na.rm = na.rm, fun = .SprErr, ncores = ncores) @@ -166,7 +167,7 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', if (length(dim(res[[1]])) > 2) { res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') } else { - res <- lapply(res, as.numeric) + res <- lapply(res, as.vector) } } @@ -200,9 +201,9 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) if (pval | sign) { - F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) - if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { - p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + f_statistic <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(f_statistic) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(f_statistic, enospr - 1, enodif - 1) p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) } else { p.val[jexp, jobs] <- NA -- GitLab From 11e1a1190ec837c6f9d53b6a38b72bd6ab2e6d08 Mon Sep 17 00:00:00 2001 From: Nadia Milders Date: Wed, 28 Aug 2024 12:52:29 +0200 Subject: [PATCH 22/24] cleaned var dimension removal in code, and bug fix for 1 forecast time --- modules/Scorecards/Scorecards_calculations.R | 158 +++++++++---------- 1 file changed, 77 insertions(+), 81 deletions(-) diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index e24ed79e..75168908 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -61,7 +61,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Define array to filled with data aggr_significance <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(var = 
length(var), + time = length(forecast.months), region = length(regions), metric = length(metrics))) @@ -77,9 +78,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else { ## Define arrays to filled with data aggr_metrics <- array(data = NA, - dim = c(time = length(forecast.months), + dim = c(var = length(var), + time = length(forecast.months), region = length(regions), - metric = length(metrics))) + metric = length(metrics))) lon_dim <- 'longitude' lat_dim <- 'latitude' @@ -102,13 +104,12 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') names(dim(result))[length(dim(result))] <- 'region' - result <-Subset(result, 'var', 1, drop = 'selected') - + if (met =='crpss' && inf.to.na == TRUE) { result[result == -Inf] <- NA } - aggr_metrics[,,which(metrics == met)] <- Reorder(data = result, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result, + order = c('var','time', 'region')) } ## close on met } ## close if on skill @@ -138,11 +139,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Include name of region dimension names(dim(rps_syear))[length(dim(rps_syear))] <- 'region' names(dim(rps_clim_syear))[length(dim(rps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - rps_syear <- Subset(rps_syear, 'var', 1, drop = 'selected') - rps_clim_syear <- Subset(rps_clim_syear, 'var', 1, drop = 'selected') - + ## Calculate significance sign_rpss <- RandomWalkTest(rps_syear, rps_clim_syear, time_dim = time_dim, @@ -163,10 +160,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, rpss <- 1 - rps_syear / rps_clim_syear ## Save metric result in arrays - aggr_metrics[,,which(metrics == met)] <- Reorder(data = rpss, - order = c('time', 'region')) - aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_rpss, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = rpss, + order = c('var','time','region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_rpss, + order = c('var','time','region')) } else if (met == 'crpss') { crps_syear <- sapply(X = 1:length(regions), @@ -189,11 +186,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Include name of region dimension names(dim(crps_syear))[length(dim(crps_syear))] <- 'region' names(dim(crps_clim_syear))[length(dim(crps_clim_syear))] <- 'region' - - ## Remove 'var' dimension - crps_syear <- Subset(crps_syear, 'var', 1, drop = 'selected') - crps_clim_syear <- Subset(crps_clim_syear, 'var', 1, drop = 'selected') - + ## Calculate significance sign_crpss <- RandomWalkTest(crps_syear, crps_clim_syear, time_dim = time_dim, @@ -214,10 +207,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, crpss <- 1 - crps_syear / crps_clim_syear ## Save metric result in arrays - aggr_metrics[ , , which(metrics == met)] <- Reorder(data = crpss, - order = c('time', 'region')) - aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_crpss, - order = c('time', 'region')) + aggr_metrics[,,,which(metrics == met)] <- Reorder(data = crpss, + order = c('var','time','region')) + aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_crpss, + order = c('var','time','region')) } else if (met == 'enscorr') { cov <- sapply(X = 1:length(regions), FUN = function(X) { @@ -257,11 +250,18 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, na.rm = na.rm) }, simplify = 'array') - ## Include name of region 
dimension
-    names(dim(cov))[length(dim(cov))] <- 'region'
-    names(dim(var_hcst))[length(dim(var_hcst))] <- 'region'
-    names(dim(var_obs))[length(dim(var_obs))] <- 'region'
-    names(dim(n_eff))[length(dim(n_eff))] <- 'region'
+    if (is.null(dim(cov))){
+      cov <- array(data = cov, dim = c(var = 1, time = 1, region = 1))
+      n_eff <- array(data = n_eff, dim = c(var = 1, time = 1, region = 1))
+      var_hcst <- array(data = var_hcst, dim = c(var = 1, time = 1, region = 1))
+      var_obs <- array(data = var_obs, dim = c(var = 1, time = 1, region = 1))
+    } else {
+      ## Include name of region dimension
+      names(dim(cov))[length(dim(cov))] <- 'region'
+      names(dim(var_hcst))[length(dim(var_hcst))] <- 'region'
+      names(dim(var_obs))[length(dim(var_obs))] <- 'region'
+      names(dim(n_eff))[length(dim(n_eff))] <- 'region'
+    }
 
     ## Convert aggregated variance back to standard deviation
     std_hcst <- sqrt(var_hcst)
@@ -269,35 +269,37 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
     ## Calculate correlation
     enscorr <- cov / (std_hcst * std_obs)
-
-    ## Remove 'var' dimension
-    enscorr <- Subset(enscorr, 'var', 1, drop = 'selected')
-    n_eff <- Subset(n_eff, 'var', 1, drop = 'selected')
-
+
     ## Calculate significance of corr
     t_alpha2_n2 <- qt(p = alpha/2, df = n_eff-2, lower.tail = FALSE)
+    if (is.null(dim(t_alpha2_n2))) {
+      t_alpha2_n2 <- array(data = t_alpha2_n2, dim = c(var = 1, time = 1, region = 1))
+    }
     t <- abs(enscorr) * sqrt(n_eff-2) / sqrt(1-enscorr^2)
 
     sign_corr <- array(data = NA,
-                       dim = c(time = length(forecast.months),
+                       dim = c(var = length(var),
+                               time = length(forecast.months),
                                region = length(regions)))
 
-    for (time in 1:dim(sign_corr)[['time']]) {
-      for (reg in 1:dim(sign_corr)[['region']]) {
-        if ((anyNA(c(t[time, reg], t_alpha2_n2[time, reg])) == FALSE) &&
-            (t[time, reg] >= t_alpha2_n2[time, reg])) {
-          sign_corr[time, reg] <- TRUE
-        } else {
-          sign_corr[time, reg] <- FALSE
-        }
-      }
-    }
+    for (v in 1:dim(sign_corr)[['var']]) {
+      for (time in 1:dim(sign_corr)[['time']]) {
+        for (reg in 1:dim(sign_corr)[['region']]) {
+          if (!anyNA(c(t[v, time, reg], t_alpha2_n2[v, time, reg])) &&
+              t[v, time, reg] >= t_alpha2_n2[v, time, reg]) {
+            sign_corr[v, time, reg] <- TRUE
+          } else {
+            sign_corr[v, time, reg] <- FALSE
+          }
+        }
+      }
+    }
 
     ## Save metric result in arrays
-    aggr_metrics[,,which(metrics == met)] <- Reorder(data = enscorr,
-                                                     order = c('time', 'region'))
-    aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
-                                                          order = c('time', 'region'))
+    aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enscorr,
+                                                      order = c('var','time','region'))
+    aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_corr,
+                                                           order = c('var','time','region'))
 
   } else if (met == 'mean_bias') {
 
     ## Calculate ensemble mean
@@ -328,10 +330,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
     names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region'
 
     ## Remove unnecessary dimension
-    hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','var', 'sday','sweek'),
-                             list(1,1,1,1) , drop = 'selected')
-    obs_data_aggr <- Subset(obs_data_aggr, c('dat','var', 'sday','sweek'),
-                            list(1,1,1,1) , drop = 'selected')
+    hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'),
+                             list(1,1,1) , drop = 'selected')
+    obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'),
+                            list(1,1,1) , drop = 'selected')
 
     ## Calculate significance
@@ -351,17 +353,17 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
                                            na.rm = na.rm)
                        }, simplify = 'array')
 
-    ## Include name of region dimension
-    names(dim(mean_bias))[length(dim(mean_bias))] <- 'region'
-
-    ## Remove 'var' dimension
-    mean_bias <- Subset(mean_bias, 'var', 1, drop = 'selected')
-
+    if (is.null(dim(mean_bias))) {
+      mean_bias <- array(data = mean_bias, dim = c(var = 1, time = 1, region = 1))
+    } else {
+      names(dim(mean_bias))[length(dim(mean_bias))] <- 'region'
+    }
+
     ## Save metric result in array
-    aggr_metrics[ , , which(metrics == met)] <- Reorder(data = mean_bias,
-                                                        order = c('time', 'region'))
-    aggr_significance[ , , which(metrics == met)] <- Reorder(data = sign_mean_bias,
-                                                             order = c('time', 'region'))
+    aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias,
+                                                      order = c('var', 'time', 'region'))
+    aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias,
+                                                           order = c('var', 'time', 'region'))
 
   } else if (met == 'enssprerr') {
 
     ## Calculate metric
@@ -383,17 +385,17 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
                                        na.rm = na.rm)
                     }, simplify = 'array')
 
-    ## Include name of region dimension
-    names(dim(spread))[length(dim(spread))] <- 'region'
-    names(dim(error))[length(dim(error))] <- 'region'
-
-    ## Remove 'var' dimension
-    spread <- Subset(spread, 'var', 1, drop = 'selected')
-    error <- Subset(error, 'var', 1, drop = 'selected')
-
+    if (is.null(dim(spread))) {
+      spread <- array(data = spread, dim = c(var = 1, time = 1, region = 1))
+      error <- array(data = error, dim = c(var = 1, time = 1, region = 1))
+    } else {
+      names(dim(spread))[length(dim(spread))] <- 'region'
+      names(dim(error))[length(dim(error))] <- 'region'
+    }
+
     enssprerr <- spread / error
 
-    # ## Significance calculation
+    # ## Significance calculation (in progress)
     #
     # # Effective sample size
     # # exp <- data$hcst$data
@@ -418,10 +420,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
     # sign_enssprerr <- pval_enssprerr <= alpha
 
     ## Save metric result in array
-    aggr_metrics[ , , which(metrics == met)] <- Reorder(data = enssprerr,
-                                                        order = c('time', 'region'))
+    aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enssprerr,
+                                                      order = c('var','time','region'))
     # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
-    #                                                       order = c('time', 'region'))
+    #                                                       order = c('var', 'time', 'region'))
   }
  } ## close loop on metric
 } ## close if on score
@@ -429,13 +431,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
 ## Set NAs to False
 aggr_significance[is.na(aggr_significance)] <- FALSE
-
-## Include 'var' dimension to be able to save array
-if (!'var' %in% names(dim(aggr_metrics))) {
-  aggr_metrics <- InsertDim(aggr_metrics, 1, 1, name = 'var')
-  aggr_significance <- InsertDim(aggr_significance, 1, 1, name = 'var')
-}
-
+
 ## Include attributes
 attributes(aggr_metrics)$metrics <- metrics
 attributes(aggr_metrics)$forecast.months <- forecast.months
--
GitLab


From 17ff211db3710f8bb1973a917d207c09d0f46657 Mon Sep 17 00:00:00 2001
From: Nadia Milders
Date: Thu, 29 Aug 2024 12:38:15 +0200
Subject: [PATCH 23/24] bug fix in LoadMetrics function for loading multiple
 systems

---
 modules/Scorecards/R/tmp/LoadMetrics.R   | 50 ++++++++++++------------
 modules/Scorecards/Scorecards_plotting.R |  1 +
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R
index 2a70eff9..e49132f6 100644
--- a/modules/Scorecards/R/tmp/LoadMetrics.R
+++ b/modules/Scorecards/R/tmp/LoadMetrics.R
@@ -91,34 +91,34 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type,
   reference <- gsub('.','', reference, fixed = T)
 
   ## Load data for each system
+  all_metrics <- NULL
-  for (sys in 1:length(system)) {
-    ## Load data for each reference
-    by_reference <- NULL
-    for (ref in 1:length(reference)) {
-      ## Call function to load metrics data
-      result <- .loadmetrics(input_path = input_path,
-                             system = system[sys],
-                             reference = reference[ref],
-                             var = var,
-                             period = period,
-                             start_months = start_months,
-                             calib_method = calib_method,
-                             data_type = data_type)
-
-      result_attr <- attributes(result)
-      by_reference <- abind::abind(by_reference, result,
-                                   along = length(dim(result)) + 1)
-      dim(by_reference) <- c(dim(result), reference = length(reference))
-    } ## close loop on reference
-    all_metrics <- abind::abind(all_metrics, by_reference,
-                                along = length(dim(by_reference)) + 1)
-    dim(all_metrics) <- c(dim(by_reference), system = length(system))
-  } ## close loop on system
-
+  for (sys in 1:length(system)) {
+    for (ref in 1:length(reference)) {
+
+      result <- .loadmetrics(input_path = input_path,
+                             system = system[sys],
+                             reference = reference[ref],
+                             var = var,
+                             period = period,
+                             start_months = start_months,
+                             calib_method = calib_method,
+                             data_type = data_type)
+
+      if (sys == 1 && ref == 1) {
+        all_metrics <- array(data = NA,
+                             dim = c('system' = length(system),
+                                     'reference' = length(reference),
+                                     attributes(result)$dim))
+      }
+      all_metrics[sys, ref, , , , ] <- result
+
+    }
+  }
+
   attributes(all_metrics)$start_months <- start_months
-  
+
   return(all_metrics)
 } ## close function
 
diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R
index ff1ba186..efeaad26 100644
--- a/modules/Scorecards/Scorecards_plotting.R
+++ b/modules/Scorecards/Scorecards_plotting.R
@@ -191,6 +191,7 @@ Scorecards_plotting <- function(recipe) {
                                      sign = aggregated_significance,
                                      system = system,
                                      reference = reference,
+                                     var = var,
                                      var.units = var.units,
                                      start.year = start.year,
                                      end.year = end.year,
--
GitLab


From ca9cc30deb17a099c9cbe8488a3e9cc3a47aa288 Mon Sep 17 00:00:00 2001
From: vagudets
Date: Fri, 30 Aug 2024 16:00:15 +0200
Subject: [PATCH 24/24] Split variable string if it contains multiple
 variables in Scorecards_calculations(); bugfix and formatting

---
 modules/Scorecards/Scorecards_calculations.R | 29 ++++++++++----------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R
index 75168908..093aa426 100644
--- a/modules/Scorecards/Scorecards_calculations.R
+++ b/modules/Scorecards/Scorecards_calculations.R
@@ -44,7 +44,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
   system <- recipe$Analysis$Datasets$System$name
   reference <- recipe$Analysis$Datasets$Reference$name
-  var <- recipe$Analysis$Variables$name
+  var <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
   start.year <- as.numeric(recipe$Analysis$Time$hcst_start)
   end.year <- as.numeric(recipe$Analysis$Time$hcst_end)
   forecast.months <- recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max
@@ -109,7 +109,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
         result[result == -Inf] <- NA
       }
       aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result,
-                                                       order = c('var','time', 'region'))
+                                                        order = c('var','time', 'region'))
     } ## close on met
   } ## close if on skill
 
@@ -161,9 +161,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
       ## Save metric result in arrays
       aggr_metrics[,,,which(metrics == met)] <- Reorder(data = rpss,
-                                                        order = c('var','time','region'))
+                                                        order = c('var', 'time', 'region'))
       aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_rpss,
-                                                             order = c('var','time','region'))
+                                                             order = c('var', 'time', 'region'))
 
     } else if (met == 'crpss') {
 
       crps_syear <- sapply(X = 1:length(regions),
@@ -208,9 +208,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
      ## Save metric result in arrays
      aggr_metrics[,,,which(metrics == met)] <- Reorder(data = crpss,
-                                                       order = c('var','time','region'))
+                                                       order = c('var', 'time', 'region'))
      aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_crpss,
-                                                            order = c('var','time','region'))
+                                                            order = c('var', 'time', 'region'))
 
     } else if (met == 'enscorr') {
       cov <- sapply(X = 1:length(regions),
                     FUN = function(X) {
@@ -250,7 +250,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
                                          na.rm = na.rm)
                       }, simplify = 'array')
 
-      if (is.null(dim(cov))){
+      if (is.null(dim(cov))) {
         cov <- array(data = cov, dim = c(var = 1, time = 1, region = 1))
         n_eff <- array(data = n_eff, dim = c(var = 1, time = 1, region = 1))
         var_hcst <- array(data = var_hcst, dim = c(var = 1, time = 1, region = 1))
@@ -297,9 +297,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
      ## Save metric result in arrays
      aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enscorr,
-                                                       order = c('var','time','region'))
+                                                       order = c('var', 'time', 'region'))
      aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_corr,
-                                                            order = c('var','time','region'))
+                                                            order = c('var', 'time', 'region'))
 
     } else if (met == 'mean_bias') {
 
       ## Calculate ensemble mean
@@ -330,9 +330,9 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
       names(dim(obs_data_aggr))[length(dim(obs_data_aggr))] <- 'region'
 
       ## Remove unnecessary dimension
-      hcst_data_aggr <- Subset(hcst_data_aggr, c('dat','sday','sweek'),
+      hcst_data_aggr <- Subset(hcst_data_aggr, c('dat', 'sday', 'sweek'),
                                list(1,1,1) , drop = 'selected')
-      obs_data_aggr <- Subset(obs_data_aggr, c('dat','sday','sweek'),
+      obs_data_aggr <- Subset(obs_data_aggr, c('dat', 'sday', 'sweek'),
                               list(1,1,1) , drop = 'selected')
 
       ## Calculate significance
@@ -360,10 +360,10 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
       }
 
       ## Save metric result in array
       aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias,
-                                                       order = c('var', 'time', 'region'))
+                                                        order = c('var', 'time', 'region'))
       aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias,
-                                                            order = c('var', 'time', 'region'))
+                                                             order = c('var', 'time', 'region'))
 
     } else if (met == 'enssprerr') {
 
       ## Calculate metric
@@ -421,7 +421,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics,
 
       ## Save metric result in array
       aggr_metrics[,,,which(metrics == met)] <- Reorder(data = enssprerr,
-                                                        order = c('var','time','region'))
+                                                        order = c('var', 'time', 'region'))
       # aggr_significance[,,which(metrics == met)] <- Reorder(data = sign_corr,
       #                                                       order = c('var', 'time', 'region'))
     }
--
GitLab
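
The correlation aggregation corrected in [PATCH 20/24] can be illustrated with a minimal, self-contained sketch: the regional correlation is recomputed from the regionally aggregated covariance and variances, because averaging grid-point correlations (or standard deviations) directly does not give the same quantity. The toy arrays below stand in for the real hindcast and observation fields, and an unweighted `mean()` stands in for `WeightedMean()`; all object names are illustrative.

```R
set.seed(1)
ncell <- 12   # grid points in the region
nyear <- 30   # hindcast years (the 'syear' dimension)
hcst <- matrix(rnorm(ncell * nyear), nrow = ncell)           # [cell, syear]
obs  <- 0.6 * hcst + matrix(rnorm(ncell * nyear, sd = 0.8), nrow = ncell)

# Per-grid-point statistics along the time dimension
cov_cell  <- sapply(seq_len(ncell), function(i) cov(hcst[i, ], obs[i, ]))
var_hcst  <- sapply(seq_len(ncell), function(i) var(hcst[i, ]))
var_obs   <- sapply(seq_len(ncell), function(i) var(obs[i, ]))
corr_cell <- sapply(seq_len(ncell), function(i) cor(hcst[i, ], obs[i, ]))

# Naive aggregation: the mean of the grid-point correlations
mean(corr_cell)

# Corrected aggregation: average the covariance and the variances over the
# region, convert the aggregated variances back to standard deviations, then
# recompute the correlation (cf. var_hcst <- (statistics$std_hcst)^2 and
# enscorr <- cov / (std_hcst * std_obs) in Scorecards_calculations.R)
mean(cov_cell) / (sqrt(mean(var_hcst)) * sqrt(mean(var_obs)))
```

The two results generally differ; aggregating the variance rather than the standard deviation is what keeps the denominator consistent with the aggregated covariance.

Similarly, the spread/error significance test that [PATCH 21/24] brings in with `SprErr()` can be sketched in a few lines. This is a simplified version that uses the raw sample size in place of the effective sample sizes (`enospr`, `enodif`) computed in the real function:

```R
set.seed(2)
nmemb <- 10; nsdate <- 30
exp <- matrix(rnorm(nmemb * nsdate), nrow = nmemb)   # [member, sdate]
obs <- rnorm(nsdate)

spread <- sqrt(mean(apply(exp, 2, var)))             # mean ensemble variance
error  <- sqrt(mean((colMeans(exp) - obs)^2))        # RMSE of the ensemble mean
ratio  <- spread / error

# Two-sided test of Ho: spread/RMSE = 1, mirroring the f_statistic in SprErr()
f_statistic <- (nsdate * spread^2 / (nsdate - 1)) /
  (nsdate * error^2 / (nsdate - 1))
p <- pf(f_statistic, nsdate - 1, nsdate - 1)
p_val <- 2 * min(p, 1 - p)
sign  <- p_val <= 0.05
```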