diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 26295b372415352d53ad37d1108e086d81d890ba..edbed814fb95d2b1e031ec08f4d25a7b57d65f72 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -101,7 +101,7 @@ save_metrics <- function(recipe, ## the code extra_string <- get_filename("", recipe, variable, fcst.sdate, agg, names(subset_metric)[[i]]) - SaveExp(data = subset_metric[[i]], destination = outdir, + SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, coords = list(latitude = data_cube$coords[['latitude']], longitude = data_cube$coords[['longitude']]), @@ -119,13 +119,19 @@ save_metrics <- function(recipe, # Remove singleton dimensions and rearrange lon, lat and time dims if (tolower(agg) == "global") { subset_metric <- lapply(subset_metric, function(x) { - Reorder(x, c(lalo, 'time'))}) + Reorder(x, c(lalo, 'time'))}) + } else { + subset_metric <- lapply(subset_metric, function(x) { + Reorder(x, c("region", "time"))}) } attr(subset_metric[[1]], 'global_attrs') <- global_attributes for (i in 1:length(subset_metric)) { metric <- names(subset_metric[i]) - long_name <- dictionary$metrics[[metric]]$long_name + long_name <- tryCatch( + expr = {dictionary$metrics[[metric]]$long_name}, + error = function(e) {NULL} + ) missing_val <- -9.e+33 subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val if (tolower(agg) == "country") { diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index e49132f675d99c0574f44ce7ccef3e57febdb46d..bde6d612f3108b2baf6559cc3063364e70220640 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -13,7 +13,7 @@ #' variable-dictionary.yml from verification suite (TO DO: multiple variables). #' The accepted names are: 'psl', 'tas', 'sfcWind', 'prlr'. #'@param period A character string indicating the start and end years of the -#' reference period (e.g. '1993-203') +#' reference period (e.g. '1993-2016') #'@param start_months A vector indicating the numbers of the start months #'@param input_path A character string indicating the path where metrics output #' files from verification suite are saved (or any other compatible files) @@ -26,17 +26,18 @@ #'\dontrun{ #'loaded_metrics <- LoadMetrics(system = c('ECMWF-SEAS5','DWD-GFCS2.1'), #' reference. = 'ERA5', -#' var = 'tas', +#' var = 'tas', #' period = '1993-2016' +#' metric_names = c('rpss', 'crpss'), #' start_months = sprintf("%02d", 1:12), #' calib_method = 'raw', #' input_path = './scorecards_data/input_data') #'} -#'@import easyNCDF -#'@import multiApply +#'@importFrom startR Start +#'@importFrom ClimProjDiags Subset #'@export -LoadMetrics <- function(input_path, system, reference, var, period, data_type, +LoadMetrics <- function(input_path, system, reference, var, period, metric_names, start_months, calib_method = NULL) { # Initial checks @@ -70,12 +71,6 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, "the starting months.") } start_months <- sprintf("%02d", start_months) - ## Check if sdates are continuous or discrete - if (all(diff(as.numeric(start_months)) == 1)) { - consecutive_start_months <- TRUE - } else { - consecutive_start_months <- FALSE - } ## input_path if (!is.character(input_path)) { stop("Parameter 'input_path must be a character string.") @@ -90,86 +85,33 @@ LoadMetrics <- function(input_path, system, reference, var, period, data_type, system <- gsub('.','', system, fixed = T) reference <- gsub('.','', reference, fixed = T) - ## Load data for each system - - all_metrics <- NULL + # Load data + ## Outer (file) dimensions: var, sdate (month), system, reference + ## Inner dimensions: time, region + path_pattern <- file.path("$system$", "$reference$", calib_method, var, + paste0("scorecards_", "$system$", "_", + "$reference$", "_", var, "_$var$_", + period, "_s$sdate$.nc")) + metrics <- Start(dat = file.path(input_path, path_pattern), + var = metric_names, + system = system, + reference = reference, + sdate = start_months, + time = "all", + region = "all", + metadata_dims = "var", + return_vars = list(time = NULL, + region = NULL), + pattern_dims = 'dat', + retrieve = TRUE) - for (sys in 1:length(system)){ - for (ref in 1:length(reference)){ - - result <- .loadmetrics(input_path = input_path, - system = system[sys], - reference = reference[ref], - var = var, - period = period, - start_months = start_months, - calib_method = calib_method, - data_type = data_type) - - if (sys == 1 && ref == 1){ - all_metrics <- array(data = NA, - dim = c('system'= length(system), - 'reference'=length(reference), - attributes(result)$dim)) - } - all_metrics[sys,ref,,,,] <- result - - } + if (!is.null(attr(metrics, "NotFoundFiles"))) { + stop("Some of the requested files were not found.") } - attributes(all_metrics)$start_months <- start_months - - return(all_metrics) -} ## close function - -########################################################### + metrics <- Subset(metrics, along = "dat", + indices = 1, drop = "selected") + class(metrics) <- "array" + return(metrics) -.loadmetrics <- function(input_path, system, reference, - var, period, start_months, - calib_method, data_type) { - - ## Load data for each start date - allfiles <- sapply(start_months, function(m) { - paste0(input_path, "/", system, "/", reference, "/", calib_method, "/", - var, "/scorecards_", system, "_", reference, "_", - var, "_", data_type, "_", period, "_s", m, - ".nc")}) - allfiles_exist <- sapply(allfiles, file.exists) - - # Check dims - files_exist_by_month <- seq(1:length(allfiles))[allfiles_exist] - allfiledims <- sapply(allfiles[allfiles_exist], easyNCDF::NcReadDims) - if (length(files_exist_by_month) == 0) { - stop("No files are found.") - } - - num_dims <- numeric(dim(allfiledims)[1]) - for (i in 1:dim(allfiledims)[1]) { - if (length(unique(allfiledims[i,])) > 1) { - warning(paste0("Dimensions of system ", system," with var ", var, - " don't match.")) - } - num_dims[i] <- max(allfiledims[i,]) # We take the largest dimension - } - - # Loop for file - dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) - - array_met_by_sdate <- Apply(data = allfiles, target_dims = 'dat', fun = function(x) { - if (file.exists(x)) { - res <- easyNCDF::NcToArray(x, vars_to_read = data_type, unlist = T, - drop_var_dim = T) - names(dim(res)) <- NULL - } else { - res <- array(dim = c(length(data_type), allfiledims[-1,1])) - names(dim(res)) <- NULL - } - res})$output1 - - dim(array_met_by_sdate) <- c(allfiledims[-1,1], - sdate = length(allfiles)) - - return(array_met_by_sdate) } - - diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index d2812a9dd33725c6efb6b854745f4778681d91b9..a2703e924db5c4935b4663a8e6f8f014d265c9c3 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -93,19 +93,19 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, } ## Make sure data is in correct order for using in functions: - data_order <- c('system','reference','metric','time','sdate','region') + data_order <- c('system','reference','var','time','sdate','region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics if(!is.null(sign)){ sign <- Reorder(sign, data_order) - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + sign <- Subset(sign, along = 'var', indices = match(metrics, metrics_loaded)) attributes(sign)$metrics <- metrics } @@ -206,9 +206,8 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, ## Find position of mean bias metric to calculate breaks if ('mean_bias' %in% metrics) { - pos_bias <- which(metrics == 'mean_bias') - breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), - indices = list(pos_bias,reg))) + breaks_bias <- .SCBiasBreaks(Subset(data, along = c('var', 'region'), + indices = list(pos_bias ,reg))) } ## Define breaks for each metric based of metric position: @@ -246,9 +245,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_9, row_dim = model, subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = model.name, subrow_names = forecast.months, col_names = metric.names, @@ -282,7 +281,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + new_order <- c('system', 'reference', 'var', 'region','sdate', 'time') if(model == 'system'){ data_sc_10 <- Subset(Reorder(data, new_order), c('reference','region'), list(1, reg), drop = 'selected') @@ -303,9 +302,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_10, row_dim = 'time', subrow_dim = model, - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = model.name, col_names = metric.names, @@ -361,9 +360,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_11, row_dim = model, subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = model.name, subrow_names = forecast.months, col_names = metric.names, @@ -398,7 +397,7 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, region = sub(" ", "-", region.names[reg]), fileout.label = fileout.label, output.path = output.path) - new_order <- c('system', 'reference', 'metric', 'region','sdate', 'time') + new_order <- c('system', 'reference', 'var', 'region','sdate', 'time') if(model == 'system'){ data_sc_12 <- Subset(Reorder(transformed_data, new_order), c('reference','region'), list(1, reg), drop = 'selected') @@ -420,9 +419,9 @@ ScorecardsMulti <- function(data, sign, system, reference, var, var.units, sign = sign_sc_12, row_dim = 'time', subrow_dim = model, - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = model.name, col_names = metric.names, diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 9f00549bc7f5257b40850fe9dfaab65ca472b164..eca8f9b9f19e917ab36ec71a7d04ced3fde4b7b6 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -93,11 +93,11 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, if (is.null(names(dim(data)))) { stop("Parameter 'data' must have dimenision names.") } - if (!all(c('system','reference','metric','time','sdate','region') %in% - names(dim(data)))) { - stop("Dimension names of 'data' must be: 'system','reference','metric', - 'time','sdate','region'.") - } + # if (!all(c('system','reference','var','time','sdate','region') %in% + # names(dim(data)))) { + # stop("Dimension names of 'data' must be: 'system','reference','var', + # 'time','sdate','region'.") + # } if (is.null(table.label)){ table.label <- "" } @@ -106,19 +106,19 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, } ## Make sure data is in correct order for using in functions: - data_order <- c('system', 'reference', 'metric', 'time', 'sdate', 'region') + data_order <- c('var', 'system', 'reference', 'time', 'sdate', 'region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(data)$metrics <- metrics - if(!is.null(sign)){ + if (!is.null(sign)) { sign <- Reorder(sign, data_order) - sign <- Subset(sign, along = 'metric', indices = match(metrics, metrics_loaded)) + sign <- Subset(sign, along = 'var', indices = match(metrics, metrics_loaded)) attributes(sign)$metrics <- metrics } @@ -184,18 +184,18 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, for (ref in 1:dim(data)['reference']) { ## TO DO: Apply check to each scorecard function - ## check dimension 'metric' exists: - if (!("metric" %in% names(dim(data)))) { + ## check dimension 'var' exists: + if (!("var" %in% names(dim(data)))) { dim(data) <- c(metric = 1, dim(data)) } ## Find position of mean bias metric to calculate breaks breaks_bias <- NULL if ('mean_bias' %in% metrics){ - stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) + stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('var','time','sdate','region'))) temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') pos_bias <- which(metrics == 'mean_bias') - breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric', + breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'var', indices = pos_bias)) } @@ -233,9 +233,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_1, row_dim = 'region', subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = region.names, subrow_names = forecast.months, col_names = metric.names, @@ -266,13 +266,13 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, ## (reorder only) ## Scorecard type 2 is same as type 1 for only one region, therefore is ## only plotted if more that one region is requested - if(dim(data)['region'] > 1) { + if (dim(data)['region'] > 1) { fileout <- .Filename(system = system[sys], reference = reference[ref], var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_2 <- Reorder(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) @@ -286,9 +286,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_2, row_dim = 'time', subrow_dim = 'region', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = region.names, col_names = metric.names, @@ -335,9 +335,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_3, row_dim = 'region', subrow_dim = 'time', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = region.names, subrow_names = forecast.months, col_names = metric.names, @@ -373,7 +373,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_4 <- Reorder(Subset(transformed_data, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) @@ -387,9 +387,9 @@ ScorecardsSingle <- function(data, sign, system, reference, var, var.units, sign = sign_sc_4, row_dim = 'time', subrow_dim = 'region', - col_dim = 'metric', + col_dim = 'var', subcol_dim = 'sdate', - legend_dim = 'metric', + legend_dim = 'var', row_names = forecast.months, subrow_names = region.names, col_names = metric.names, diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R index 90ac44f62aa4989a3367de0488228b174d505ec6..156e24c70b63d959c6d62b0fe110d824faf3327c 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R +++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R @@ -65,14 +65,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, } ## Make sure input_data is in correct order for using in functions: - data_order <- c('system','reference','metric','time','sdate','region') + data_order <- c('system','reference','var','time','sdate','region') data <- Reorder(data, data_order) ## Identify metrics loaded metrics_loaded <- attributes(data)$metrics ## Select only the metrics to visualize from data - input_data <- Subset(data, along = 'metric', indices = match(metrics, metrics_loaded)) + input_data <- Subset(data, along = 'var', indices = match(metrics, metrics_loaded)) attributes(input_data)$metrics <- metrics ## Calculate difference between two systems/references @@ -191,9 +191,9 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, SCPlotScorecard(data = diff_data, row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = region.names, subrow.names = forecast.months, col.names = metric.names, @@ -227,14 +227,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, start.year = start.year, end.year = end.year, scorecard.type = 2, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_2 <- Reorder(diff_data, new_order) SCPlotScorecard(data = data_sc_2, row.dim = 'time', subrow.dim = 'region', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = forecast.months, subrow.names = region.names, col.names = metric.names, @@ -269,9 +269,9 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, SCPlotScorecard(data = transformed_data, row.dim = 'region', subrow.dim = 'time', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = region.names, subrow.names = forecast.months, col.names = metric.names, @@ -305,14 +305,14 @@ ScorecardsSystemDiff <- function(data, system, reference, var, var.units, fileout <- .Filename(system = paste0("diff_",comparison[1],"_",comparison[2]), reference = eval.filename, var = var, start.year = start.year, end.year = end.year, scorecard.type = 4, fileout.label = fileout.label, output.path = output.path) - new_order <- c('metric', 'region', 'sdate', 'time') + new_order <- c('var', 'region', 'sdate', 'time') data_sc_4 <- Reorder(transformed_data, new_order) SCPlotScorecard(data = data_sc_4, row.dim = 'time', subrow.dim = 'region', - col.dim = 'metric', + col.dim = 'var', subcol.dim = 'sdate', - legend.dim = 'metric', + legend.dim = 'var', row.names = forecast.months, subrow.names = region.names, col.names = metric.names, diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index fbd03154f550c3ecefddafb82052ab3c3796395f..bc8e39c065bdb72166e415a4d3506c00f0a283d6 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -18,6 +18,15 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else if (!is.list(skill_metrics)) { stop("'skill_metrics' should be a named list with metric arrays") } + # skill_metrics$ + if (!is.null(statistics)) { + warning("Parameter 'statistics' will be deprecated, use 'skill_metrics' ", + "as a single list instead.") + skill_metrics <- append(skill_metrics, statistics) + } + metrics_to_remove <- names(skill_metrics)[grep("_significance|metadata|_individual_members", + names(skill_metrics))] + skill_metrics <- skill_metrics[!names(skill_metrics) %in% metrics_to_remove] # Assign parameters ncores <- recipe$Analysis$ncores if (is.null(recipe$Analysis$Workflow$Scorecards$signif_alpha)) { @@ -60,29 +69,28 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ", | |,")) ## Define array to filled with data - aggr_significance <- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions), - metric = length(metrics))) + aggr_significance <- list() ## For data that is already aggregated by region if ("region" %in% names(dim(skill_metrics[[1]]))) { aggr_metrics <- NULL - for (met in metrics) { - skill_metrics[[met]] <- Reorder(skill_metrics[[met]], - c("time", "region", "var")) - aggr_metrics <- abind(aggr_metrics, skill_metrics[[met]], along=3) + for (met in (names(skill_metrics))) { + if (strsplit(met, split = "-", fixed = TRUE)[[1]][1] %in% metrics) { + aggr_metrics[[met]] <- Reorder(skill_metrics[[met]], + c("time", "region", "var")) + aggr_significance[[paste0(met, "_significance")]] <- + array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) + + } } - names(dim(aggr_metrics)) <- c("time", "region", "metric") } else { ## Define arrays to filled with data - aggr_metrics <- array(data = NA, - dim = c(var = length(var), - time = length(forecast.months), - region = length(regions), - metric = length(metrics))) - + aggr_metrics <- list() + aggr_metrics_names <- c() + lon_dim <- 'longitude' lat_dim <- 'latitude' time_dim <- 'syear' @@ -93,31 +101,40 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Skill aggregation if (metric.aggregation == 'skill') { ## Calculate weighted mean of spatial aggregation - for (met in metrics) { - result <- sapply(X = 1:length(regions), - FUN = function(X) { - WeightedMean(data = skill_metrics[[met]], - region = regions[[X]], - lon = lon, londim = lon_dim, - lat = lat, latdim = lat_dim, - na.rm = na.rm) - }, simplify = 'array') - - names(dim(result))[length(dim(result))] <- 'region' + for (met in (names(skill_metrics))) { + if (strsplit(met, split = "-", fixed = TRUE)[[1]][1] %in% metrics) { + result <- sapply(X = 1:length(regions), + FUN = function(X) { + WeightedMean(data = skill_metrics[[met]], + region = regions[[X]], + lon = lon, londim = lon_dim, + lat = lat, latdim = lat_dim, + na.rm = na.rm) + }, simplify = 'array') + + names(dim(result))[length(dim(result))] <- 'region' + + if (met == 'crpss' && inf.to.na == TRUE) { + result[result == -Inf] <- NA + } + aggr_metrics[[met]] <- Reorder(data = result, + order = c('var', 'time', 'region')) + aggr_significance[[paste0(met, "_significance")]] <- + array(data = NA, + dim = c(var = length(var), + time = length(forecast.months), + region = length(regions))) - if (met =='crpss' && inf.to.na == TRUE) { - result[result == -Inf] <- NA } - aggr_metrics[,,,which(metrics == met)] <- Reorder(data = result, - order = c('var','time', 'region')) } ## close on met } ## close if on skill + ## TODO: Review and modify score aggregation ## Score Aggregation if (metric.aggregation == 'score') { ## Spatially aggregate data for each metric - for (met in metrics) { - if (met == 'rpss') { + for (met in metrics) { + if (met == "rpss" || grep('rpss-', met)) { rps_syear <- sapply(X = 1:length(regions), FUN = function(X) { WeightedMean(data = skill_metrics$rps_syear, @@ -214,7 +231,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } else if (met == 'enscorr') { cov <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$cov, + WeightedMean(data = skill_metrics$cov, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -222,7 +239,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') n_eff <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$n_eff, + WeightedMean(data = skill_metrics$n_eff, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -230,8 +247,8 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, }, simplify = 'array') ## Calculate variance from standard deviation - var_hcst <- (statistics$std_hcst)^2 - var_obs <- (statistics$std_obs)^2 + var_hcst <- (skill_metrics$std_hcst)^2 + var_obs <- (skill_metrics$std_obs)^2 var_hcst <- sapply(X = 1:length(regions), FUN = function(X) { @@ -369,7 +386,7 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, ## Calculate metric spread <- sapply(X = 1:length(regions), FUN = function(X) { - WeightedMean(data = statistics$spread, + WeightedMean(data = skill_metrics$spread, region = regions[[X]], lon = lon, londim = lon_dim, lat = lat, latdim = lat_dim, @@ -430,22 +447,28 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } ## close if on region ## Set NAs to False - aggr_significance[is.na(aggr_significance)] <- FALSE + aggr_significance <- lapply(aggr_significance, + function(x) { + x[is.na(x)] <- 0 + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res)}) + + # Metadata for saving + ## TODO: Significance + data$hcst$attrs$Variable$metadata$region <- names(regions) + attributes(data$hcst$attrs$Variable$metadata$region)$variables <- + list(region = list(names = paste(names(regions), collapse = ", "))) - ## Include attributes - attributes(aggr_metrics)$metrics <- metrics - attributes(aggr_metrics)$forecast.months <- forecast.months - attributes(aggr_metrics)$regions <- regions - attributes(aggr_metrics)$system.name <- system - attributes(aggr_metrics)$reference.name <- reference - - aggr_scorecards <- list(aggr_metrics = aggr_metrics, - aggr_significance = aggr_significance) - ## Save metric data arrays recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", "Scorecards/") - save_metrics(recipe = recipe, metrics = aggr_scorecards, + save_metrics(recipe = recipe, metrics = c(aggr_metrics, aggr_significance), data_cube = data$hcst, agg = 'region', module = "scorecards") } diff --git a/modules/Scorecards/Scorecards_plotting.R b/modules/Scorecards/Scorecards_plotting.R index 0e80164495e8b2084c66f5784a1b45d02e4df3fb..f8261862b6e63af5c99c3b1d16a9ad9733f94905 100644 --- a/modules/Scorecards/Scorecards_plotting.R +++ b/modules/Scorecards/Scorecards_plotting.R @@ -128,29 +128,30 @@ Scorecards_plotting <- function(recipe) { } ####### Load data files ####### - aggregated_metrics <- LoadMetrics(input_path = input.path, - system = system, - reference = reference, - var = var, - data_type = "aggr_metrics", - period = period, - start_months = as.numeric(start.months), - calib_method = calib.method) - - attributes(aggregated_metrics)$metrics <- metrics + aggregated_metrics <- LoadMetrics(input_path = input.path, + system = system, + reference = reference, + var = var, + metric_names = metrics, + period = period, + start_months = as.numeric(start.months), + calib_method = calib.method) + attributes(aggregated_metrics)$metrics <- attr(aggregated_metrics, "FileSelectors")$dat1$var[[1]] + aggregated_significance <- LoadMetrics(input_path = input.path, system = system, reference = reference, var = var, - data_type = "aggr_significance", + metric_names = paste0(metrics, "_significance"), period = period, start_months = as.numeric(start.months), calib_method = calib.method) - + aggregated_significance <- aggregated_significance == 1 + ## TODO: Separate significance and normal metrics ####### PLOT SCORECARDS ########## ## Create simple scorecard tables @@ -167,7 +168,7 @@ Scorecards_plotting <- function(recipe) { start.months = start.months, forecast.months = forecast.months, region.names = names(regions), - metrics = metrics, + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, plot.legend = plot.legend, @@ -198,7 +199,7 @@ Scorecards_plotting <- function(recipe) { start.months = start.months, forecast.months = forecast.months, region.names = names(regions), - metrics = metrics, + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, plot.legend = plot.legend, @@ -226,8 +227,8 @@ Scorecards_plotting <- function(recipe) { end.year = end.year, start.months = start.months, forecast.months = forecast.months, - region.names = attributes(regions)$names, - metrics = metrics, + region.names = names(regions), + metrics = attributes(aggregated_metrics)$metrics, table.label = table.label, fileout.label = fileout.label, legend.white.space = legend.white.space, diff --git a/recipe_bigpredidata_oper.yml b/recipe_bigpredidata_oper.yml new file mode 100644 index 0000000000000000000000000000000000000000..04ba65daac2ee397a3c2edac24567d1ecc746507 --- /dev/null +++ b/recipe_bigpredidata_oper.yml @@ -0,0 +1,84 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + # - {name: rsds, freq: monthly_mean} + Datasets: + System: + - {name: ECMWF-SEAS5.1} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: 2025 # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: none + Skill: + metric: crpss rpss + save: all + cross_validation: yes + Probabilities: + percentiles: + Terciles: [1/3, 2/3] + P10: [1/10] + P90: [9/10] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, extreme_probabilities + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /home/Earth/vagudets/oldhome/git/sunset/ # replace with the directory where your code is + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: script_bigpredidata_oper.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 04:00 # hh:mm + processors_per_job: 32 + custom_directives: ['#SBATCH --exclusive'] + platform: nord4 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml index 890f5366bb95e5042c1c3d3702e73634fcaaf259..ff288f46096dcbb74fbf15c663155e95cd709b2c 100644 --- a/recipe_ecvs_ano_seas.yml +++ b/recipe_ecvs_ano_seas.yml @@ -11,8 +11,8 @@ Analysis: System: #- {name: 'Meteo-France-System8'} #- {name: 'CMCC-SPS3.5'} - - {name: 'ECMWF-SEAS5.1'} - #- {name: 'UK-MetOffice-Glosea603'} + - {name: 'ECMWF-SEAS5.1'} + # - {name: 'UK-MetOffice-Glosea602'} ##- {name: 'NCEP-CFSv2'} #- {name: 'DWD-GCFS2.1'} #- {name: 'ECCC-GEM5.2-NEMO'} @@ -24,15 +24,15 @@ Analysis: Reference: name: ERA5 # Mandatory, str: Reference codename. See docu. Time: - sdate: '1201' - fcst_year: '2024' + sdate: '0101' + fcst_year: # '2024' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' ftime_min: 1 # Mandatory, int: First leadtime time step in months ftime_max: 6 # Mandatory, int: Last leadtime time step in months Region: - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} - #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + # - {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} Regrid: method: conservative # Mandatory, str: Interpolation method. See docu. type: to_system @@ -73,7 +73,7 @@ Analysis: Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} start_months: NULL - metric: mean_bias enscorr rpss crpss EnsSprErr + metric: rpss metric_aggregation: 'skill' #inf_to_na: yes table_label: NULL diff --git a/script_bigpredidata_oper.R b/script_bigpredidata_oper.R new file mode 100644 index 0000000000000000000000000000000000000000..3756a4426a0f4db4671786433e06c71db0ecaeac --- /dev/null +++ b/script_bigpredidata_oper.R @@ -0,0 +1,37 @@ +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper_test_sfcWind.yml" +#recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) + +source("modules/Crossval/Crossval_anomalies.R") +data <- Crossval_anomalies(recipe = recipe, data = data) + +# Save calibrated hindcast, forecast and probabilities + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = data, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = data$probs) +#if (!is.null(data$fcst)) { +# # Required to plot a forecast: +# data$fcst <- res$fcst +# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, +# significance = TRUE, probabilities = res$probs) +#} else { +# Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, +# significance = TRUE) +#} +