From 91a287aa52fe1cc98c46d1b170ebaeed4a6a3a46 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 11:40:58 +0200 Subject: [PATCH 01/11] test rpss meanbias and enscorr with subseasonal --- .../Crossval/Crossval_multimodel_metrics.R | 81 +++++++++++++------ 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R index d59b71db..b0857726 100644 --- a/modules/Crossval/Crossval_multimodel_metrics.R +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -20,7 +20,7 @@ source("modules/Visualization/Visualization.R") # to load .warning from utils Crossval_multimodel_metrics <- function(recipe, data = NULL, Fair = FALSE) { - + horizon <- tolower(recipe$Analysis$Horizon) ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs @@ -54,6 +54,12 @@ Crossval_multimodel_metrics <- function(recipe, } else { datos <- append(list(obs = data$obs), data$hcst) } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } ## CRPS metrics only make sense in pool method: if (any(c('crps', 'crpss') %in% requested_metrics)) { # CRPS @@ -68,13 +74,17 @@ Crossval_multimodel_metrics <- function(recipe, memb_dim = 'ensemble'))}, ncores = ncores)$output1 skill_metrics$crps <- crps - # Build the reference forecast: - ref_clim <- Apply(list(datos$obs, tmp), - target_dims = list(c('ensemble', 'syear'), NULL), - function(x, y) { - Subset(x, along = 'syear', - indices = cross[[y]]$train.dexes, drop = T)}, - ncores = ncores, output_dims = 'ensemble')$output1 + if (is.null(datos$ref_obs)) { + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + } else { + ref_clim <- datos$ref_obs + } ## Do we want to do this...? # CRPS Climatology datos <- append(list(ref = ref_clim), datos) @@ -123,21 +133,6 @@ Crossval_multimodel_metrics <- function(recipe, skill_metrics$enscorr <- enscorr$corr skill_metrics$enscorr_significance <- enscorr$sign } - if ('mean_bias' %in% requested_metrics) { - ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) - mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - Bias(exp = res, - obs = obs, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = alpha)}, - ncores = ncores) - skill_metrics$mean_bias <- mean_bias$bias - skill_metrics$mean_bias_significance <- mean_bias$sig - } if ('rms' %in% requested_metrics) { rms <- Apply(datos, target_dims = c('syear', 'ensemble'), function(obs, ...) { @@ -241,6 +236,14 @@ Crossval_multimodel_metrics <- function(recipe, # Compute rps for (ps in 1:length(categories)) { + if (horizon == 'subseasonal') { + data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]], + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]], + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } if ('rps' %in% requested_metrics) { rps <- RPS(exp = data$probs$hcst[[ps]], obs = data$probs$obs[[ps]], memb_dim = NULL, @@ -285,6 +288,38 @@ Crossval_multimodel_metrics <- function(recipe, "_significance")]] <- rpss$sign } } + # keep mean_bias at the end of the code to overwrite object if needed. + if ('mean_bias' %in% requested_metrics) { + if (!is.null(data$hcst.full_val)) { + if (inherits(data$hcst.full_val, "s2dv_cube")) { + data$hcst <- list(data$hcst.full_val) + datos <- list(obs = data$obs.full_val$data) + datos <- append(datos, lapply(data$hcst, "[[", "data")) + } else { + datos <- append(list(obs = data$obs.full_val), data$hcst.full_val) + } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } + } + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + } # Save metrics skill_metrics <- lapply(skill_metrics, -- GitLab From 205c0d8a7776d121f11a47bc52387a1933493070 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 11:46:02 +0200 Subject: [PATCH 02/11] recipe calls on top --- .../Crossval/Crossval_multimodel_metrics.R | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R index b0857726..8b163f9c 100644 --- a/modules/Crossval/Crossval_multimodel_metrics.R +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -21,9 +21,12 @@ Crossval_multimodel_metrics <- function(recipe, data = NULL, Fair = FALSE) { horizon <- tolower(recipe$Analysis$Horizon) + save <- tolower(recipe$Analysis$Workflow$Skill$save) ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs + # ---------------------- + # If the data$ref_obs is not NULL this is not needed: cross.method <- recipe$Analysis$cross.method if (is.null(cross.method)) { cross.method <- 'leave-one-out' @@ -33,13 +36,19 @@ Crossval_multimodel_metrics <- function(recipe, cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, amt.points = sdate_dim, amt.points_cor = NULL) - ## What is this for? tmp <- array(1:length(cross), c(syear = length(cross))) + # ------------------------------ alpha <- recipe$Analysis$alpha if (is.null(alpha)) { alpha <- 0.05 } + # Probabilistic metrics for categories (for RPS(S)) + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(x) { + eval(parse(text = x))}, USE.NAMES = FALSE)}) + ## START SKILL ASSESSMENT: skill_metrics <- list() requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, @@ -217,13 +226,7 @@ Crossval_multimodel_metrics <- function(recipe, skill_metrics$covariance <- covariance } } - # Probabilistic metrics for categories - - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(x) { - eval(parse(text = x))}, USE.NAMES = FALSE)}) - ## TODO: Understand this + # If Fair, RPS(S) requires the number of ensemble members if (!is.null(datos)) { if (Fair) { nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) @@ -336,7 +339,7 @@ Crossval_multimodel_metrics <- function(recipe, # Drop dimensions to work with saving and visualization modules skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) # Save to netCDF - if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { + if (save == 'all') { original_dir <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") save_metrics(recipe = recipe, -- GitLab From b8bceac63786c4cc8226878f796ded6cc5933d16 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 11:47:47 +0200 Subject: [PATCH 03/11] keep one code for metrics --- modules/Crossval/Crossval_metrics.R | 501 ++++++++++-------- .../Crossval/Crossval_multimodel_metrics.R | 352 ------------ 2 files changed, 275 insertions(+), 578 deletions(-) delete mode 100644 modules/Crossval/Crossval_multimodel_metrics.R diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index fc3cd455..8b163f9c 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -1,4 +1,9 @@ - +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss source("modules/Saving/Saving.R") source("modules/Crossval/R/tmp/RPS.R") source("modules/Crossval/R/RPS_clim.R") @@ -9,101 +14,269 @@ source("modules/Crossval/R/tmp/Corr.R") source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") -source("modules/Crossval/R/tmp/CST_MergeDims.R") - -## data_crossval is the result from function full_crossval_anomalies or similar. -## this is a list with the required elements: - ## probs is a list with - ## probs$hcst_ev and probs$obs_ev - ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles - ## each element will be an array with 'cat' dimension - ## the same for probs$obs_ev - ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices - ## in this case cross validated anomalies - ## obs is a s2dv_cube for the post-processed obs - ## in this case cross validated anomalies - ## fcst is a s2dv_cube for the post-processed fcst - ## in this case cross anomalies with the full hindcast period - ## this object is not required for skill assessment - ## hcst.full_val and obs.full_val are the original data to compute mean bias - ## cat_lims used to compute the probabilities - ## this object is not required for skill assessment - ## ref_obs_tr is an array with the cross-validate observed anomalies - ## to be used as reference forecast in the CRPSS and CRPS_clim - ## it is computed from the training indices -## the recipe could be used to read the Percentiles -## if fair is TRUE, the nmemb used to compute the probabilities is needed - ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_metrics <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils +Crossval_multimodel_metrics <- function(recipe, + data = NULL, + Fair = FALSE) { + horizon <- tolower(recipe$Analysis$Horizon) + save <- tolower(recipe$Analysis$Workflow$Skill$save) ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs + # ---------------------- + # If the data$ref_obs is not NULL this is not needed: + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # Prepare indices for crossval + sdate_dim <- dim(data$hcst[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + # ------------------------------ + alpha <- recipe$Analysis$alpha + if (is.null(alpha)) { + alpha <- 0.05 + } + + # Probabilistic metrics for categories (for RPS(S)) categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(x) { eval(parse(text = x))}, USE.NAMES = FALSE)}) - # TODO: distinguish between rpss and bss - # if 1 percentile -> bss - # if more than 1 -> rpss - # TODO: for subseasonal check if the dimension sday is 1 - ## exe_rps <- unlist(lapply(categories, function(x) { - ## if (length(x) > 1) { - ## x <- x[1] *100 - ## } - ## return(x)})) - if (is.null(alpha)) { - alpha <- 0.05 - } ## START SKILL ASSESSMENT: skill_metrics <- list() requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]]) + ", | |,")[[1]]) + if (!is.null(data)) { + ## TODO: Change 'datos' name + # convert $data elements to list to use multiApply: + if (inherits(data$hcst, "s2dv_cube")) { + data$hcst <- list(data$hcst) + datos <- list(obs = data$obs$data) + datos <- append(datos, lapply(data$hcst, "[[", "data")) + } else { + datos <- append(list(obs = data$obs), data$hcst) + } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } + ## CRPS metrics only make sense in pool method: + if (any(c('crps', 'crpss') %in% requested_metrics)) { + # CRPS + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + if (is.null(datos$ref_obs)) { + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + } else { + ref_clim <- datos$ref_obs + } + ## Do we want to do this...? + # CRPS Climatology + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + # CRPSS + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + ## TODO: We don't need obs here + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + ## TODO: We only needs obs here + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # If Fair, RPS(S) requires the number of ensemble members + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } - # The recipe allows to requset more than only terciles: + # Compute rps for (ps in 1:length(categories)) { - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - data_crossval$probs$hcst_ev[[ps]] <- MergeDims(data_crossval$probs$hcst_ev[[ps]], + if (horizon == 'subseasonal') { + data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]], merge_dims = c('sweek', 'syear'), rename_dim = 'syear', na.rm = FALSE) - data_crossval$probs$obs_ev[[ps]] <- MergeDims(data_crossval$probs$obs_ev[[ps]], + data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]], merge_dims = c('sweek', 'syear'), rename_dim = 'syear', na.rm = FALSE) } - if ('rps' %in% requested_metrics) { - rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], - obs = data_crossval$probs$obs_ev[[ps]], memb_dim = NULL, + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, + Fair = Fair, nmemb = nmemb, ncores = ncores) - rps_clim <- Apply(list(data_crossval$probs$obs_ev[[ps]]), + rps_clim <- Apply(list(data$probs$obs[[ps]]), target_dims = c('bin', 'syear'), - RPS_clim, bin_dim_abs = 'bin', Fair = fair, + RPS_clim, bin_dim_abs = 'bin', Fair = Fair, cross.val = FALSE, ncores = ncores)$output1 if (is.null(names(categories)[ps])) { skill_metrics$rps <- rps skill_metrics$rps_clim <- rps_clim } else { # names based on the categories: - # To use it when visualization works for more rps skill_metrics[[paste0('rps-', names(categories)[ps])]] <- rps skill_metrics[[paste0('rps_clim-', names(categories)[ps])]] <- rps_clim } } if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[ps]], - obs = data_crossval$probs$obs_ev[[ps]], + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, cat_dim = 'bin', nmemb = nmemb, dat_dim = NULL, - prob_thresholds = categories[[ps]], + prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, - Fair = fair, weights_exp = NULL, weights_ref = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, sig_method.type = 'two.sided.approx', alpha = alpha, ncores = ncores) @@ -111,193 +284,69 @@ Crossval_metrics <- function(recipe, data_crossval, skill_metrics[[paste0('rpss')]] <- rpss$rpss skill_metrics[[paste0('rpss_significance')]] <- rpss$sign } else { - # TO USE IT when visualization works for more rpsss + # names based on the categories: skill_metrics[[paste0('rpss-', names(categories)[ps])]] <- rpss$rpss - ## TODO: Change this name? skill_metrics[[paste0('rpss-', - names(categories)[ps], + names(categories)[ps], "_significance")]] <- rpss$sign } } - } - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - # The evaluation of all metrics are done with extra sample - data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs <- CST_MergeDims(data_crossval$obs, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$hcst.full_val <- CST_MergeDims(data_crossval$hcst.full_val, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs.full_val <- CST_MergeDims(data_crossval$obs.full_val, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - } - - if ('crps' %in% requested_metrics) { - crps <- CRPS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = ncores) - skill_metrics$crps <- crps - crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, - obs = data_crossval$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, ncores = ncores) - skill_metrics$crps_clim <- crps_clim - } - if ('crpss' %in% requested_metrics) { - crpss <- CRPSS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - ref = data_crossval$ref_obs_tr, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - } - - if ('enscorr' %in% requested_metrics) { - enscorr <- Corr(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = alpha, - ncores = ncores) - skill_metrics$enscorr <- enscorr$corr - skill_metrics$enscorr_significance <- enscorr$sign - } - if ('mean_bias' %in% requested_metrics) { - if (!is.null(data_crossval$hcst.full_val$data)) { - mean_bias <- Bias(exp = data_crossval$hcst.full_val$data, - obs = data_crossval$obs.full_val$data, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = alpha, - na.rm = na.rm, - ncores = ncores) + # keep mean_bias at the end of the code to overwrite object if needed. + if ('mean_bias' %in% requested_metrics) { + if (!is.null(data$hcst.full_val)) { + if (inherits(data$hcst.full_val, "s2dv_cube")) { + data$hcst <- list(data$hcst.full_val) + datos <- list(obs = data$obs.full_val$data) + datos <- append(datos, lapply(data$hcst, "[[", "data")) + } else { + datos <- append(list(obs = data$obs.full_val), data$hcst.full_val) + } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } + } + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) skill_metrics$mean_bias <- mean_bias$bias skill_metrics$mean_bias_significance <- mean_bias$sig - } else { - info(recipe$Run$logger, - "Full values not available") } - } - if ('enssprerr' %in% requested_metrics) { - enssprerr <- SprErr(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - na.rm = na.rm, - ncores = ncores) - skill_metrics$SprErr <- enssprerr$ratio - skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha - } - if ('rms' %in% requested_metrics) { - rms <- RMS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, - ncores = ncores) - skill_metrics$rms <- rms$rms - } - if ('rmss' %in% requested_metrics) { - rmss <- RMSSS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - ref = res$ref_obs_tr, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, sign = TRUE, - ncores = ncores) - skill_metrics$rmss <- rmss$rmss - skill_metrics$rmss_significance <- rmss$sign - } - if (is.null(data_crossval$hcst_EM)) { - data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data, - dims = 'ensemble', - drop = TRUE) - } - if (any(c('std', 'standard_deviation') %in% requested_metrics)) { - std_hcst <- Apply(data = data_crossval$hcst_EM, - target_dims = 'syear', - fun = 'sd')$output1 - - std_obs <- Apply(data = data_crossval$obs$data, - target_dims = 'syear', - fun = 'sd')$output1 - - skill_metrics[['std_hcst']] <- std_hcst - skill_metrics[['std_obs']] <- std_obs - } - if (any(c('var', 'variance') %in% requested_metrics)) { - ## Calculate variance - var_hcst <- Apply(data = data_crossval$hcst_EM, - target_dims = 'syear', - fun = 'sd')$output1 ^ 2 - - var_obs <- Apply(data = data_crossval$obs$data, - target_dims = 'syear', - fun = 'sd')$output1 ^ 2 - - skill_metrics[['var_hcst']] <- var_hcst - skill_metrics[['var_obs']] <- var_obs - } ## close if on variance - if ('n_eff' %in% requested_metrics) { - ## Calculate degrees of freedom - n_eff <- s2dv::Eno(data = data_crossval$obs$data, - time_dim = 'syear', - na.action = na.pass, - ncores = ncores) - skill_metrics[['n_eff']] <- n_eff - } ## close on n_eff - if (any(c('cov', 'covariance') %in% requested_metrics)) { - covariance <- Apply(data = list(x = data_crossval$obs$data, - y = data_crossval$hcst_EM), - target_dims = 'syear', - fun = function(x, y) { - cov(as.vector(x), as.vector(y), - use = "everything", - method = "pearson")})$output1 - skill_metrics$covariance <- covariance } - original <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(original, "/", "outputs/Skill/") - + # Save metrics skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Save metrics - # reduce dimension to work with Visualization module: + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res) + }) + # Drop dimensions to work with saving and visualization modules skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) - - if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { + # Save to netCDF + if (save == 'all') { + original_dir <- recipe$Run$output_dir + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") save_metrics(recipe = recipe, metrics = skill_metrics, data_cube = data_crossval$hcst, agg = 'global', outdir = recipe$Run$output_dir) + recipe$Run$output_dir <- original_dir } - recipe$Run$output_dir <- original return(skill_metrics) } - - diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R deleted file mode 100644 index 8b163f9c..00000000 --- a/modules/Crossval/Crossval_multimodel_metrics.R +++ /dev/null @@ -1,352 +0,0 @@ -# One function to compute all the possible metrics or statistics -# datos is a list returned by load_multimodel or load_multimodel_mean - ## this reduce memory and it can be used to compute several metrics - ## in the mean the ensemble dim is length 1 for each model -# probs is a list returned by load_multimodel_probs - ## this is needed to compute rps/rpss -source("modules/Saving/Saving.R") -source("modules/Crossval/R/tmp/RPS.R") -source("modules/Crossval/R/RPS_clim.R") -source("modules/Crossval/R/CRPS_clim.R") -source("modules/Crossval/R/tmp/RPSS.R") -source("modules/Crossval/R/tmp/RandomWalkTest.R") -source("modules/Crossval/R/tmp/Corr.R") -source("modules/Crossval/R/tmp/Bias.R") -source("modules/Crossval/R/tmp/SprErr.R") -source("modules/Crossval/R/tmp/Eno.R") -source("modules/Crossval/R/tmp/GetProbs.R") -source("modules/Visualization/Visualization.R") # to load .warning from utils - -Crossval_multimodel_metrics <- function(recipe, - data = NULL, - Fair = FALSE) { - horizon <- tolower(recipe$Analysis$Horizon) - save <- tolower(recipe$Analysis$Workflow$Skill$save) - ncores <- recipe$Analysis$ncores - alpha <- recipe$Analysis$Skill$alpha - na.rm <- recipe$Analysis$remove_NAs - # ---------------------- - # If the data$ref_obs is not NULL this is not needed: - cross.method <- recipe$Analysis$cross.method - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' - } - # Prepare indices for crossval - sdate_dim <- dim(data$hcst[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - tmp <- array(1:length(cross), c(syear = length(cross))) - # ------------------------------ - alpha <- recipe$Analysis$alpha - if (is.null(alpha)) { - alpha <- 0.05 - } - - # Probabilistic metrics for categories (for RPS(S)) - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(x) { - eval(parse(text = x))}, USE.NAMES = FALSE)}) - - ## START SKILL ASSESSMENT: - skill_metrics <- list() - requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]]) - if (!is.null(data)) { - ## TODO: Change 'datos' name - # convert $data elements to list to use multiApply: - if (inherits(data$hcst, "s2dv_cube")) { - data$hcst <- list(data$hcst) - datos <- list(obs = data$obs$data) - datos <- append(datos, lapply(data$hcst, "[[", "data")) - } else { - datos <- append(list(obs = data$obs), data$hcst) - } - if (horizon == 'subseasonal') { - # The evaluation of all metrics are done with extra sample - datos <- lapply(datos, function(x) { - MergeDims(x, merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE)}) - } - ## CRPS metrics only make sense in pool method: - if (any(c('crps', 'crpss') %in% requested_metrics)) { - # CRPS - crps <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, - time_dim = 'syear', - memb_dim = 'ensemble'))}, - ncores = ncores)$output1 - skill_metrics$crps <- crps - if (is.null(datos$ref_obs)) { - # Build the reference forecast: - ref_clim <- Apply(list(datos$obs, tmp), - target_dims = list(c('ensemble', 'syear'), NULL), - function(x, y) { - Subset(x, along = 'syear', - indices = cross[[y]]$train.dexes, drop = T)}, - ncores = ncores, output_dims = 'ensemble')$output1 - } else { - ref_clim <- datos$ref_obs - } - ## Do we want to do this...? - # CRPS Climatology - datos <- append(list(ref = ref_clim), datos) - crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), - fun = function(ref, obs) { - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - mean(s2dv:::.CRPS(exp = ref, obs = obs, - dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble'))}, - ncores = ncores)$output1 - skill_metrics$crps_clim <- crps_clim - - # CRPSS - crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(ref, obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - CRPSS(exp = res, obs = obs, ref = ref, - memb_dim = 'ensemble', Fair = FALSE, - time_dim = 'syear', clim.cross.val = FALSE)}, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - datos <- datos[-1] - } - ## Deterministic metrics using ensemble mean: - if ('enscorr' %in% requested_metrics) { - enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - Corr(exp = res, - obs = obs, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = alpha)}, ncores = ncores) - skill_metrics$enscorr <- enscorr$corr - skill_metrics$enscorr_significance <- enscorr$sign - } - if ('rms' %in% requested_metrics) { - rms <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - RMS(exp = res, - obs = obs, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha)}, - ncores = ncores) - skill_metrics$rms <- rms$rms - } - ####### Do we need reference fcst to compute rmss?? - if ('rmss' %in% requested_metrics) { - # if (is.null(res$ref_obs_tr)) { - # } - rmss <- Apply(datos, target_dims = c('syear','ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - RMSSS(exp = res, - obs = obs, - #ref = res$ref_obs_tr, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, sign = TRUE)}, - ncores = ncores) - skill_metrics$rmss <- rmss$rmss - skill_metrics$rmss_significance <- rmss$sign - } - if (any(c('std', 'standard_deviation') %in% requested_metrics)) { - # This are used to compute correlation for the scorecards - # It's the temporal standard deviation of the ensmean - ## TODO: We don't need obs here - std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), - function(...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - sd(as.vector(res))}, - ncores = ncores)$output1 - ## TODO: We only needs obs here - std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), - fun = 'sd')$output1 - skill_metrics[['std_hcst']] <- std_hcst - skill_metrics[['std_obs']] <- std_obs - } - - if (any(c('var', 'variance') %in% requested_metrics)) { - ## Calculate variance - var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), - fun = function(...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - variance <- var(as.vector(res))}, - ncores = ncores)$output1 - - var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), - fun = function(x) {var(as.vector(x))}, - ncores = ncores)$output1 - skill_metrics[['var_hcst']] <- var_hcst - skill_metrics[['var_obs']] <- var_obs - } - if ('n_eff' %in% requested_metrics) { - ## Calculate degrees of freedom - n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', - na.action = na.pass, - ncores = ncores) - skill_metrics[['n_eff']] <- n_eff - } - if (any(c('cov', 'covariance') %in% requested_metrics)) { - # The covariance of the ensemble mean - covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - cov(as.vector(obs), as.vector(res), - use = "everything", - method = "pearson")}, - ncores = ncores)$output1 - skill_metrics$covariance <- covariance - } - } - # If Fair, RPS(S) requires the number of ensemble members - if (!is.null(datos)) { - if (Fair) { - nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) - } else { - nmemb <- NULL - } - } else { - nmemb <- NULL - } - - # Compute rps - for (ps in 1:length(categories)) { - if (horizon == 'subseasonal') { - data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]], - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]], - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - } - if ('rps' %in% requested_metrics) { - rps <- RPS(exp = data$probs$hcst[[ps]], - obs = data$probs$obs[[ps]], memb_dim = NULL, - cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', - Fair = Fair, nmemb = nmemb, - ncores = ncores) - rps_clim <- Apply(list(data$probs$obs[[ps]]), - target_dims = c('bin', 'syear'), - RPS_clim, bin_dim_abs = 'bin', Fair = Fair, - cross.val = FALSE, ncores = ncores)$output1 - if (is.null(names(categories)[ps])) { - skill_metrics$rps <- rps - skill_metrics$rps_clim <- rps_clim - } else { - # names based on the categories: - skill_metrics[[paste0('rps-', names(categories)[ps])]] <- rps - skill_metrics[[paste0('rps_clim-', - names(categories)[ps])]] <- rps_clim - } - } - if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = data$probs$hcst[[ps]], - obs = data$probs$obs[[ps]], - ref = NULL, # ref is 1/3 by default if terciles - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'bin', nmemb = nmemb, - dat_dim = NULL, - prob_thresholds = categories[[ps]], # un use param when providing probs - indices_for_clim = NULL, - Fair = Fair, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, na.rm = na.rm, - sig_method.type = 'two.sided.approx', alpha = alpha, - ncores = ncores) - if (is.null(names(categories)[ps])) { - skill_metrics[[paste0('rpss')]] <- rpss$rpss - skill_metrics[[paste0('rpss_significance')]] <- rpss$sign - } else { - # names based on the categories: - skill_metrics[[paste0('rpss-', names(categories)[ps])]] <- rpss$rpss - skill_metrics[[paste0('rpss-', - names(categories)[ps], - "_significance")]] <- rpss$sign - } - } - # keep mean_bias at the end of the code to overwrite object if needed. - if ('mean_bias' %in% requested_metrics) { - if (!is.null(data$hcst.full_val)) { - if (inherits(data$hcst.full_val, "s2dv_cube")) { - data$hcst <- list(data$hcst.full_val) - datos <- list(obs = data$obs.full_val$data) - datos <- append(datos, lapply(data$hcst, "[[", "data")) - } else { - datos <- append(list(obs = data$obs.full_val), data$hcst.full_val) - } - if (horizon == 'subseasonal') { - # The evaluation of all metrics are done with extra sample - datos <- lapply(datos, function(x) { - MergeDims(x, merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE)}) - } - } - ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) - mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - Bias(exp = res, - obs = obs, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = alpha)}, - ncores = ncores) - skill_metrics$mean_bias <- mean_bias$bias - skill_metrics$mean_bias_significance <- mean_bias$sig - } - - } - # Save metrics - skill_metrics <- lapply(skill_metrics, - function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Drop dimensions to work with saving and visualization modules - skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) - # Save to netCDF - if (save == 'all') { - original_dir <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data_crossval$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - recipe$Run$output_dir <- original_dir - } - return(skill_metrics) -} -- GitLab From 60c3371d3e07881f6d8576b9a791b2c3ff2f9c05 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 12:18:17 +0200 Subject: [PATCH 04/11] rename function crossval_metrics --- modules/Crossval/Crossval_metrics.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 8b163f9c..778bd44b 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -17,7 +17,7 @@ source("modules/Crossval/R/tmp/Eno.R") source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils -Crossval_multimodel_metrics <- function(recipe, +Crossval_metrics <- function(recipe, data = NULL, Fair = FALSE) { horizon <- tolower(recipe$Analysis$Horizon) -- GitLab From d70e554588933b314f8fdd6d3826b61db76e8161 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 12:34:06 +0200 Subject: [PATCH 05/11] rename outputs --- modules/Crossval/Crossval_anomalies.R | 10 ++++------ modules/Crossval/Crossval_calibration.R | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 280850dd..11c3baf1 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -355,13 +355,11 @@ Crossval_anomalies <- function(recipe, data) { } return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, - hcst_EM = hcst_EM, fcst_EM = fcst_EM, - #cat_lims = list(hcst_tr = res$lims_ano_hcst_tr, - # obs_tr = res$lims_ano_obs_tr), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev, + cat_lims = list(obs = res$lims_ano_obs_tr), + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, fcst = fcst_probs), - ref_obs_tr = res$ano_obs_tr)) + ref_obs = res$ano_obs_tr)) } diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 394a3de5..0c06fd19 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -485,10 +485,10 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, cat_lims = list(obs = lims), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev, + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, fcst = fcst_probs), - ref_obs_tr = res$obs_tr)) + ref_obs = res$obs_tr)) } -- GitLab From e0e1f8d0b6d5b4b66f489dbcec9fbb2374c7d0c1 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 12:57:54 +0200 Subject: [PATCH 06/11] formatting --- modules/Crossval/Crossval_anomalies.R | 20 +++++++++++--------- modules/Crossval/Crossval_calibration.R | 10 +++++----- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 11c3baf1..575244f9 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -1,8 +1,10 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages source("modules/Crossval/R/tmp/GetProbs.R") - +source("modules/Crossval/R/tmp/CST_MergeDims.R") + Crossval_anomalies <- function(recipe, data) { + horizon <- tolower(recipe$Analysis$Horizon) cross.method <- recipe$Analysis$cross.method # TODO move check if (is.null(cross.method)) { @@ -53,7 +55,7 @@ Crossval_anomalies <- function(recipe, data) { indices = cross$eval.dexes, drop = 'selected') obs_ev <- Subset(obs, along = 'syear', indices = cross$eval.dexes, drop = 'selected') - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) @@ -69,13 +71,13 @@ Crossval_anomalies <- function(recipe, data) { clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) # compute anomalies: ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { @@ -91,14 +93,14 @@ Crossval_anomalies <- function(recipe, data) { na.rm = TRUE))})}, prob_lims = list(unlist(categories)), ncores = ncores)$output1 - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { ano_hcst_tr <- SplitDim(ano_hcst_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], - length(cross[[t]]$train.dexes))) + length(cross$train.dexes))) ano_obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], - length(cross[[t]]$train.dexes))) + length(cross$train.dexes))) } return(list(ano_hcst_ev = ano_hcst_ev, diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 0c06fd19..f1900e63 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -4,7 +4,7 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Crossval/R/tmp/CST_MergeDims.R") Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { - + horizon <- tolower(recipe$Analysis$Horizon) cal_method <- recipe$Analysis$Workflow$Calibration$method cross.method <- recipe$Analysis$cross.method # TODO move check @@ -52,7 +52,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { obs_ev <- Subset(obs, along = 'syear', indices = cross$eval.dexes) - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = na.rm) @@ -117,7 +117,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { prob_lims = unlist(categories), ncores = ncores)$output1 names(dim(lims_obs_tr))[1] <- 'bin' - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], @@ -248,7 +248,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { # Forecast calibration: lims <- list() if (!is.null(data$fcst)) { - if (tolower(recipe$Analysis$Horizon) %in% c('subseasonal')) { + if (horizon %in% c('subseasonal')) { # merge sample dimensions and select central week hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) @@ -404,7 +404,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { hcst <- data$hcst hcst$data <- res$cal_hcst_ev - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { # Keep only de central sday position from the dimension if (dim(hcst$data)['sday'] > 1) { hcst <- CST_Subset(hcst, along = 'sday', -- GitLab From e3a331f94056ac1bdbdcccbf01b69e53d76ca241 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 17:38:16 +0200 Subject: [PATCH 07/11] subseasonal anomalies --- modules/Crossval/Crossval_anomalies.R | 163 +++++++++++++++++++------- 1 file changed, 121 insertions(+), 42 deletions(-) diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 575244f9..07f38ed7 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -86,6 +86,7 @@ Crossval_anomalies <- function(recipe, data) { na.rm = TRUE))})}, prob_lims = list(unlist(categories)), ncores = ncores)$output1 + names(dim(lims_ano_hcst_tr))[1] <- 'bin' lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -93,6 +94,7 @@ Crossval_anomalies <- function(recipe, data) { na.rm = TRUE))})}, prob_lims = list(unlist(categories)), ncores = ncores)$output1 + names(dim(lims_ano_obs_tr))[1] <- 'bin' if (horizon == 'subseasonal') { ano_hcst_tr <- SplitDim(ano_hcst_tr, split_dim = 'syear', new_dim_name = 'sday', @@ -102,7 +104,94 @@ Crossval_anomalies <- function(recipe, data) { indices = rep(1:dim(data$hcst$data)['sday'], length(cross$train.dexes))) } - + # check order before storing: + # Add back 'sday' dim if needed or select central 'sday' + if ('sday' %in% names(dim(ano_hcst_ev))) { + if (dim(ano_hcst_ev)['sday'] != 1) { + # To fail because it should not appear + } + } else { + ano_hcst_ev <- InsertDim(ano_hcst_ev, 'sday', len = 1, pos = 1) + } + # remove syear dim from ev objects + if ('syear' %in% names(dim(ano_hcst_ev))) { + if (dim(ano_hcst_ev)['syear'] == 1) { + ano_hcst_ev <- Subset(ano_hcst_ev, along = 'syear', indices = 1, + drop = 'selected') + } + } + # ---- + if ('sday' %in% names(dim(ano_obs_ev))) { + if (dim(ano_obs_ev)['sday'] > 1) { + # 'sday' dim to select the central day + ano_obs_ev <- Subset(ano_obs_ev, along = 'sday', + indices = central_day) + } + } else { + ano_obs_ev <- InsertDim(ano_obs_ev, 'sday', len = 1, pos = 1) + } + # ---- + if ('sday' %in% names(dim(ano_obs_tr))) { + if (dim(ano_obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + ano_obs_tr <- Subset(ano_obs_tr, along = 'sday', + indices = central_day) + } + } else { + ano_obs_tr <- InsertDim(ano_obs_tr, 'sday', len = 1, pos = 1) + } + # rename dimension to be ensemble + if ('ensemble' %in% names(dim(ano_obs_tr))) { + if (dim(ano_obs_tr)['ensemble'] == 1) { + ano_obs_tr <- Subset(ano_obs_tr, along = 'ensemble', indices = 1, + drop = 'selected') + } else { + stop("why it is longer than 1?") + } + } + if ('syear' %in% names(dim(ano_obs_tr))) { # should be true + names(dim(ano_obs_tr))[which(names(dim(ano_obs_tr)) == 'syear')] <- 'ensemble' + } + # ---- + if ('sday' %in% names(dim(lims_ano_obs_tr))) { + if (dim(lims_ano_obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_ano_obs_tr <- Subset(lims_ano-obs_tr, along = 'sday', + indices = central_day) + } + } else { + lims_ano_obs_tr <- InsertDim(lims_ano_obs_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_ano_obs_tr))) { + if (dim(lims_ano_obs_tr)['ensemble'] == 1) { + lims_ano_obs_tr <- Subset(lims_ano_obs_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("Why it has memb_dim") + } + } + # ---- + if ('sday' %in% names(dim(lims_ano_hcst_tr))) { + if (dim(lims_ano_hcst_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_ano_hcst_tr <- Subset(lims_ano_hcst_tr, along = 'sday', + indices = central_day) + } + } else { + lims_ano_hcst_tr <- InsertDim(lims_ano_hcst_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_ano_hcst_tr))) { + if (dim(lims_ano_hcst_tr)['ensemble'] == 1) { + lims_ano_hcst_tr <- Subset(lims_ano_hcst_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("Why it has memb_dim") + } + } return(list(ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, ano_obs_tr = ano_obs_tr, @@ -110,9 +199,6 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr = lims_ano_obs_tr)) } - output_dims <- names(dim(data$hcst$data))[which(names(dim(data$hcst$data)) != "syear")] - lims_output_dims <- c("bin", - names(dim(data$hcst$data))[which(!names(dim(data$hcst$data)) %in% c("syear", "ensemble"))]) indices <- array(1:length(cross), dim = c(syear = length(cross))) res <- Apply(data = list(indices = indices, exp = data$hcst$data, @@ -120,19 +206,11 @@ Crossval_anomalies <- function(recipe, data) { target_dims = list(indices = NULL, obs = names(dim(data$obs$data)), exp = names(dim(data$hcst$data))), - output_dims = list(ano_hcst_ev = output_dims, - ano_obs_ev = output_dims, - ano_obs_tr = c("dat", "var", "sday", "sweek", - "ensemble", "time", "latitude", "longitude", - "unneeded"), - lims_ano_hcst_tr = lims_output_dims, - lims_ano_obs_tr = lims_output_dims), fun = .cross_anomalies, ncores = 1, ## TODO: Explore options for core distribution cross = cross, categories = categories, recipe = recipe) - info(recipe$Run$logger, "#### Anomalies Cross-validation loop ended #####") @@ -157,22 +235,36 @@ Crossval_anomalies <- function(recipe, data) { rm(lims_tmp_hcst, lims_tmp_obs) gc() - # To make crps_clim to work the reference forecast need to have same dims as obs: - res$ano_obs_tr <- Subset(res$ano_obs_tr, along = 'unneeded', - indices = 1, drop = 'selected') recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Anomalies/") # Forecast anomalies: if (!is.null(data$fcst)) { - clim_hcst <- Apply(data$hcst$data, + if (horizon %in% c('subseasonal')) { + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1) + } else { + # we need to keep original objects (to later compute bias) + hcst <- data$hcst$data + obs <- data$obs$data + fcst <- data$fcst$data + } + clim_hcst <- Apply(hcst, target_dims = c('syear', 'ensemble'), mean, na.rm = na.rm, ncores = ncores)$output1 - data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) - hcst_ano <- Ano(data = data$hcst$data, clim = clim_hcst) + data$fcst$data <- Ano(data = fcst, clim = clim_hcst) + hcst_ano <- Ano(data = hcst, clim = clim_hcst) # Terciles limits using the whole hindcast period: lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { @@ -182,12 +274,12 @@ Crossval_anomalies <- function(recipe, data) { output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) - clim_obs <- Apply(data$obs$data, - target_dims = c('syear', 'ensemble'), - mean, - na.rm = na.rm, - ncores = ncores)$output1 - obs_ano <- Ano(data = data$obs$data, clim = clim_obs) + clim_obs <- Apply(obs, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + obs_ano <- Ano(data = obs, clim = clim_obs) lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -336,24 +428,11 @@ Crossval_anomalies <- function(recipe, data) { type = "fcst") } } - - # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) -# save_metrics(recipe = recipe, -# metrics = list(hcst_EM = -# Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), -# data_cube = data$hcst, agg = agg, -# module = "statistics") -# fcst_EM <- NULL - if (!is.null(data$fcst)) { - fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) -# save_metrics(recipe = recipe, -# metrics = list(fcst_EM = -# Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), -# data_cube = data$fcst, agg = agg, -# module = "statistics") - } else { - fcst_EM <- NULL + # Subset dimension 'sday' for subseasonal for mean bias: + if (horizon == 'subseasonal') { + central_day <- (dim(data$hcst$data)['sday'] + 1)/2 + data$hcst <- CST_Subset(data$hcst, along = 'sday', indices = central_day) + data$obs <- CST_Subset(data$obs, along = 'sday', indices = central_day) } return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, -- GitLab From 975b1d8260e5d00a29402e83c6256f25831524f2 Mon Sep 17 00:00:00 2001 From: nperez Date: Fri, 20 Jun 2025 18:36:49 +0200 Subject: [PATCH 08/11] subseasonal and .drop_dims() and unit tests --- modules/Crossval/Crossval_anomalies.R | 1 + modules/Crossval/Crossval_calibration.R | 1 + modules/Crossval/Crossval_metrics.R | 6 +- .../recipe-seasonal_monthly_crossval.yml | 55 +++++++++ .../testthat/test-seasonal_monthly_crossval.R | 111 ++++++++++++++++++ 5 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 tests/recipes/recipe-seasonal_monthly_crossval.yml create mode 100644 tests/testthat/test-seasonal_monthly_crossval.R diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 07f38ed7..8de05c4b 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -2,6 +2,7 @@ ## This code should be valid for individual months and temporal averages source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Crossval/R/tmp/CST_MergeDims.R") +source("modules/Saving/Saving.R") Crossval_anomalies <- function(recipe, data) { horizon <- tolower(recipe$Analysis$Horizon) diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index f1900e63..17bb133e 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -2,6 +2,7 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Crossval/R/tmp/CST_MergeDims.R") +source("modules/Saving/Saving.R") Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { horizon <- tolower(recipe$Analysis$Horizon) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 778bd44b..d78126a4 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -18,8 +18,8 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Visualization/Visualization.R") # to load .warning from utils Crossval_metrics <- function(recipe, - data = NULL, - Fair = FALSE) { + data = NULL, + Fair = FALSE) { horizon <- tolower(recipe$Analysis$Horizon) save <- tolower(recipe$Analysis$Workflow$Skill$save) ncores <- recipe$Analysis$ncores @@ -344,7 +344,7 @@ Crossval_metrics <- function(recipe, recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data_crossval$hcst, agg = 'global', + data_cube = data$obs, agg = 'global', outdir = recipe$Run$output_dir) recipe$Run$output_dir <- original_dir } diff --git a/tests/recipes/recipe-seasonal_monthly_crossval.yml b/tests/recipes/recipe-seasonal_monthly_crossval.yml new file mode 100644 index 00000000..914e14bc --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_crossval.yml @@ -0,0 +1,55 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2000' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_monthly_crossval.R b/tests/testthat/test-seasonal_monthly_crossval.R new file mode 100644 index 00000000..5712682b --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_crossval.R @@ -0,0 +1,111 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_crossval.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Calibrate data +source("modules/Crossval/Crossval_calibration.R") +calibrated <- Crossval_calibration(recipe = recipe, data = data) + +# Compute skill metrics +source("modules/Crossval/Crossval_metrics.R") +suppressWarnings({invisible(capture.output( +skill_metrics <- Crossval_metrics(recipe, calibrated) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated, + skill_metrics = skill_metrics, probabilities = calibrated$probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("2. Crossval_calibration", { + +expect_equal(is.list(calibrated), TRUE) + +expect_equal(names(calibrated), + c("hcst", "obs", "fcst", "hcst.full_val", "obs.full_val", + "cat_lims", "probs", "ref_obs")) + +expect_equal(class(calibrated$hcst), "s2dv_cube") + +expect_equal(class(calibrated$fcst), "s2dv_cube") + +expect_equal(dim(calibrated$hcst$data), + c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 3, + longitude = 3, ensemble = 25, syear = 8)) + +expect_equal(dim(calibrated$fcst$data), + c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, + latitude = 3, longitude = 3, ensemble = 51)) + +expect_equal(mean(calibrated$fcst$data), + 291.0119, tolerance = 0.0001) + +expect_equal(mean(calibrated$hcst$data), + 290.5232, tolerance = 0.0001) + +expect_equal(as.vector(drop(calibrated$hcst$data)[1, , 2, 3, 4]), + c(291.2313, 290.8534, 291.8334), tolerance = 0.0001) + +expect_equal(range(calibrated$fcst$data), + c(285.3917, 297.5869), tolerance = 0.0001) + +}) + + +#====================================== +test_that("3. Crossval_metrics", { + +expect_equal(is.list(skill_metrics), TRUE) + +expect_equal(names(skill_metrics), + c("crps", "crps_clim", "crpss", + "crpss_significance", "enscorr", "enscorr_significance", + "rpss-set1", "rpss-set1_significance", + "rpss-set2", "rpss-set2_significance")) + +expect_equal(class(skill_metrics[['rpss-set1']]), "array") + +expect_equal(dim(skill_metrics[['rpss-set1']]), + c(var = 1, time = 3, latitude = 3, longitude = 3)) + +expect_equal(dim(skill_metrics[['rpss-set1_significance']]), + dim(skill_metrics[['rpss-set1']])) + +expect_equal(as.vector(skill_metrics[['rpss-set1']][, , 2, 3]), + c(0.013176471, 0.037741176, -0.001223529), tolerance = 0.0001) + +expect_equal(as.vector(skill_metrics[['rpss-set1_significance']][, , 2, 3]), + c(0,0,1)) + +}) + + +test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal(all(basename(list.files(plots, recursive = T)) %in% + c("crps_clim-november.pdf", "crps-november.pdf", "crpss-november.pdf", + "enscorr-november.pdf", "forecast_ensemble_median-20201101.pdf", + "forecast_most_likely_tercile-20201101.pdf", + "rpss-set1-november.pdf", "rpss-set2-november.pdf")), + TRUE) + +expect_equal(length(list.files(plots, recursive = T)), 8) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T) -- GitLab From b4a3ece1bf7ecaaddc9c3c3a0252677e1075ab39 Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 23 Jun 2025 15:57:56 +0200 Subject: [PATCH 09/11] Add nmemb_ref; add Fair and alpha parameters in Apply() functionsD --- modules/Crossval/Crossval_metrics.R | 47 ++++++++++++++++++----------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index d78126a4..7d253117 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -80,7 +80,9 @@ Crossval_metrics <- function(recipe, indices = 1, drop = 'selected') mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, time_dim = 'syear', - memb_dim = 'ensemble'))}, + memb_dim = 'ensemble', + Fair = .Fair))}, + extra_info = list(Fair = Fair), ncores = ncores)$output1 skill_metrics$crps <- crps if (is.null(datos$ref_obs)) { @@ -103,7 +105,10 @@ Crossval_metrics <- function(recipe, indices = 1, drop = 'selected') mean(s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble'))}, + time_dim = 'syear', + memb_dim = 'ensemble', + Fair = .Fair))}, + extra_info = list(Fair = Fair), ncores = ncores)$output1 skill_metrics$crps_clim <- crps_clim @@ -115,8 +120,9 @@ Crossval_metrics <- function(recipe, obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') CRPSS(exp = res, obs = obs, ref = ref, - memb_dim = 'ensemble', Fair = FALSE, + memb_dim = 'ensemble', Fair = .Fair, time_dim = 'syear', clim.cross.val = FALSE)}, + extra_info = list(Fair = Fair), ncores = ncores) skill_metrics$crpss <- crpss$crpss skill_metrics$crpss_significance <- crpss$sign @@ -134,11 +140,13 @@ Crossval_metrics <- function(recipe, time_dim = 'syear', method = 'pearson', memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = alpha)}, ncores = ncores) + memb = FALSE, + conf = FALSE, + pval = FALSE, + sign = TRUE, + alpha = .alpha)}, + extra_info = list(alpha = alpha), + ncores = ncores) skill_metrics$enscorr <- enscorr$corr skill_metrics$enscorr_significance <- enscorr$sign } @@ -150,8 +158,9 @@ Crossval_metrics <- function(recipe, RMS(exp = res, obs = obs, memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha)}, - ncores = ncores) + time_dim = 'syear', alpha = .alpha)}, + extra_info = list(alpha = alpha), + ncores = ncores) skill_metrics$rms <- rms$rms } ####### Do we need reference fcst to compute rmss?? @@ -166,7 +175,9 @@ Crossval_metrics <- function(recipe, obs = obs, #ref = res$ref_obs_tr, memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, sign = TRUE)}, + time_dim = 'syear', alpha = .alpha, + sign = TRUE)}, + extra_info = list(alpha = alpha), ncores = ncores) skill_metrics$rmss <- rmss$rmss skill_metrics$rmss_significance <- rmss$sign @@ -227,14 +238,12 @@ Crossval_metrics <- function(recipe, } } # If Fair, RPS(S) requires the number of ensemble members - if (!is.null(datos)) { - if (Fair) { - nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) - } else { - nmemb <- NULL - } + if (!is.null(datos) && Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + nmemb_ref <- dim(datos[[1]])[['syear']] - 1 } else { nmemb <- NULL + nmemb_ref <- NULL } # Compute rps @@ -273,6 +282,7 @@ Crossval_metrics <- function(recipe, ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, cat_dim = 'bin', nmemb = nmemb, + nmemb_ref = nmemb_ref, dat_dim = NULL, prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, @@ -317,7 +327,8 @@ Crossval_metrics <- function(recipe, obs = obs, time_dim = 'syear', memb_dim = 'ensemble', - alpha = alpha)}, + alpha = .alpha)}, + extra_info = list(alpha = alpha), ncores = ncores) skill_metrics$mean_bias <- mean_bias$bias skill_metrics$mean_bias_significance <- mean_bias$sig -- GitLab From 7cac78406d82dec58ff939efdb85df25e9458f8b Mon Sep 17 00:00:00 2001 From: vagudets Date: Mon, 23 Jun 2025 16:19:50 +0200 Subject: [PATCH 10/11] Add 'CSTools::' to Calibration() --- modules/Crossval/Crossval_calibration.R | 68 ++++++++++++------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R index 17bb133e..75765e75 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -84,20 +84,20 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { na.rm = na.rm, qstep = 0.1, ncores = ncores) } else { - cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) - cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) + cal_hcst_tr <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) } # If precipitation is negative: if (correct_negative) { @@ -288,26 +288,26 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { na.rm = na.rm, qstep = 0.1, ncores = ncores) } else { - data$fcst$data <- Calibration(exp = hcst, - obs = obs, - exp_cor = fcst, - cal.method = cal_method, - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) - hcst_cal <- Calibration(exp = hcst, - obs = obs, - cal.method = cal_method, - eval.method = 'in-sample', - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) + data$fcst$data <- CSTools::Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- CSTools::Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) } if (!('sday' %in% names(dim(data$fcst$data)))) { data$fcst$data <- InsertDim(data$fcst$data, -- GitLab From 9e5a04f1c56206f579e3a9661f2669c4f9ecfa51 Mon Sep 17 00:00:00 2001 From: vagudets Date: Wed, 25 Jun 2025 15:21:42 +0200 Subject: [PATCH 11/11] Add ensemble spread error 'enssprerr' --- modules/Crossval/Crossval_metrics.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 7d253117..6da103a1 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -115,7 +115,7 @@ Crossval_metrics <- function(recipe, # CRPSS crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), fun = function(ref, obs, ...) { - res <- abind(..., along = 2) + res <- abind(..., along = 2) names(dim(res)) <- names(dim(obs)) obs <- Subset(obs, along = 'ensemble', indices = 1, drop = 'selected') @@ -128,6 +128,22 @@ Crossval_metrics <- function(recipe, skill_metrics$crpss_significance <- crpss$sign datos <- datos[-1] } + # Ensemble spread error + if ('enssprerr' %in% requested_metrics) { + enssprerr <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + SprErr(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', pval = TRUE, + na.rm = .na.rm)}, + extra_info = list(na.rm = na.rm), + ncores = ncores) + skill_metrics$ensprerr <- enssprerr$ratio + skill_metrics$ensprerr_significance <- enssprerr$p.val <= alpha + } ## Deterministic metrics using ensemble mean: if ('enscorr' %in% requested_metrics) { enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), -- GitLab