diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 4c48ad8e62e75c2548a92ad625a8bed960936028..31f8c310f7ca596bb089ee70cf93b07862325a0b 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -120,7 +120,7 @@ Crossval_metrics <- function(recipe, data_crossval, } } if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - # The evaluation of all metrics are done with extra sample + # The evaluation of all metrics are done with extra sample data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, merge_dims = c('sweek', 'syear'), rename_dim = 'syear', na.rm = FALSE) @@ -274,16 +274,17 @@ Crossval_metrics <- function(recipe, data_crossval, original <- recipe$Run$output_dir recipe$Run$output_dir <- paste0(original, "/", "outputs/Skill/") - skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) + skill_metrics <- lapply(skill_metrics, + function(x) { + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res) + }) # Save metrics # reduce dimension to work with Visualization module: skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..d59b71db806787de1feb1f41bc07807e87723915 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -0,0 +1,314 @@ +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss 
source("modules/Saving/Saving.R")
source("modules/Crossval/R/tmp/RPS.R")
source("modules/Crossval/R/RPS_clim.R")
source("modules/Crossval/R/CRPS_clim.R")
source("modules/Crossval/R/tmp/RPSS.R")
source("modules/Crossval/R/tmp/RandomWalkTest.R")
source("modules/Crossval/R/tmp/Corr.R")
source("modules/Crossval/R/tmp/Bias.R")
source("modules/Crossval/R/tmp/SprErr.R")
source("modules/Crossval/R/tmp/Eno.R")
source("modules/Crossval/R/tmp/GetProbs.R")
source("modules/Visualization/Visualization.R") # to load .warning from utils

#' Compute all requested skill metrics for a multimodel cross-validation
#'
#' @param recipe List with the full recipe; the fields read here are
#'   `Analysis$ncores`, `Analysis$remove_NAs`, `Analysis$cross.method`,
#'   `Analysis$alpha`, `Analysis$Workflow$Skill$metric`,
#'   `Analysis$Workflow$Probabilities$percentiles`,
#'   `Analysis$Workflow$Skill$save` and `Run$output_dir`.
#' @param data List as returned by load_multimodel / load_multimodel_mean:
#'   `data$obs` (observations), `data$hcst` (one s2dv_cube or a list of them,
#'   one per model) and `data$probs` (hcst/obs probability arrays per
#'   category, needed for rps/rpss). May be NULL, in which case only the
#'   probabilistic metrics in `data$probs` could be computed.
#' @param Fair Logical; if TRUE, rps/rpss are computed with the fair
#'   correction using the pooled ensemble size of all models.
#' @return A named list of metric arrays (logical significance arrays are
#'   converted to numeric and singleton dimensions dropped). If
#'   `recipe$Analysis$Workflow$Skill$save == 'all'`, the metrics are also
#'   written to netCDF under `<output_dir>/outputs/Skill/`.
Crossval_multimodel_metrics <- function(recipe,
                                        data = NULL,
                                        Fair = FALSE) {
  ncores <- recipe$Analysis$ncores
  na.rm <- recipe$Analysis$remove_NAs
  cross.method <- recipe$Analysis$cross.method
  if (is.null(cross.method)) {
    cross.method <- 'leave-one-out'
  }
  # Prepare the cross-validation indices (one train/eval split per syear)
  sdate_dim <- dim(data$hcst[[1]])['syear']
  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
                                            amt.points = sdate_dim,
                                            amt.points_cor = NULL)
  # Index array used to loop over the cross-validation folds inside Apply:
  # element y selects cross[[y]]$train.dexes when building the climatological
  # reference forecast below.
  tmp <- array(1:length(cross), c(syear = length(cross)))
  # NOTE(review): recipe$Analysis$Skill$alpha was previously read but
  # immediately overwritten by recipe$Analysis$alpha; only the latter is
  # honoured here — confirm which field the recipe is meant to use.
  alpha <- recipe$Analysis$alpha
  if (is.null(alpha)) {
    alpha <- 0.05
  }

  ## START SKILL ASSESSMENT:
  skill_metrics <- list()
  requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric,
                                        ", | |,")[[1]])
  # 'datos' gathers obs + one element per model; keep it defined even when
  # data is NULL so the probabilistic section below can test it safely.
  datos <- NULL
  if (!is.null(data)) {
    # Convert the $data elements to a plain list of arrays to use multiApply:
    # datos$obs first, then one array per model.
    if (inherits(data$hcst, "s2dv_cube")) {
      data$hcst <- list(data$hcst)
      datos <- list(obs = data$obs$data)
      datos <- append(datos, lapply(data$hcst, "[[", "data"))
    } else {
      datos <- append(list(obs = data$obs), data$hcst)
    }
    ## CRPS metrics only make sense in pool method:
    if (any(c('crps', 'crpss') %in% requested_metrics)) {
      # CRPS of the multimodel: bind all model ensembles along 'ensemble'
      crps <- Apply(datos, target_dims = c('syear', 'ensemble'),
                    fun = function(obs, ...) {
                      res <- abind(..., along = 2)
                      names(dim(res)) <- names(dim(obs))
                      obs <- Subset(obs, along = 'ensemble',
                                    indices = 1, drop = 'selected')
                      mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL,
                                        time_dim = 'syear',
                                        memb_dim = 'ensemble'))},
                    ncores = ncores)$output1
      skill_metrics$crps <- crps
      # Build the reference forecast: for each syear, the observations of the
      # training years of that cross-validation fold act as ensemble members.
      ref_clim <- Apply(list(datos$obs, tmp),
                        target_dims = list(c('ensemble', 'syear'), NULL),
                        function(x, y) {
                          Subset(x, along = 'syear',
                                 indices = cross[[y]]$train.dexes,
                                 drop = TRUE)},
                        ncores = ncores, output_dims = 'ensemble')$output1
      # CRPS of the climatological reference
      datos <- append(list(ref = ref_clim), datos)
      crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'),
                         fun = function(ref, obs) {
                           obs <- Subset(obs, along = 'ensemble',
                                         indices = 1, drop = 'selected')
                           mean(s2dv:::.CRPS(exp = ref, obs = obs,
                                             dat_dim = NULL,
                                             time_dim = 'syear',
                                             memb_dim = 'ensemble'))},
                         ncores = ncores)$output1
      skill_metrics$crps_clim <- crps_clim

      # CRPSS against the climatological reference
      # NOTE(review): Fair is hard-coded to FALSE here while rps/rpss below
      # honour the Fair argument — confirm this is intentional.
      crpss <- Apply(datos, target_dims = c('syear', 'ensemble'),
                     fun = function(ref, obs, ...) {
                       res <- abind(..., along = 2)
                       names(dim(res)) <- names(dim(obs))
                       obs <- Subset(obs, along = 'ensemble',
                                     indices = 1, drop = 'selected')
                       CRPSS(exp = res, obs = obs, ref = ref,
                             memb_dim = 'ensemble', Fair = FALSE,
                             time_dim = 'syear', clim.cross.val = FALSE)},
                     ncores = ncores)
      skill_metrics$crpss <- crpss$crpss
      skill_metrics$crpss_significance <- crpss$sign
      # Drop the reference element so datos is obs + models again
      datos <- datos[-1]
    }
    ## Deterministic metrics using ensemble mean:
    if ('enscorr' %in% requested_metrics) {
      # Correlation of the multimodel ensemble mean with the observations
      enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'),
                       function(obs, ...) {
                         res <- abind(..., along = 2)
                         names(dim(res)) <- c('syear', 'ensemble')
                         Corr(exp = res,
                              obs = obs,
                              dat_dim = NULL,
                              time_dim = 'syear',
                              method = 'pearson',
                              memb_dim = 'ensemble',
                              memb = FALSE,
                              conf = FALSE,
                              pval = FALSE,
                              sign = TRUE,
                              alpha = alpha)}, ncores = ncores)
      skill_metrics$enscorr <- enscorr$corr
      skill_metrics$enscorr_significance <- enscorr$sign
    }
    if ('mean_bias' %in% requested_metrics) {
      ## Mean Bias can make sense for non-anomalies (e.g. calibrated data)
      mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'),
                         function(obs, ...) {
                           res <- abind(..., along = 2)
                           names(dim(res)) <- c('syear', 'ensemble')
                           Bias(exp = res,
                                obs = obs,
                                time_dim = 'syear',
                                memb_dim = 'ensemble',
                                alpha = alpha)},
                         ncores = ncores)
      skill_metrics$mean_bias <- mean_bias$bias
      skill_metrics$mean_bias_significance <- mean_bias$sig
    }
    if ('rms' %in% requested_metrics) {
      rms <- Apply(datos, target_dims = c('syear', 'ensemble'),
                   function(obs, ...) {
                     res <- abind(..., along = 2)
                     names(dim(res)) <- c('syear', 'ensemble')
                     RMS(exp = res,
                         obs = obs,
                         memb_dim = 'ensemble', dat_dim = NULL,
                         time_dim = 'syear', alpha = alpha)},
                   ncores = ncores)
      skill_metrics$rms <- rms$rms
    }
    ## TODO: Do we need a reference forecast to compute rmss?
    if ('rmss' %in% requested_metrics) {
      rmss <- Apply(datos, target_dims = c('syear', 'ensemble'),
                    function(obs, ...) {
                      res <- abind(..., along = 2)
                      names(dim(res)) <- c('syear', 'ensemble')
                      RMSSS(exp = res,
                            obs = obs,
                            #ref = res$ref_obs_tr,
                            memb_dim = 'ensemble', dat_dim = NULL,
                            time_dim = 'syear', alpha = alpha, sign = TRUE)},
                    ncores = ncores)
      skill_metrics$rmss <- rmss$rmss
      skill_metrics$rmss_significance <- rmss$sign
    }
    if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
      # Temporal standard deviation of the multimodel ensemble mean and of
      # the observations; used to compute correlation for the scorecards.
      std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
                        function(...) {
                          res <- abind(..., along = 2)
                          names(dim(res)) <- c('syear', 'ensemble')
                          res <- MeanDims(res, 'ensemble')
                          sd(as.vector(res))},
                        ncores = ncores)$output1
      std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'),
                       fun = 'sd')$output1
      skill_metrics[['std_hcst']] <- std_hcst
      skill_metrics[['std_obs']] <- std_obs
    }

    if (any(c('var', 'variance') %in% requested_metrics)) {
      ## Temporal variance of the ensemble mean and of the observations
      var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'),
                        fun = function(...) {
                          res <- abind(..., along = 2)
                          names(dim(res)) <- c('syear', 'ensemble')
                          res <- MeanDims(res, 'ensemble')
                          var(as.vector(res))},
                        ncores = ncores)$output1
      var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'),
                       fun = function(x) {var(as.vector(x))},
                       ncores = ncores)$output1
      skill_metrics[['var_hcst']] <- var_hcst
      skill_metrics[['var_obs']] <- var_obs
    }
    if ('n_eff' %in% requested_metrics) {
      ## Effective number of degrees of freedom of the observations
      n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear',
                         na.action = na.pass,
                         ncores = ncores)
      skill_metrics[['n_eff']] <- n_eff
    }
    if (any(c('cov', 'covariance') %in% requested_metrics)) {
      # Covariance of the multimodel ensemble mean with the observations
      covariance <- Apply(datos, target_dims = c('syear', 'ensemble'),
                          fun = function(obs, ...) {
                            res <- abind(..., along = 2)
                            names(dim(res)) <- c('syear', 'ensemble')
                            res <- MeanDims(res, 'ensemble')
                            cov(as.vector(obs), as.vector(res),
                                use = "everything",
                                method = "pearson")},
                          ncores = ncores)$output1
      skill_metrics$covariance <- covariance
    }
  }
  # Probabilistic metrics for the requested probability categories.
  # Each recipe entry is a string expression (e.g. "1/3"); evaluate it to the
  # numeric thresholds.
  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
  categories <- lapply(categories, function(x) {
    sapply(x, function(x) {
      eval(parse(text = x))}, USE.NAMES = FALSE)})
  # Fair corrections use the pooled ensemble size across all models
  nmemb <- NULL
  if (Fair && !is.null(datos)) {
    nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble'])))
  }

  # Compute rps/rpss per category. seq_along() (not 1:length) so an empty or
  # missing percentiles entry skips the loop instead of iterating c(1, 0).
  for (ps in seq_along(categories)) {
    if ('rps' %in% requested_metrics) {
      rps <- RPS(exp = data$probs$hcst[[ps]],
                 obs = data$probs$obs[[ps]], memb_dim = NULL,
                 cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear',
                 Fair = Fair, nmemb = nmemb,
                 ncores = ncores)
      rps_clim <- Apply(list(data$probs$obs[[ps]]),
                        target_dims = c('bin', 'syear'),
                        RPS_clim, bin_dim_abs = 'bin', Fair = Fair,
                        cross.val = FALSE, ncores = ncores)$output1
      if (is.null(names(categories)[ps])) {
        skill_metrics$rps <- rps
        skill_metrics$rps_clim <- rps_clim
      } else {
        # Names based on the categories:
        skill_metrics[[paste0('rps-', names(categories)[ps])]] <- rps
        skill_metrics[[paste0('rps_clim-',
                              names(categories)[ps])]] <- rps_clim
      }
    }
    if ('rpss' %in% requested_metrics) {
      rpss <- RPSS(exp = data$probs$hcst[[ps]],
                   obs = data$probs$obs[[ps]],
                   ref = NULL, # ref is 1/3 by default if terciles
                   time_dim = 'syear', memb_dim = NULL,
                   cat_dim = 'bin', nmemb = nmemb,
                   dat_dim = NULL,
                   # unused parameter when providing probs:
                   prob_thresholds = categories[[ps]],
                   indices_for_clim = NULL,
                   Fair = Fair, weights_exp = NULL, weights_ref = NULL,
                   cross.val = FALSE, na.rm = na.rm,
                   sig_method.type = 'two.sided.approx', alpha = alpha,
                   ncores = ncores)
      if (is.null(names(categories)[ps])) {
        skill_metrics[['rpss']] <- rpss$rpss
        skill_metrics[['rpss_significance']] <- rpss$sign
      } else {
        # Names based on the categories:
        skill_metrics[[paste0('rpss-', names(categories)[ps])]] <- rpss$rpss
        skill_metrics[[paste0('rpss-',
                              names(categories)[ps],
                              "_significance")]] <- rpss$sign
      }
    }
  }
  # Convert logical significance arrays to numeric (keeping dims) so they can
  # be written to netCDF
  skill_metrics <- lapply(skill_metrics,
                          function(x) {
                            if (is.logical(x)) {
                              dims <- dim(x)
                              res <- as.numeric(x)
                              dim(res) <- dims
                            } else {
                              res <- x
                            }
                            return(res)
                          })
  # Drop dimensions to work with saving and visualization modules
  skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)})
  # Save to netCDF
  if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') {
    original_dir <- recipe$Run$output_dir
    recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/",
                                    "outputs/Skill/")
    # Fix: the original referenced the undefined variable 'data_crossval';
    # use the first model's cube from this function's 'data' argument as the
    # template for the metadata (data$hcst is a list after the conversion
    # above) — TODO confirm the first model's cube is the intended template.
    save_metrics(recipe = recipe,
                 metrics = skill_metrics,
                 data_cube = data$hcst[[1]], agg = 'global',
                 outdir = recipe$Run$output_dir)
    recipe$Run$output_dir <- original_dir
  }
  return(skill_metrics)
}