diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 280850ddb3de7e2f69e1f79019d1d122a1466249..8de05c4b08424614337a054247acf1a9ee828729 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -1,8 +1,11 @@ # Full-cross-val workflow ## This code should be valid for individual months and temporal averages source("modules/Crossval/R/tmp/GetProbs.R") - +source("modules/Crossval/R/tmp/CST_MergeDims.R") +source("modules/Saving/Saving.R") + Crossval_anomalies <- function(recipe, data) { + horizon <- tolower(recipe$Analysis$Horizon) cross.method <- recipe$Analysis$cross.method # TODO move check if (is.null(cross.method)) { @@ -53,7 +56,7 @@ Crossval_anomalies <- function(recipe, data) { indices = cross$eval.dexes, drop = 'selected') obs_ev <- Subset(obs, along = 'syear', indices = cross$eval.dexes, drop = 'selected') - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) @@ -69,13 +72,13 @@ Crossval_anomalies <- function(recipe, data) { clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) # compute anomalies: ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, - ncores = recipe$Analysis$ncores) + ncores = ncores) # compute category limits lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { @@ -84,6 +87,7 @@ Crossval_anomalies <- function(recipe, data) { na.rm = TRUE))})}, prob_lims = list(unlist(categories)), ncores = ncores)$output1 + names(dim(lims_ano_hcst_tr))[1] <- 'bin' lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -91,16 +95,104 @@ Crossval_anomalies <- function(recipe, data) { na.rm = TRUE))})}, prob_lims = list(unlist(categories)), ncores = ncores)$output1 - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + names(dim(lims_ano_obs_tr))[1] <- 'bin' + if (horizon == 'subseasonal') { ano_hcst_tr <- SplitDim(ano_hcst_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], - length(cross[[t]]$train.dexes))) + length(cross$train.dexes))) ano_obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], - length(cross[[t]]$train.dexes))) + length(cross$train.dexes))) } - + # check order before storing: + # Add back 'sday' dim if needed or select central 'sday' + if ('sday' %in% names(dim(ano_hcst_ev))) { + if (dim(ano_hcst_ev)['sday'] != 1) { + # To fail because it should not appear + } + } else { + ano_hcst_ev <- InsertDim(ano_hcst_ev, 'sday', len = 1, pos = 1) + } + # remove syear dim from ev objects + if ('syear' %in% names(dim(ano_hcst_ev))) { + if (dim(ano_hcst_ev)['syear'] == 1) { + ano_hcst_ev <- Subset(ano_hcst_ev, along = 'syear', indices = 1, + drop = 'selected') + } + } + # ---- + if ('sday' %in% names(dim(ano_obs_ev))) { + if (dim(ano_obs_ev)['sday'] > 1) { + # 'sday' dim to select the central day + ano_obs_ev <- Subset(ano_obs_ev, along = 'sday', + indices = central_day) + } + } 
else { + ano_obs_ev <- InsertDim(ano_obs_ev, 'sday', len = 1, pos = 1) + } + # ---- + if ('sday' %in% names(dim(ano_obs_tr))) { + if (dim(ano_obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + ano_obs_tr <- Subset(ano_obs_tr, along = 'sday', + indices = central_day) + } + } else { + ano_obs_tr <- InsertDim(ano_obs_tr, 'sday', len = 1, pos = 1) + } + # rename dimension to be ensemble + if ('ensemble' %in% names(dim(ano_obs_tr))) { + if (dim(ano_obs_tr)['ensemble'] == 1) { + ano_obs_tr <- Subset(ano_obs_tr, along = 'ensemble', indices = 1, + drop = 'selected') + } else { + stop("Unexpected 'ensemble' dimension of length > 1 in the training observations.") + } + } + if ('syear' %in% names(dim(ano_obs_tr))) { # should be true + names(dim(ano_obs_tr))[which(names(dim(ano_obs_tr)) == 'syear')] <- 'ensemble' + } + # ---- + if ('sday' %in% names(dim(lims_ano_obs_tr))) { + if (dim(lims_ano_obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_ano_obs_tr <- Subset(lims_ano_obs_tr, along = 'sday', + indices = central_day) + } + } else { + lims_ano_obs_tr <- InsertDim(lims_ano_obs_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_ano_obs_tr))) { + if (dim(lims_ano_obs_tr)['ensemble'] == 1) { + lims_ano_obs_tr <- Subset(lims_ano_obs_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("The category limits should not have an 'ensemble' dimension of length > 1.") + } + } + # ---- + if ('sday' %in% names(dim(lims_ano_hcst_tr))) { + if (dim(lims_ano_hcst_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_ano_hcst_tr <- Subset(lims_ano_hcst_tr, along = 'sday', + indices = central_day) + } + } else { + lims_ano_hcst_tr <- InsertDim(lims_ano_hcst_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_ano_hcst_tr))) { + if (dim(lims_ano_hcst_tr)['ensemble'] == 1) { + lims_ano_hcst_tr <- Subset(lims_ano_hcst_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("The category limits should not have an 'ensemble' dimension of length > 1.") + } + } return(list(ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, ano_obs_tr = ano_obs_tr, @@ -108,9 +200,6 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_obs_tr = lims_ano_obs_tr)) } - output_dims <- names(dim(data$hcst$data))[which(names(dim(data$hcst$data)) != "syear")] - lims_output_dims <- c("bin", - names(dim(data$hcst$data))[which(!names(dim(data$hcst$data)) %in% c("syear", "ensemble"))]) indices <- array(1:length(cross), dim = c(syear = length(cross))) res <- Apply(data = list(indices = indices, exp = data$hcst$data, @@ -118,19 +207,11 @@ Crossval_anomalies <- function(recipe, data) { target_dims = list(indices = NULL, obs = names(dim(data$obs$data)), exp = names(dim(data$hcst$data))), - output_dims = list(ano_hcst_ev = output_dims, - ano_obs_ev = output_dims, - ano_obs_tr = c("dat", "var", "sday", "sweek", - "ensemble", "time", "latitude", "longitude", - "unneeded"), - lims_ano_hcst_tr = lims_output_dims, - lims_ano_obs_tr = lims_output_dims), fun = .cross_anomalies, ncores = 1, ## TODO: Explore options for core distribution cross = cross, categories = categories, recipe = recipe) - info(recipe$Run$logger, "#### Anomalies Cross-validation loop ended #####") @@ -155,22 +236,36 @@ Crossval_anomalies <- function(recipe, data) { rm(lims_tmp_hcst, lims_tmp_obs) gc() - # To make crps_clim to work the reference forecast need to have same dims as obs: - res$ano_obs_tr <- Subset(res$ano_obs_tr, along = 'unneeded', - indices = 1, drop = 'selected') recipe$Run$output_dir <- 
paste0(recipe$Run$output_dir, "/", "outputs/Anomalies/") # Forecast anomalies: if (!is.null(data$fcst)) { - clim_hcst <- Apply(data$hcst$data, + if (horizon %in% c('subseasonal')) { + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1) + } else { + # we need to keep original objects (to later compute bias) + hcst <- data$hcst$data + obs <- data$obs$data + fcst <- data$fcst$data + } + clim_hcst <- Apply(hcst, target_dims = c('syear', 'ensemble'), mean, na.rm = na.rm, ncores = ncores)$output1 - data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) - hcst_ano <- Ano(data = data$hcst$data, clim = clim_hcst) + data$fcst$data <- Ano(data = fcst, clim = clim_hcst) + hcst_ano <- Ano(data = hcst, clim = clim_hcst) # Terciles limits using the whole hindcast period: lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { @@ -180,12 +275,12 @@ Crossval_anomalies <- function(recipe, data) { output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) - clim_obs <- Apply(data$obs$data, - target_dims = c('syear', 'ensemble'), - mean, - na.rm = na.rm, - ncores = ncores)$output1 - obs_ano <- Ano(data = data$obs$data, clim = clim_obs) + clim_obs <- Apply(obs, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + obs_ano <- Ano(data = obs, clim = clim_obs) lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { @@ -334,34 +429,19 @@ Crossval_anomalies <- function(recipe, data) { type = "fcst") } } - - # Save ensemble mean for multimodel option: - hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) -# save_metrics(recipe = recipe, -# metrics = list(hcst_EM = -# Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), -# data_cube = data$hcst, agg = agg, -# module = "statistics") -# fcst_EM <- NULL - if (!is.null(data$fcst)) { - fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) -# save_metrics(recipe = recipe, -# metrics = list(fcst_EM = -# Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), -# data_cube = data$fcst, agg = agg, -# module = "statistics") - } else { - fcst_EM <- NULL + # Subset dimension 'sday' for subseasonal for mean bias: + if (horizon == 'subseasonal') { + central_day <- (dim(data$hcst$data)['sday'] + 1)/2 + data$hcst <- CST_Subset(data$hcst, along = 'sday', indices = central_day) + data$obs <- CST_Subset(data$obs, along = 'sday', indices = central_day) } return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, - hcst_EM = hcst_EM, fcst_EM = fcst_EM, - #cat_lims = list(hcst_tr = res$lims_ano_hcst_tr, - # obs_tr = res$lims_ano_obs_tr), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev, + cat_lims = list(obs = res$lims_ano_obs_tr), + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, fcst = fcst_probs), - ref_obs_tr = res$ano_obs_tr)) + ref_obs = res$ano_obs_tr)) } diff --git a/modules/Crossval/Crossval_calibration.R 
b/modules/Crossval/Crossval_calibration.R index 394a3de5a1a78b71c002d646055b623e07773b4a..75765e75c810f23d0455c5be9c3636c37f52f96a 100644 --- a/modules/Crossval/Crossval_calibration.R +++ b/modules/Crossval/Crossval_calibration.R @@ -2,9 +2,10 @@ source("modules/Crossval/R/tmp/GetProbs.R") source("modules/Crossval/R/tmp/CST_MergeDims.R") +source("modules/Saving/Saving.R") Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { - + horizon <- tolower(recipe$Analysis$Horizon) cal_method <- recipe$Analysis$Workflow$Calibration$method cross.method <- recipe$Analysis$cross.method # TODO move check @@ -52,7 +53,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { obs_ev <- Subset(obs, along = 'syear', indices = cross$eval.dexes) - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { central_day <- (dim(exp)['sday'] + 1)/2 hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = na.rm) @@ -83,20 +84,20 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { na.rm = na.rm, qstep = 0.1, ncores = ncores) } else { - cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) - cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, - cal.method = cal_method, - memb_dim = 'ensemble', sdate_dim = 'syear', - eval.method = 'in-sample', - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, ncores = ncores) + cal_hcst_tr <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) } # If precipitation is negative: if (correct_negative) { @@ -117,7 +118,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { prob_lims = unlist(categories), ncores = ncores)$output1 names(dim(lims_obs_tr))[1] <- 'bin' - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear', new_dim_name = 'sday', indices = rep(1:dim(data$hcst$data)['sday'], @@ -248,7 +249,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { # Forecast calibration: lims <- list() if (!is.null(data$fcst)) { - if (tolower(recipe$Analysis$Horizon) %in% c('subseasonal')) { + if (horizon %in% c('subseasonal')) { # merge sample dimensions and select central week hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), rename_dim = 'syear', na.rm = FALSE) @@ -287,26 +288,26 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { na.rm = na.rm, qstep = 0.1, ncores = ncores) } else { - data$fcst$data <- Calibration(exp = hcst, - obs = obs, - exp_cor = fcst, - cal.method = cal_method, - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) - hcst_cal <- Calibration(exp = hcst, - obs = 
obs, - cal.method = cal_method, - eval.method = 'in-sample', - multi.model = FALSE, - na.fill = TRUE, na.rm = na.rm, - apply_to = NULL, - alpha = NULL, memb_dim = 'ensemble', - sdate_dim = 'syear', - dat_dim = NULL, ncores = ncores) + data$fcst$data <- CSTools::Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- CSTools::Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) } if (!('sday' %in% names(dim(data$fcst$data)))) { data$fcst$data <- InsertDim(data$fcst$data, @@ -404,7 +405,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { hcst <- data$hcst hcst$data <- res$cal_hcst_ev - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + if (horizon == 'subseasonal') { # Keep only de central sday position from the dimension if (dim(hcst$data)['sday'] > 1) { hcst <- CST_Subset(hcst, along = 'sday', @@ -485,10 +486,10 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, hcst.full_val = data$hcst, obs.full_val = data$obs, cat_lims = list(obs = lims), - probs = list(hcst_ev = hcst_probs_ev, - obs_ev = obs_probs_ev, + probs = list(hcst = hcst_probs_ev, + obs = obs_probs_ev, fcst = fcst_probs), - ref_obs_tr = res$obs_tr)) + ref_obs = res$obs_tr)) } diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index fc3cd455167936717b6fa1bda22d615a6fb0c8df..6da103a1f1cba08d448e8d2085d4432ff7de7f7f 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -1,4 +1,9 @@ - +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss source("modules/Saving/Saving.R") source("modules/Crossval/R/tmp/RPS.R") source("modules/Crossval/R/RPS_clim.R") @@ -9,101 +14,295 @@ source("modules/Crossval/R/tmp/Corr.R") source("modules/Crossval/R/tmp/Bias.R") source("modules/Crossval/R/tmp/SprErr.R") source("modules/Crossval/R/tmp/Eno.R") -source("modules/Crossval/R/tmp/CST_MergeDims.R") - -## data_crossval is the result from function full_crossval_anomalies or similar. 
-## this is a list with the required elements: - ## probs is a list with - ## probs$hcst_ev and probs$obs_ev - ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles - ## each element will be an array with 'cat' dimension - ## the same for probs$obs_ev - ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices - ## in this case cross validated anomalies - ## obs is a s2dv_cube for the post-processed obs - ## in this case cross validated anomalies - ## fcst is a s2dv_cube for the post-processed fcst - ## in this case cross anomalies with the full hindcast period - ## this object is not required for skill assessment - ## hcst.full_val and obs.full_val are the original data to compute mean bias - ## cat_lims used to compute the probabilities - ## this object is not required for skill assessment - ## ref_obs_tr is an array with the cross-validate observed anomalies - ## to be used as reference forecast in the CRPSS and CRPS_clim - ## it is computed from the training indices -## the recipe could be used to read the Percentiles -## if fair is TRUE, the nmemb used to compute the probabilities is needed - ## nmemb_ref is the number of year - 1 in case climatological forecast is the reference -Crossval_metrics <- function(recipe, data_crossval, - fair = FALSE, nmemb = NULL, nmemb_ref = NULL) { +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils +Crossval_metrics <- function(recipe, + data = NULL, + Fair = FALSE) { + horizon <- tolower(recipe$Analysis$Horizon) + save <- tolower(recipe$Analysis$Workflow$Skill$save) ncores <- recipe$Analysis$ncores alpha <- recipe$Analysis$Skill$alpha na.rm <- recipe$Analysis$remove_NAs + # ---------------------- + # If the data$ref_obs is not NULL this is not needed: + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # Prepare indices for crossval + sdate_dim <- dim(data$hcst[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + # ------------------------------ + alpha <- recipe$Analysis$alpha + if (is.null(alpha)) { + alpha <- 0.05 + } + + # Probabilistic metrics for categories (for RPS(S)) categories <- recipe$Analysis$Workflow$Probabilities$percentiles categories <- lapply(categories, function (x) { sapply(x, function(x) { eval(parse(text = x))}, USE.NAMES = FALSE)}) - # TODO: distinguish between rpss and bss - # if 1 percentile -> bss - # if more than 1 -> rpss - # TODO: for subseasonal check if the dimension sday is 1 - ## exe_rps <- unlist(lapply(categories, function(x) { - ## if (length(x) > 1) { - ## x <- x[1] *100 - ## } - ## return(x)})) - if (is.null(alpha)) { - alpha <- 0.05 - } ## START SKILL ASSESSMENT: skill_metrics <- list() requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]]) + ", | |,")[[1]]) + if (!is.null(data)) { + ## TODO: Change 'datos' name + # convert $data elements to list to use multiApply: + if (inherits(data$hcst, "s2dv_cube")) { + data$hcst <- list(data$hcst) + datos <- list(obs = data$obs$data) + datos <- append(datos, lapply(data$hcst, "[[", "data")) + } else { + datos <- append(list(obs = data$obs), data$hcst) + } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, 
merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } + ## CRPS metrics only make sense in pool method: + if (any(c('crps', 'crpss') %in% requested_metrics)) { + # CRPS + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble', + Fair = .Fair))}, + extra_info = list(Fair = Fair), + ncores = ncores)$output1 + skill_metrics$crps <- crps + if (is.null(datos$ref_obs)) { + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + } else { + ref_clim <- datos$ref_obs + } + ## Do we want to do this...? + # CRPS Climatology + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble', + Fair = .Fair))}, + extra_info = list(Fair = Fair), + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + # CRPSS + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = .Fair, + time_dim = 'syear', clim.cross.val = FALSE)}, + extra_info = list(Fair = Fair), + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + # Ensemble spread error + if ('enssprerr' %in% requested_metrics) { + enssprerr <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + SprErr(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', pval = TRUE, + na.rm = .na.rm)}, + extra_info = list(na.rm = na.rm), + ncores = ncores) + skill_metrics$ensprerr <- enssprerr$ratio + skill_metrics$ensprerr_significance <- enssprerr$p.val <= alpha + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = FALSE, + conf = FALSE, + pval = FALSE, + sign = TRUE, + alpha = .alpha)}, + extra_info = list(alpha = alpha), + ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = .alpha)}, + extra_info = list(alpha = alpha), + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = .alpha, + sign = TRUE)}, + extra_info = list(alpha = alpha), + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + ## TODO: We don't need obs here + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + ## TODO: We only needs obs here + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # If Fair, RPS(S) requires the number of ensemble members + if (!is.null(datos) && Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + nmemb_ref <- dim(datos[[1]])[['syear']] - 1 + } else { + nmemb <- NULL + nmemb_ref <- NULL + } - # The recipe allows to requset more than only terciles: + # Compute rps for (ps in 1:length(categories)) { - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - data_crossval$probs$hcst_ev[[ps]] <- MergeDims(data_crossval$probs$hcst_ev[[ps]], + if (horizon == 'subseasonal') { + data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]], merge_dims = c('sweek', 'syear'), rename_dim = 'syear', na.rm = FALSE) - data_crossval$probs$obs_ev[[ps]] <- MergeDims(data_crossval$probs$obs_ev[[ps]], + data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]], merge_dims = c('sweek', 'syear'), rename_dim = 'syear', na.rm = FALSE) } - if ('rps' %in% requested_metrics) { - rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]], - obs = data_crossval$probs$obs_ev[[ps]], memb_dim = NULL, + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', - Fair = fair, nmemb = nmemb, + Fair = Fair, nmemb = nmemb, ncores = ncores) - rps_clim <- Apply(list(data_crossval$probs$obs_ev[[ps]]), + rps_clim <- Apply(list(data$probs$obs[[ps]]), target_dims = c('bin', 'syear'), - RPS_clim, bin_dim_abs = 'bin', Fair = fair, + RPS_clim, bin_dim_abs = 'bin', Fair = Fair, cross.val = FALSE, ncores = ncores)$output1 if (is.null(names(categories)[ps])) { skill_metrics$rps <- rps skill_metrics$rps_clim <- rps_clim } else { # names based on the categories: - # To use it when visualization works for more rps skill_metrics[[paste0('rps-', names(categories)[ps])]] <- rps skill_metrics[[paste0('rps_clim-', names(categories)[ps])]] <- rps_clim } } if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[ps]], - obs = data_crossval$probs$obs_ev[[ps]], + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], ref = NULL, # ref is 1/3 by default if terciles time_dim = 'syear', memb_dim = NULL, cat_dim = 'bin', nmemb = nmemb, + nmemb_ref = nmemb_ref, dat_dim = NULL, - prob_thresholds = categories[[ps]], + prob_thresholds = categories[[ps]], # un use param when providing probs indices_for_clim = NULL, - Fair = fair, weights_exp = NULL, weights_ref = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, na.rm = na.rm, sig_method.type = 'two.sided.approx', alpha = alpha, ncores = ncores) @@ -111,193 +310,70 @@ Crossval_metrics <- function(recipe, data_crossval, skill_metrics[[paste0('rpss')]] <- rpss$rpss skill_metrics[[paste0('rpss_significance')]] <- rpss$sign } else { - # TO USE IT when visualization works for more rpsss + # names based on the categories: skill_metrics[[paste0('rpss-', names(categories)[ps])]] <- rpss$rpss - ## TODO: Change this name? 
skill_metrics[[paste0('rpss-', - names(categories)[ps], + names(categories)[ps], "_significance")]] <- rpss$sign } } - } - if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { - # The evaluation of all metrics are done with extra sample - data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs <- CST_MergeDims(data_crossval$obs, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$hcst.full_val <- CST_MergeDims(data_crossval$hcst.full_val, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs.full_val <- CST_MergeDims(data_crossval$obs.full_val, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - } - - if ('crps' %in% requested_metrics) { - crps <- CRPS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, - ncores = ncores) - skill_metrics$crps <- crps - crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, - obs = data_crossval$obs$data, - time_dim = 'syear', memb_dim = 'ensemble', - Fair = fair, ncores = ncores) - skill_metrics$crps_clim <- crps_clim - } - if ('crpss' %in% requested_metrics) { - crpss <- CRPSS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - ref = data_crossval$ref_obs_tr, - memb_dim = 'ensemble', Fair = fair, - time_dim = 'syear', clim.cross.val = FALSE, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - } - - if ('enscorr' %in% requested_metrics) { - enscorr <- Corr(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = alpha, - ncores = ncores) - skill_metrics$enscorr <- enscorr$corr - skill_metrics$enscorr_significance <- enscorr$sign - } - if ('mean_bias' %in% requested_metrics) { - if (!is.null(data_crossval$hcst.full_val$data)) { - mean_bias <- Bias(exp = data_crossval$hcst.full_val$data, - obs = data_crossval$obs.full_val$data, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = alpha, - na.rm = na.rm, - ncores = ncores) + # keep mean_bias at the end of the code to overwrite object if needed. + if ('mean_bias' %in% requested_metrics) { + if (!is.null(data$hcst.full_val)) { + if (inherits(data$hcst.full_val, "s2dv_cube")) { + data$hcst <- list(data$hcst.full_val) + datos <- list(obs = data$obs.full_val$data) + datos <- append(datos, lapply(data$hcst, "[[", "data")) + } else { + datos <- append(list(obs = data$obs.full_val), data$hcst.full_val) + } + if (horizon == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + datos <- lapply(datos, function(x) { + MergeDims(x, merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE)}) + } + } + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = .alpha)}, + extra_info = list(alpha = alpha), + ncores = ncores) skill_metrics$mean_bias <- mean_bias$bias skill_metrics$mean_bias_significance <- mean_bias$sig - } else { - info(recipe$Run$logger, - "Full values not available") } - } - if ('enssprerr' %in% requested_metrics) { - enssprerr <- SprErr(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', pval = TRUE, - na.rm = na.rm, - ncores = ncores) - skill_metrics$SprErr <- enssprerr$ratio - skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha - } - if ('rms' %in% requested_metrics) { - rms <- RMS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, - ncores = ncores) - skill_metrics$rms <- rms$rms - } - if ('rmss' %in% requested_metrics) { - rmss <- RMSSS(exp = data_crossval$hcst$data, - obs = data_crossval$obs$data, - ref = res$ref_obs_tr, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, sign = TRUE, - ncores = ncores) - skill_metrics$rmss <- rmss$rmss - skill_metrics$rmss_significance <- rmss$sign - } - if (is.null(data_crossval$hcst_EM)) { - data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data, - dims = 'ensemble', - drop = TRUE) - } - if (any(c('std', 'standard_deviation') %in% requested_metrics)) { - std_hcst <- Apply(data = data_crossval$hcst_EM, - target_dims = 'syear', - fun = 'sd')$output1 - - std_obs <- Apply(data = data_crossval$obs$data, - target_dims = 'syear', - fun = 'sd')$output1 - - skill_metrics[['std_hcst']] <- std_hcst - skill_metrics[['std_obs']] <- std_obs - } - if (any(c('var', 'variance') %in% requested_metrics)) { - ## Calculate variance - var_hcst <- Apply(data = data_crossval$hcst_EM, - target_dims = 'syear', - fun = 'sd')$output1 ^ 2 - - var_obs <- Apply(data = data_crossval$obs$data, - target_dims = 'syear', - fun = 'sd')$output1 ^ 2 - - skill_metrics[['var_hcst']] <- var_hcst - skill_metrics[['var_obs']] <- var_obs - } ## close if on variance - if ('n_eff' %in% requested_metrics) { - ## Calculate degrees of freedom - n_eff <- s2dv::Eno(data = data_crossval$obs$data, - time_dim = 'syear', - na.action = na.pass, - ncores = ncores) - skill_metrics[['n_eff']] <- n_eff - } ## close on n_eff - if (any(c('cov', 'covariance') %in% requested_metrics)) { - covariance <- Apply(data = list(x = data_crossval$obs$data, - y = data_crossval$hcst_EM), - target_dims = 'syear', - fun = function(x, y) { - cov(as.vector(x), as.vector(y), - use = "everything", - method = "pearson")})$output1 - skill_metrics$covariance <- covariance } - original <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(original, "/", "outputs/Skill/") - + # Save metrics skill_metrics <- lapply(skill_metrics, function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Save metrics - # reduce dimension to work with Visualization module: + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res) + }) + # Drop dimensions to work with saving and visualization modules skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) - - if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { + # Save to netCDF + if (save == 'all') { + 
original_dir <- recipe$Run$output_dir + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") save_metrics(recipe = recipe, metrics = skill_metrics, - data_cube = data_crossval$hcst, agg = 'global', + data_cube = data$obs, agg = 'global', outdir = recipe$Run$output_dir) + recipe$Run$output_dir <- original_dir } - recipe$Run$output_dir <- original return(skill_metrics) } - - diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R deleted file mode 100644 index d59b71db806787de1feb1f41bc07807e87723915..0000000000000000000000000000000000000000 --- a/modules/Crossval/Crossval_multimodel_metrics.R +++ /dev/null @@ -1,314 +0,0 @@ -# One function to compute all the possible metrics or statistics -# datos is a list returned by load_multimodel or load_multimodel_mean - ## this reduce memory and it can be used to compute several metrics - ## in the mean the ensemble dim is length 1 for each model -# probs is a list returned by load_multimodel_probs - ## this is needed to compute rps/rpss -source("modules/Saving/Saving.R") -source("modules/Crossval/R/tmp/RPS.R") -source("modules/Crossval/R/RPS_clim.R") -source("modules/Crossval/R/CRPS_clim.R") -source("modules/Crossval/R/tmp/RPSS.R") -source("modules/Crossval/R/tmp/RandomWalkTest.R") -source("modules/Crossval/R/tmp/Corr.R") -source("modules/Crossval/R/tmp/Bias.R") -source("modules/Crossval/R/tmp/SprErr.R") -source("modules/Crossval/R/tmp/Eno.R") -source("modules/Crossval/R/tmp/GetProbs.R") -source("modules/Visualization/Visualization.R") # to load .warning from utils - -Crossval_multimodel_metrics <- function(recipe, - data = NULL, - Fair = FALSE) { - - ncores <- recipe$Analysis$ncores - alpha <- recipe$Analysis$Skill$alpha - na.rm <- recipe$Analysis$remove_NAs - cross.method <- recipe$Analysis$cross.method - if (is.null(cross.method)) { - cross.method <- 'leave-one-out' - } - # Prepare indices for crossval - sdate_dim <- dim(data$hcst[[1]])['syear'] - cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, - amt.points = sdate_dim, - amt.points_cor = NULL) - ## What is this for? - tmp <- array(1:length(cross), c(syear = length(cross))) - alpha <- recipe$Analysis$alpha - if (is.null(alpha)) { - alpha <- 0.05 - } - - ## START SKILL ASSESSMENT: - skill_metrics <- list() - requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, - ", | |,")[[1]]) - if (!is.null(data)) { - ## TODO: Change 'datos' name - # convert $data elements to list to use multiApply: - if (inherits(data$hcst, "s2dv_cube")) { - data$hcst <- list(data$hcst) - datos <- list(obs = data$obs$data) - datos <- append(datos, lapply(data$hcst, "[[", "data")) - } else { - datos <- append(list(obs = data$obs), data$hcst) - } - ## CRPS metrics only make sense in pool method: - if (any(c('crps', 'crpss') %in% requested_metrics)) { - # CRPS - crps <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) 
{ - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, - time_dim = 'syear', - memb_dim = 'ensemble'))}, - ncores = ncores)$output1 - skill_metrics$crps <- crps - # Build the reference forecast: - ref_clim <- Apply(list(datos$obs, tmp), - target_dims = list(c('ensemble', 'syear'), NULL), - function(x, y) { - Subset(x, along = 'syear', - indices = cross[[y]]$train.dexes, drop = T)}, - ncores = ncores, output_dims = 'ensemble')$output1 - ## Do we want to do this...? - # CRPS Climatology - datos <- append(list(ref = ref_clim), datos) - crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), - fun = function(ref, obs) { - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - mean(s2dv:::.CRPS(exp = ref, obs = obs, - dat_dim = NULL, - time_dim = 'syear', memb_dim = 'ensemble'))}, - ncores = ncores)$output1 - skill_metrics$crps_clim <- crps_clim - - # CRPSS - crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(ref, obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- names(dim(obs)) - obs <- Subset(obs, along = 'ensemble', - indices = 1, drop = 'selected') - CRPSS(exp = res, obs = obs, ref = ref, - memb_dim = 'ensemble', Fair = FALSE, - time_dim = 'syear', clim.cross.val = FALSE)}, - ncores = ncores) - skill_metrics$crpss <- crpss$crpss - skill_metrics$crpss_significance <- crpss$sign - datos <- datos[-1] - } - ## Deterministic metrics using ensemble mean: - if ('enscorr' %in% requested_metrics) { - enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - Corr(exp = res, - obs = obs, - dat_dim = NULL, - time_dim = 'syear', - method = 'pearson', - memb_dim = 'ensemble', - memb = F, - conf = F, - pval = F, - sign = T, - alpha = alpha)}, ncores = ncores) - skill_metrics$enscorr <- enscorr$corr - skill_metrics$enscorr_significance <- enscorr$sign - } - if ('mean_bias' %in% requested_metrics) { - ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) - mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - Bias(exp = res, - obs = obs, - time_dim = 'syear', - memb_dim = 'ensemble', - alpha = alpha)}, - ncores = ncores) - skill_metrics$mean_bias <- mean_bias$bias - skill_metrics$mean_bias_significance <- mean_bias$sig - } - if ('rms' %in% requested_metrics) { - rms <- Apply(datos, target_dims = c('syear', 'ensemble'), - function(obs, ...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - RMS(exp = res, - obs = obs, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha)}, - ncores = ncores) - skill_metrics$rms <- rms$rms - } - ####### Do we need reference fcst to compute rmss?? - if ('rmss' %in% requested_metrics) { - # if (is.null(res$ref_obs_tr)) { - # } - rmss <- Apply(datos, target_dims = c('syear','ensemble'), - function(obs, ...) 
{ - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - RMSSS(exp = res, - obs = obs, - #ref = res$ref_obs_tr, - memb_dim = 'ensemble', dat_dim = NULL, - time_dim = 'syear', alpha = alpha, sign = TRUE)}, - ncores = ncores) - skill_metrics$rmss <- rmss$rmss - skill_metrics$rmss_significance <- rmss$sign - } - if (any(c('std', 'standard_deviation') %in% requested_metrics)) { - # This are used to compute correlation for the scorecards - # It's the temporal standard deviation of the ensmean - ## TODO: We don't need obs here - std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), - function(...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - sd(as.vector(res))}, - ncores = ncores)$output1 - ## TODO: We only needs obs here - std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), - fun = 'sd')$output1 - skill_metrics[['std_hcst']] <- std_hcst - skill_metrics[['std_obs']] <- std_obs - } - - if (any(c('var', 'variance') %in% requested_metrics)) { - ## Calculate variance - var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), - fun = function(...) { - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - variance <- var(as.vector(res))}, - ncores = ncores)$output1 - - var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), - fun = function(x) {var(as.vector(x))}, - ncores = ncores)$output1 - skill_metrics[['var_hcst']] <- var_hcst - skill_metrics[['var_obs']] <- var_obs - } - if ('n_eff' %in% requested_metrics) { - ## Calculate degrees of freedom - n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', - na.action = na.pass, - ncores = ncores) - skill_metrics[['n_eff']] <- n_eff - } - if (any(c('cov', 'covariance') %in% requested_metrics)) { - # The covariance of the ensemble mean - covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), - fun = function(obs, ...) 
{ - res <- abind(..., along = 2) - names(dim(res)) <- c('syear', 'ensemble') - res <- MeanDims(res, 'ensemble') - cov(as.vector(obs), as.vector(res), - use = "everything", - method = "pearson")}, - ncores = ncores)$output1 - skill_metrics$covariance <- covariance - } - } - # Probabilistic metrics for categories - - categories <- recipe$Analysis$Workflow$Probabilities$percentiles - categories <- lapply(categories, function (x) { - sapply(x, function(x) { - eval(parse(text = x))}, USE.NAMES = FALSE)}) - ## TODO: Understand this - if (!is.null(datos)) { - if (Fair) { - nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) - } else { - nmemb <- NULL - } - } else { - nmemb <- NULL - } - - # Compute rps - for (ps in 1:length(categories)) { - if ('rps' %in% requested_metrics) { - rps <- RPS(exp = data$probs$hcst[[ps]], - obs = data$probs$obs[[ps]], memb_dim = NULL, - cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', - Fair = Fair, nmemb = nmemb, - ncores = ncores) - rps_clim <- Apply(list(data$probs$obs[[ps]]), - target_dims = c('bin', 'syear'), - RPS_clim, bin_dim_abs = 'bin', Fair = Fair, - cross.val = FALSE, ncores = ncores)$output1 - if (is.null(names(categories)[ps])) { - skill_metrics$rps <- rps - skill_metrics$rps_clim <- rps_clim - } else { - # names based on the categories: - skill_metrics[[paste0('rps-', names(categories)[ps])]] <- rps - skill_metrics[[paste0('rps_clim-', - names(categories)[ps])]] <- rps_clim - } - } - if ('rpss' %in% requested_metrics) { - rpss <- RPSS(exp = data$probs$hcst[[ps]], - obs = data$probs$obs[[ps]], - ref = NULL, # ref is 1/3 by default if terciles - time_dim = 'syear', memb_dim = NULL, - cat_dim = 'bin', nmemb = nmemb, - dat_dim = NULL, - prob_thresholds = categories[[ps]], # un use param when providing probs - indices_for_clim = NULL, - Fair = Fair, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, na.rm = na.rm, - sig_method.type = 'two.sided.approx', alpha = alpha, - ncores = ncores) - if (is.null(names(categories)[ps])) { - skill_metrics[[paste0('rpss')]] <- rpss$rpss - skill_metrics[[paste0('rpss_significance')]] <- rpss$sign - } else { - # names based on the categories: - skill_metrics[[paste0('rpss-', names(categories)[ps])]] <- rpss$rpss - skill_metrics[[paste0('rpss-', - names(categories)[ps], - "_significance")]] <- rpss$sign - } - } - } - # Save metrics - skill_metrics <- lapply(skill_metrics, - function(x) { - if (is.logical(x)) { - dims <- dim(x) - res <- as.numeric(x) - dim(res) <- dims - } else { - res <- x - } - return(res) - }) - # Drop dimensions to work with saving and visualization modules - skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) - # Save to netCDF - if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { - original_dir <- recipe$Run$output_dir - recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", "outputs/Skill/") - save_metrics(recipe = recipe, - metrics = skill_metrics, - data_cube = data_crossval$hcst, agg = 'global', - outdir = recipe$Run$output_dir) - recipe$Run$output_dir <- original_dir - } - return(skill_metrics) -} diff --git a/tests/recipes/recipe-seasonal_monthly_crossval.yml b/tests/recipes/recipe-seasonal_monthly_crossval.yml new file mode 100644 index 0000000000000000000000000000000000000000..914e14bcc7eadc8b2fdcc7a015e47d97ed2b3153 --- /dev/null +++ b/tests/recipes/recipe-seasonal_monthly_crossval.yml @@ -0,0 +1,55 @@ +Description: + Author: V. 
Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: Meteo-France-System7 + Multimodel: False + Reference: + name: ERA5 + Time: + sdate: '1101' + fcst_year: '2020' + hcst_start: '1993' + hcst_end: '2000' + ftime_min: 1 + ftime_max: 3 + Region: + latmin: 17 + latmax: 20 + lonmin: 12 + lonmax: 15 + Regrid: + method: bilinear + type: to_system + Workflow: + # Anomalies: + # compute: no + # cross_validation: + # save: 'none' + Calibration: + method: mse_min + save: 'all' + Skill: + metric: RPSS CRPSS EnsCorr Corr_individual_members Enscorr_specs + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics most_likely_terciles forecast_ensemble_mean + multi_panel: yes + projection: cylindrical_equidistant + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: ./tests/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/tests/testthat/test-seasonal_monthly_crossval.R b/tests/testthat/test-seasonal_monthly_crossval.R new file mode 100644 index 0000000000000000000000000000000000000000..5712682bbb292baa11db7eafd6fb534419061598 --- /dev/null +++ b/tests/testthat/test-seasonal_monthly_crossval.R @@ -0,0 +1,111 @@ +context("Seasonal monthly data") + +source("modules/Loading/Loading.R") +source("modules/Visualization/Visualization.R") + +recipe_file <- "tests/recipes/recipe-seasonal_monthly_crossval.yml" +recipe <- prepare_outputs(recipe_file, disable_checks = F) + +# Load datasets +suppressWarnings({invisible(capture.output( +data <- Loading(recipe) +))}) + +# Calibrate data +source("modules/Crossval/Crossval_calibration.R") +calibrated <- Crossval_calibration(recipe = recipe, data = data) + +# Compute skill metrics +source("modules/Crossval/Crossval_metrics.R") +suppressWarnings({invisible(capture.output( +skill_metrics <- Crossval_metrics(recipe, calibrated) +))}) + +# Plotting +suppressWarnings({invisible(capture.output( +Visualization(recipe = recipe, data = calibrated, + skill_metrics = skill_metrics, probabilities = calibrated$probs, + significance = T) +))}) +outdir <- get_dir(recipe = recipe, variable = data$hcst$attrs$Variable$varName) + +# ------- TESTS -------- + +test_that("2. Crossval_calibration", { + +expect_equal(is.list(calibrated), TRUE) + +expect_equal(names(calibrated), + c("hcst", "obs", "fcst", "hcst.full_val", "obs.full_val", + "cat_lims", "probs", "ref_obs")) + +expect_equal(class(calibrated$hcst), "s2dv_cube") + +expect_equal(class(calibrated$fcst), "s2dv_cube") + +expect_equal(dim(calibrated$hcst$data), + c(dat = 1, var = 1, sday = 1, sweek = 1, time = 3, latitude = 3, + longitude = 3, ensemble = 25, syear = 8)) + +expect_equal(dim(calibrated$fcst$data), + c(dat = 1, var = 1, sday = 1, sweek = 1, syear = 1, time = 3, + latitude = 3, longitude = 3, ensemble = 51)) + +expect_equal(mean(calibrated$fcst$data), + 291.0119, tolerance = 0.0001) + +expect_equal(mean(calibrated$hcst$data), + 290.5232, tolerance = 0.0001) + +expect_equal(as.vector(drop(calibrated$hcst$data)[1, , 2, 3, 4]), + c(291.2313, 290.8534, 291.8334), tolerance = 0.0001) + +expect_equal(range(calibrated$fcst$data), + c(285.3917, 297.5869), tolerance = 0.0001) + +}) + + +#====================================== +test_that("3. 
Crossval_metrics", { + +expect_equal(is.list(skill_metrics), TRUE) + +expect_equal(names(skill_metrics), + c("crps", "crps_clim", "crpss", + "crpss_significance", "enscorr", "enscorr_significance", + "rpss-set1", "rpss-set1_significance", + "rpss-set2", "rpss-set2_significance")) + +expect_equal(class(skill_metrics[['rpss-set1']]), "array") + +expect_equal(dim(skill_metrics[['rpss-set1']]), + c(var = 1, time = 3, latitude = 3, longitude = 3)) + +expect_equal(dim(skill_metrics[['rpss-set1_significance']]), + dim(skill_metrics[['rpss-set1']])) + +expect_equal(as.vector(skill_metrics[['rpss-set1']][, , 2, 3]), + c(0.013176471, 0.037741176, -0.001223529), tolerance = 0.0001) + +expect_equal(as.vector(skill_metrics[['rpss-set1_significance']][, , 2, 3]), + c(0,0,1)) + +}) + + +test_that("5. Visualization", { +plots <- paste0(recipe$Run$output_dir, "/plots/") +expect_equal(all(basename(list.files(plots, recursive = T)) %in% + c("crps_clim-november.pdf", "crps-november.pdf", "crpss-november.pdf", + "enscorr-november.pdf", "forecast_ensemble_median-20201101.pdf", + "forecast_most_likely_tercile-20201101.pdf", + "rpss-set1-november.pdf", "rpss-set2-november.pdf")), + TRUE) + +expect_equal(length(list.files(plots, recursive = T)), 8) + +}) + +# Delete files +unlink(recipe$Run$output_dir, recursive = T)