diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 5051f12f566d6bc54be80922c54110d01a09d448..fe9d85f68a2baf2235628abff52398c333881082 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -86,10 +86,11 @@ Crossval_Calibration <- function(recipe, data) { apply_to = NULL, alpha = NULL, ncores = ncores) - lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, @@ -97,7 +98,8 @@ Crossval_Calibration <- function(recipe, data) { lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, @@ -265,60 +267,62 @@ Crossval_Calibration <- function(recipe, data) { lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, - output_dims = lapply(categories, function(x) {'bin'}), - prob_lims = categories, - ncores = ncores) - lims <- Apply(data$obs$data, target_dims = c('syear', 'ensemble'), - fun = function(x, prob_lims) { - lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) + lims <- Apply(obs, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) tmp_lims2 <- list() -# What to save hcst category limits or obs category limits? -# TODO saving: -#recipe$Analysis$Workflow$Probabilities$save <- FALSE -if (recipe$Analysis$Workflow$Probabilities$save == 'all') { - for(ps in 1:length(categories)) { - tmp_lims3 <- drop(lims[[ps]]) - for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- tmp_lims3[l,,,] - if (!('var' %in% names(dim(tmp_lims)))) { - dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) + + # What to save hcst category limits or obs category limits? 
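Note on the recurring as.array(quantile(...)) wrapper in these hunks: quantile() returns a named numeric vector with no dim attribute, so the 'bin' dimension declared in output_dims has no array dimension to bind to; as.array() turns the result into a one-dimensional array that Apply() can label. The drop = "selected" argument now passed to ClimProjDiags::Subset() in this and the following hunks plays a similar role: it removes only the subsetted dimension and keeps unrelated length-1 dimensions such as 'var', which the saving functions expect. A minimal standalone sketch with made-up data (not part of the patch):

    library(multiApply)       # Apply()
    library(ClimProjDiags)    # Subset()
    dat <- array(rnorm(25 * 10), dim = c(syear = 25, ensemble = 10))
    q <- quantile(as.vector(dat), c(1/3, 2/3), na.rm = TRUE)
    dim(q)                    # NULL: a plain named vector
    dim(as.array(q))          # 2: can be labelled 'bin' through output_dims
    lims <- Apply(dat, target_dims = c('syear', 'ensemble'),
                  fun = function(x) {
                    as.array(quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE))
                  },
                  output_dims = 'bin')$output1
    dim(lims)                 # bin = 2
    x <- array(rnorm(12), dim = c(var = 1, bin = 3, syear = 4))
    dim(Subset(x, along = 'bin', indices = 2, drop = 'all'))       # syear = 4 only; 'var' is lost
    dim(Subset(x, along = 'bin', indices = 2, drop = 'selected'))  # var = 1, syear = 4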
+ # TODO saving: + #recipe$Analysis$Workflow$Probabilities$save <- FALSE + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['bin']) { + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l, + drop = "selected") + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- paste0("p_", as.character(categories[[ps]][l]*100)) } - tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) - names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) - } - if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { - save_percentiles(recipe = recipe, percentiles = tmp_lims2, - data_cube = data$obs, - agg = "global", outdir = NULL) } + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) } } -} # Compute Probabilities for (ps in 1:length(categories)) { - # Get only the probabilities of the central day in sday - central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 - tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) - hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'bin', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_cal_hcst_tr_res[[ps]], - ncores = ncores) - tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) - obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', - prob_thresholds = NULL, - bin_dim_abs = 'bin', - indices_for_quantiles = NULL, - memb_dim = 'ensemble', - abs_thresholds = lims_obs_tr_res[[ps]], - ncores = ncores) + # Get only the probabilities of the central day in sday + central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 + tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) + hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_cal_hcst_tr_res[[ps]], + ncores = ncores) + tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) + obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) if (!is.null(data$fcst)) { fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', prob_thresholds = NULL, @@ -355,7 +359,7 @@ if (recipe$Analysis$Workflow$Probabilities$save == 'all') { info(recipe$Run$logger, "#### Calibrated and Probabilities Done #####") # TODO saving: -recipe$Analysis$Workflow$Calibration$save <- FALSE + recipe$Analysis$Workflow$Calibration$save <- FALSE if (recipe$Analysis$Workflow$Calibration$save != FALSE) { info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") # recipe$Run$output_dir <- paste0(recipe$Run$output_dir, @@ -377,65 +381,57 @@ recipe$Analysis$Workflow$Calibration$save <- FALSE probs_obs <- list() all_names <- NULL + ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { - name_elem <- paste0("below_", categories[[ps]][perc]) + name_elem <- paste0("below_", categories[[ps]][perc]*100) } else if (perc == length(categories[[ps]]) + 1) { - name_elem <- paste0("above_", categories[[ps]][perc-1]) + name_elem <- paste0("above_", categories[[ps]][perc-1]*100) } else { - 
name_elem <- paste0("from_", categories[[ps]][perc-1], - "_to_", categories[[ps]][perc]) + name_elem <- paste0("from_", categories[[ps]][perc-1]*100, + "_to_", categories[[ps]][perc]*100) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_fcst) } all_names <- c(all_names, name_elem) } } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + probs_hcst <- lapply(probs_hcst, .drop_dims) names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - + probs_obs <- lapply(probs_obs, .drop_dims) if (!is.null(data$fcst)) { names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } + probs_fcst <- lapply(probs_fcst, .drop_dims) } if (recipe$Analysis$Workflow$Probabilities$save %in% - c('all', 'bins_only')) { - save_probabilities(recipe = recipe, probs = probs_hcst, - data_cube = data$hcst, agg = agg, - type = "hcst") - save_probabilities(recipe = recipe, probs = probs_obs, - data_cube = data$hcst, agg = agg, - type = "obs") - save_probabilities(recipe = recipe, probs = probs_fcst, + c('all', 'bins_only')) { + ## Saving only forecast probabilities for now + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$obs, agg = agg, + # type = "obs") + save_probabilities(recipe = recipe, probs = probs_fcst, data_cube = data$fcst, agg = agg, type = "fcst") } diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 606368b77fb1caead8767b1d85b50aa170ca2c35..a08efc31c8caaac3e4de34b9cba35e55d2618255 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -82,21 +82,15 @@ Crossval_anomalies <- function(recipe, data) { lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), - ps, na.rm = na.rm) - dim(res) <- c(bin = length(res)) - return(res) - })}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, prob_lims = categories, ncores = ncores) lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - res <- quantile(as.vector(x), ps, - na.rm = na.rm) - dim(res) <- c(bin = length(res)) - return(res) - })}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, prob_lims = 
categories, ncores = ncores) #store results @@ -152,7 +146,8 @@ Crossval_anomalies <- function(recipe, data) { lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -165,19 +160,21 @@ Crossval_anomalies <- function(recipe, data) { lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) tmp_lims2 <- list() - for(ps in 1:length(categories)) { - tmp_lims3 <- drop(lims[[ps]]) + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) for (l in 1:dim(lims[[ps]])['bin']) { - tmp_lims <- tmp_lims3[l,,,] - if (!('var' %in% names(dim(tmp_lims)))) { - dim(tmp_lims) <- c(var = 1, dim(tmp_lims)) - } + ## TODO: Use Subset + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l) tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) } @@ -248,6 +245,7 @@ Crossval_anomalies <- function(recipe, data) { probs_obs <- list() all_names <- NULL + ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { @@ -259,45 +257,36 @@ Crossval_anomalies <- function(recipe, data) { "_to_", categories[[ps]][perc]) } probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_hcst) probs_obs <- append(list(Subset(obs_probs_ev[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_obs) if (!is.null(data$fcst)) { probs_fcst <- append(list(Subset(fcst_probs[[ps]], - along = 'bin', indices = perc, drop = 'all')), + along = 'bin', + indices = perc, + drop = 'selected')), probs_fcst) } all_names <- c(all_names, name_elem) } } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() names(probs_hcst) <- all_names - if (!('var' %in% names(dim(probs_hcst[[1]])))) { - probs_hcst <- lapply(probs_hcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } + probs_hcst <- lapply(probs_hcst, .drop_dims) names(probs_obs) <- all_names - if (!('var' %in% names(dim(probs_obs[[1]])))) { - probs_obs <- lapply(probs_obs, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - + probs_obs <- lapply(probs_obs, .drop_dims) if (!is.null(data$fcst)) { names(probs_fcst) <- all_names - if (!('var' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(var = 1, dim(x)) - return(x)}) - } - if (!('syear' %in% names(dim(probs_fcst[[1]])))) { - probs_fcst <- lapply(probs_fcst, function(x) { - dim(x) <- c(syear = 1, dim(x)) - return(x)}) - } + probs_fcst <- lapply(probs_fcst, .drop_dims) } + if (recipe$Analysis$Workflow$Probabilities$save %in% c('all', 'bins_only')) { save_probabilities(recipe = recipe, probs = probs_hcst, @@ -306,12 +295,11 @@ Crossval_anomalies <- function(recipe, data) { 
save_probabilities(recipe = recipe, probs = probs_obs, data_cube = data$hcst, agg = agg, type = "obs") - # TODO Forecast -# if (!is.null(probs_fcst)) { -# save_probabilities(recipe = recipe, probs = probs_fcst, -# data_cube = data$fcst, agg = agg, -# type = "fcst") -# } + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } } # Save ensemble mean for multimodel option: hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R index 605e1f2b7b2c71687ed1e370f322de15959d5468..6475f4781a2bdf47d978a26560f46557c52ca389 100644 --- a/modules/Crossval/Crossval_metrics.R +++ b/modules/Crossval/Crossval_metrics.R @@ -112,21 +112,21 @@ Crossval_metrics <- function(recipe, data_crossval, } if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { # The evaluation of all metrics are done with extra sample - data_crossval$hcst$data <- MergeDims(data_crossval$hcst$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs$data <- MergeDims(data_crossval$obs$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs <- CST_MergeDims(data_crossval$obs, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$hcst.full_val$data <- MergeDims(data_crossval$hcst.full_val$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) - data_crossval$obs.full_val$data <- MergeDims(data_crossval$obs.full_val$data, - merge_dims = c('sweek', 'syear'), - rename_dim = 'syear', na.rm = FALSE) + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst.full_val <- CST_MergeDims(data_crossval$hcst.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs.full_val <- CST_MergeDims(data_crossval$obs.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) } if ('crps' %in% requested_metrics) { @@ -276,6 +276,9 @@ Crossval_metrics <- function(recipe, data_crossval, return(res) }) # Save metrics + # reduce dimension to work with Visualization module: + skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)}) + if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') { save_metrics(recipe = recipe, metrics = skill_metrics, @@ -283,10 +286,6 @@ Crossval_metrics <- function(recipe, data_crossval, outdir = recipe$Run$output_dir) } recipe$Run$output_dir <- original - # reduce dimension to work with Visualization module: - skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) - skill_metrics <- lapply(skill_metrics, function(x){ - InsertDim(x, pos = 1, len = 1, name = 'var')}) return(skill_metrics) } diff --git a/modules/Crossval/Crossval_multimodel_Calibration.R b/modules/Crossval/Crossval_multimodel_Calibration.R index 229abd0a99458f5ddfb15783736ec8225cc7c245..3f7788e03a975e9847d1c0a42b5a1ce98706109a 100644 --- a/modules/Crossval/Crossval_multimodel_Calibration.R +++ b/modules/Crossval/Crossval_multimodel_Calibration.R @@ -111,8 +111,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { fun = function(..., prob_lims) 
{ res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), - ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -120,7 +120,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { target_dims = c('syear'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -176,7 +177,8 @@ Crossval_multimodel_Calibration <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R index fd7fd0fe089760bf6648c22d1ea260b3bd4b781e..4032a02e4bb1250f63dccdca6f106ee90509b5fd 100644 --- a/modules/Crossval/Crossval_multimodel_anomalies.R +++ b/modules/Crossval/Crossval_multimodel_anomalies.R @@ -98,8 +98,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), - ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), + ps, na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) @@ -107,7 +107,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { target_dims = c('syear'), fun = function(x, prob_lims) { lapply(prob_lims, function(ps) { - quantile(as.vector(x), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x){'bin'}), prob_lims = categories, ncores = ncores) @@ -156,7 +157,8 @@ Crossval_multimodel_anomalies <- function(recipe, data) { fun = function(..., prob_lims) { res <- abind(..., along = 2) lapply(prob_lims, function(ps) { - quantile(as.vector(res), ps, na.rm = na.rm)})}, + as.array(quantile(as.vector(res), ps, + na.rm = na.rm))})}, output_dims = lapply(categories, function(x) {'bin'}), prob_lims = categories, ncores = ncores) diff --git a/modules/Crossval/R/tmp/CST_MergeDims.R b/modules/Crossval/R/tmp/CST_MergeDims.R new file mode 100644 index 0000000000000000000000000000000000000000..bb06bf608051d06ecabce9a9cd3163c03128c6ad --- /dev/null +++ b/modules/Crossval/R/tmp/CST_MergeDims.R @@ -0,0 +1,162 @@ +#'Function to Merge Dimensions +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function merges two dimensions of the array \code{data} in a +#''s2dv_cube' object into one. The user can select the dimensions to merge and +#'provide the final name of the dimension. The user can select to remove NA +#'values or keep them. +#' +#'@param data An 's2dv_cube' object +#'@param merge_dims A character vector indicating the names of the dimensions to +#' merge. +#'@param rename_dim a character string indicating the name of the output +#' dimension. If left at NULL, the first dimension name provided in parameter +#' \code{merge_dims} will be used. +#'@param na.rm A logical indicating if the NA values should be removed or not. 
+#' +#'@examples +#'data <- 1 : c(2 * 3 * 4 * 5 * 6 * 7) +#'dim(data) <- c(time = 7, lat = 2, lon = 3, monthly = 4, member = 6, +#' dataset = 5, var = 1) +#'data[2,,,,,,] <- NA +#'data[c(3,27)] <- NA +#'data <- list(data = data) +#'class(data) <- 's2dv_cube' +#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly')) +#'new_data <- CST_MergeDims(data, merge_dims = c('lon', 'lat'), rename_dim = 'grid') +#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly'), na.rm = TRUE) +#'@export +CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'), + rename_dim = NULL, na.rm = FALSE) { + # Check 's2dv_cube' + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube'.") + } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + # data + data$data <- MergeDims(data$data, merge_dims = merge_dims, + rename_dim = rename_dim, na.rm = na.rm) + # dims + data$dims <- dim(data$data) + + # rename_dim + if (length(rename_dim) > 1) { + rename_dim <- as.character(rename_dim[1]) + } + # coords + data$coords[merge_dims] <- NULL + data$coords[[rename_dim]] <- 1:dim(data$data)[rename_dim] + attr(data$coords[[rename_dim]], 'indices') <- TRUE + + # attrs + if (all(merge_dims %in% names(dim(data$attrs$Dates)))) { + original_timezone <- attr(data$attrs$Dates[1], "tzone") + data$attrs$Dates <- MergeDims(data$attrs$Dates, merge_dims = merge_dims, + rename_dim = rename_dim, na.rm = na.rm) + # Transform dates back to POSIXct + data$attrs$Dates <- as.POSIXct(data$attrs$Dates, + origin = "1970-01-01", + tz = original_timezone) + + } else if (any(merge_dims %in% names(dim(data$attrs$Dates)))) { + warning("The dimensions of 'Dates' array will be different from ", + "the temporal dimensions in 'data'. Parameter 'merge_dims' ", + "only includes one temporal dimension of 'Dates'.") + } + return(data) +} +#'Function to Split Dimension +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function merges two dimensions of an array into one. The +#'user can select the dimensions to merge and provide the final name of the +#'dimension. The user can select to remove NA values or keep them. +#' +#'@param data An n-dimensional array with named dimensions +#'@param merge_dims A character vector indicating the names of the dimensions to +#' merge. +#'@param rename_dim A character string indicating the name of the output +#' dimension. If left at NULL, the first dimension name provided in parameter +#' \code{merge_dims} will be used. +#'@param na.rm A logical indicating if the NA values should be removed or not. 
+#' +#'@examples +#'data <- 1 : 20 +#'dim(data) <- c(time = 10, lat = 2) +#'new_data <- MergeDims(data, merge_dims = c('time', 'lat')) +#'@import abind +#'@importFrom ClimProjDiags Subset +#'@export +MergeDims <- function(data, merge_dims = c('time', 'monthly'), + rename_dim = NULL, na.rm = FALSE) { + # check data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (is.null(dim(data))) { + stop("Parameter 'data' must have dimensions.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + dims <- dim(data) + # check merge_dims + if (is.null(merge_dims)) { + stop("Parameter 'merge_dims' cannot be NULL.") + } + if (!is.character(merge_dims)) { + stop("Parameter 'merge_dims' must be a character vector ", + "indicating the names of the dimensions to be merged.") + } + if (length(merge_dims) > 2) { + warning("Only two dimensions can be merge, only the first two ", + "dimension will be used. To merge further dimensions ", + "consider to use this function multiple times.") + merge_dims <- merge_dims[1 : 2] + } else if (length(merge_dims) < 2) { + stop("Parameter 'merge_dims' must be of length two.") + } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + if (length(rename_dim) > 1) { + warning("Parameter 'rename_dim' has length greater than 1 ", + "and only the first element will be used.") + rename_dim <- as.character(rename_dim[1]) + } + if (!any(names(dims) %in% merge_dims)) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + pos1 <- which(names(dims) == merge_dims[1]) + pos2 <- which(names(dims) == merge_dims[2]) + if (length(pos1) == 0 | length(pos2) == 0) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + if (pos1 > pos2) { + pos1 <- pos1 - 1 + } + data <- lapply(1:dims[pos2], function(x) {Subset(data, along = pos2, + indices = x, drop = 'selected')}) + data <- abind(data, along = pos1) + names(dim(data)) <- names(dims)[-pos2] + if (!is.null(rename_dim)) { + names(dim(data))[pos1] <- rename_dim + } + if (na.rm) { + nas <- which(is.na(Subset(data, along = -pos1, indices = 1))) + if (length(nas) != 0) { + nas <- unlist(lapply(nas, function(x) { + if(all(is.na(Subset(data, along = pos1, + indices = x)))) { + return(x)}})) + data <- Subset(data, along = pos1, indices = -nas) + } + } + return(data) +} diff --git a/modules/Saving/R/drop_dims.R b/modules/Saving/R/drop_dims.R index 7361faa8ea1056b414b5e2ed67740e4ceeb38bae..be68a3332a42bb1d3298b6ea3200750c02239f76 100644 --- a/modules/Saving/R/drop_dims.R +++ b/modules/Saving/R/drop_dims.R @@ -1,8 +1,14 @@ # version victoria https://earth.bsc.es/gitlab/es/auto-s2s/-/blob/dev-Loading-multivar/modules/Skill/Skill.R +# This function takes an array and drops dimensions of length = 1 that are not +# needed by the saving or plotting functions, aka 'droppable_dims'. +# The essential dimensions are: 'var', 'bin', 'time', and 'latitude'/'longitude' +# or 'region'. +# The 'droppable' dimensions are: 'dat', 'sday', 'sweek', 'ensemble', 'nobs', +# 'nexp, 'exp_memb', 'obs_memb'. 
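Illustration of the revised .drop_dims() behaviour described in the comment above, with hypothetical dimension sizes: only length-1 dimensions from the droppable list are removed, so essential singleton dimensions such as 'var' (and, after this change, 'bin') are preserved for the saving and plotting functions.

    metric <- array(rnorm(360), dim = c(dat = 1, var = 1, sday = 1, sweek = 1,
                                        syear = 10, time = 4, latitude = 3,
                                        longitude = 3, ensemble = 1))
    dim(.drop_dims(metric))
    # var = 1, syear = 10, time = 4, latitude = 3, longitude = 3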
.drop_dims <- function(metric_array) { # Define dimensions that are not essential for saving droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs", - "nexp", "exp_memb", "obs_memb", "bin") + "nexp", "exp_memb", "obs_memb") # , "bin") # Select non-essential dimensions of length 1 dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), droppable_dims) @@ -18,10 +24,6 @@ return(metric_array) } - - - - ## TODO: Replace with ClimProjDiags::Subset and add var and dat dimensions #.drop_dims <- function(metric_array) { # # Drop all singleton dimensions diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index b2345691c8a5ffdc4839a0ca331761aa4548201b..2766c17acf0739ecd7c3c1006afaf4f3e1e33940 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -17,12 +17,12 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { if (tolower(recipe$Analysis$Horizon) == "decadal") { # to not save the month and day in the filename (needed for the multimodel) - date <- substr(date,1,4) + date <- substr(date, 1, 4) # for the models initialised in January - it may be better to do this in save_* functions archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive exp.name <- recipe$Analysis$Datasets$System$name - if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){ - date <- as.character(as.numeric(date)-1) + if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1) { + date <- as.character(as.numeric(date) - 1) } } @@ -41,24 +41,24 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { hcst_end <- recipe$Analysis$Time$hcst_end switch(tolower(file.type), - "skill" = {type_info <- "-skill_"}, - "corr" = {type_info <- "-corr_"}, - "exp" = {type_info <- paste0("_", date, "_")}, - "obs" = {type_info <- paste0("-obs_", date, "_")}, - "percentiles" = {type_info <- "-percentiles_"}, - "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}, - "rps_syear" = {type_info <- "rps_syear"}, - "rps_clim_syear" = {type_info <- "rps_clim_syear"}, - "crps_syear" = {type_info <- "crps_syear"}, - "crps_clim_syear" = {type_info <- "crps_clim_syear"}, - "crps" = {type_info <- "crps"}, - "mean_bias" = {type_info <- "mean_bias"}, - {type_info <- paste0(file.type)}) + "skill" = {type_info <- "-skill"}, + "corr" = {type_info <- "-corr"}, + "exp" = {type_info <- paste0("_", date)}, + "obs" = {type_info <- paste0("-obs_", date)}, + "percentiles" = {type_info <- "-percentiles"}, + "probs" = {type_info <- paste0("-probs_", date)}, + "bias" = {type_info <- paste0("-bias_", date)}, + "rps_syear" = {type_info <- "-rps_syear"}, + "rps_clim_syear" = {type_info <- "-rps_clim_syear"}, + "crps_syear" = {type_info <- "-crps_syear"}, + "crps_clim_syear" = {type_info <- "-crps_clim_syear"}, + "crps" = {type_info <- "-crps"}, + "mean_bias" = {type_info <- "-mean_bias"}, + {type_info <- paste0("-", file.type)}) # Build file name filename <- paste0("scorecards_", system, "_", reference, "_", var, - "_", type_info, "_", hcst_start, "-", hcst_end, + type_info, "_", hcst_start, "-", hcst_end, "_s", shortdate) } else { if (tolower(recipe$Analysis$Horizon) == "decadal") { diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index ec36b1028889e54f43cd47932f591173677e69dc..8106923486ea86017f89776bf7ea681443ce99f5 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -10,6 +10,16 @@ # Compute initial date 
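The get_times() hunk below trims the Dates array to the central sday and sweek before leadtimes are computed, mirroring the central-day selection done on the data in Crossval_Calibration(). Sketch of the index arithmetic with assumed window sizes (sday_window = 3, sweek_window = 9, as in the example recipe further down):

    dates_dims <- c(sday = 3, sweek = 9, syear = 10, time = 4)   # assumed
    central_day  <- (dates_dims[['sday']] + 1) / 2               # 2
    central_week <- (dates_dims[['sweek']] + 1) / 2               # 5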
fcst.horizon <- tolower(recipe$Analysis$Horizon) # Generate time dimensions and the corresponding metadata. + ## TODO: This addresses subseasonal case, but does not work well + ## when there is missing data. + if (all(c("sweek", "sday") %in% names(dim(data_cube$attrs$Dates)))) { + central_day <- (dim(data_cube$attrs$Dates)[["sday"]] + 1) / 2 + central_week <- (dim(data_cube$attrs$Dates)[["sweek"]] + 1) / 2 + data_cube$attrs$Dates <- Subset(data_cube$attrs$Dates, + along = c("sday", "sweek"), + indices = list(sday = central_day, + sweek = central_week)) + } dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) leadtimes <- as.numeric(dates - init_date)/3600 @@ -17,7 +27,7 @@ switch(fcst.horizon, "seasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- leadtimes; ref <- 'hours since '; + "subseasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- ''}, "decadal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index db7ceecacd0f6e8af750fae45147e6dcbf07f506..d0b8dc4f267183404b637cf350cdd7a58ae0affd 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -5,8 +5,14 @@ save_metrics <- function(recipe, agg = "global", outdir = NULL, module = "skill") { + # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. + + # Sanity checks + if (!is.list(metrics) || is.null(names(metrics))) { + stop("'metrics' should be a named list.") + } # Define grid dimensions and names lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) @@ -36,12 +42,12 @@ save_metrics <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } if (fcst.horizon == 'decadal') { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -56,6 +62,8 @@ save_metrics <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, @@ -95,6 +103,15 @@ save_metrics <- function(recipe, }) } } + metric <- names(subset_metric[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val + metadata <- list(metric = list(name = metric, + long_name = long_name, + missing_value = missing_val)) + data_cube$attrs$Variable$metadata[metric] <- metadata + attr(subset_metric[[i]], 'variables') <- metadata ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code extra_string <- get_filename(NULL, recipe, variable, diff --git a/modules/Saving/R/save_percentiles.R 
b/modules/Saving/R/save_percentiles.R index d5dfae16f16346242d8bf1b744d536164fb3e30d..0071eb88e0867e9a24c6852c91ba43f004c8fa0a 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -5,6 +5,11 @@ save_percentiles <- function(recipe, outdir = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. + + # percentiles <- lapply(percentiles, + # function(x) { + # .drop_dims(x) + # }) archive <- get_archive(recipe) # Define grid dimensions and names lalo <- c('longitude', 'latitude') @@ -35,16 +40,12 @@ save_percentiles <- function(recipe, # Generate vector containing leadtimes if (fcst.horizon == 'decadal') { - # if (global_attributes$system == 'Multimodel') { - # init_month <- 11 #TODO: put as if init_month is January - # } else { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # } - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start) + 1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -62,6 +63,8 @@ save_percentiles <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index c6d5f22280a5c1079d620b7fbf21eb7aa93eb637..e283c8fd4e9cd0ecd1da1cf8dc652f7ccc6e22c0 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -59,7 +59,6 @@ save_probabilities <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { subset_probs <- lapply(probs, function(x) { diff --git a/recipe_bigpredidata_oper_subseasonal_tas.yml b/recipe_bigpredidata_oper_subseasonal_tas.yml new file mode 100644 index 0000000000000000000000000000000000000000..322811ff7b0c78454deca1482dddd466113dffa8 --- /dev/null +++ b/recipe_bigpredidata_oper_subseasonal_tas.yml @@ -0,0 +1,83 @@ + +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: subseasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: weekly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + Datasets: + System: + - {name: 'NCEP-CFSv2', member: 'all'} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: 20250102 + fcst_year: '2025' # Optional, int: Forecast year 'YYYY' + hcst_start: '2011' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 4 # Mandatory, int: Last leadtime time step in months + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: all + Skill: + metric: mean_bias rpss + save: none + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. This avoids saving the probs of 10 to 90 + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/bigpredidata_fqa.R # replace with the path to your script + expid: a78g # replace with your EXPID + hpc_user: bsc032413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 16 + custom_directives: ['#SBATCH --constraint=medmem'] + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. 
+ email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails diff --git a/tools/check_recipe.R b/tools/check_recipe.R index 9339c7a3a2aaf93530d02392267377ebb8958c46..3dd186c7d116719a661615913ec8d189eaab14f3 100644 --- a/tools/check_recipe.R +++ b/tools/check_recipe.R @@ -659,10 +659,15 @@ check_recipe <- function(recipe) { "Parameter 'percentiles' must be defined under 'Probabilities'.") error_status <- TRUE } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) { - error(recipe$Run$logger, - paste("Parameter 'Probabilities:percentiles' expects a list.", - "See documentation in the wiki for examples.")) - error_status <- TRUE + if (length(recipe$Analysis$Workflow$Probabilities$percentiles) == 1) { + recipe$Analysis$Workflow$Probabilities$percentiles <- + list(recipe$Analysis$Workflow$Probabilities$percentiles) + } else { + error(recipe$Run$logger, + paste("Parameter 'Probabilities:percentiles' expects a list.", + "See documentation in the wiki for examples.")) + error_status <- TRUE + } } # Saving checks SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only") diff --git a/tools/libs.R b/tools/libs.R index 401467860ba602c0bd459439e973e3637adb7a4f..5973f2195af4c1b81972d62d48e43edc82aada68 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -37,6 +37,9 @@ source("tools/get_archive.R") source("tools/Utils.R") source("tools/restructure_recipe.R") # source("tools/add_dims.R") # Not sure if necessary yet +## To be removed after next release of CSTools: +source("modules/Crossval/R/tmp/CST_MergeDims.R") + # Settings options(bitmapType = 'cairo')
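With tools/libs.R now sourcing the temporary copy of CST_MergeDims.R, the subseasonal branch of Crossval_metrics() can fold the extra start dates of the week window into the verification sample. A usage sketch on a hypothetical s2dv_cube (dimension sizes assumed, not taken from the patch):

    source("modules/Crossval/R/tmp/CST_MergeDims.R")
    # hcst: s2dv_cube whose data has dimensions
    # c(dat = 1, var = 1, sday = 3, sweek = 9, syear = 10, time = 4,
    #   latitude = 9, longitude = 16, ensemble = 11)
    hcst <- CST_MergeDims(hcst, merge_dims = c('sweek', 'syear'),
                          rename_dim = 'syear', na.rm = FALSE)
    dim(hcst$data)[['syear']]   # 9 * 10 = 90 start dates now enter the metrics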