diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R
index d33aab80192133eb7ef120ab8ea55ba636bbefe8..95fe4701f1d4172df67b78e268e3ff1137d709c9 100644
--- a/modules/Crossval/Crossval_anomalies.R
+++ b/modules/Crossval/Crossval_anomalies.R
@@ -48,14 +48,14 @@ Crossval_anomalies <- function(recipe, data) {
     cross <- cross[[indices]]
     print(paste("Crossval Index:", cross$eval.dexes))
     obs_tr <- Subset(obs, along = 'syear',
-                    indices = cross$train.dexes)
+                     indices = cross$train.dexes)
     hcst_tr <- Subset(exp, along = 'syear',
-                     indices = cross$train.dexes)
+                      indices = cross$train.dexes)
     ## evaluation indices
     hcst_ev <- Subset(exp, along = 'syear',
-                     indices = cross$eval.dexes, drop = 'selected')
+                      indices = cross$eval.dexes, drop = 'selected')
     obs_ev <- Subset(obs, along = 'syear',
-                    indices = cross$eval.dexes, drop = 'selected')
+                     indices = cross$eval.dexes, drop = 'selected')
     if (horizon == 'subseasonal') {
       central_day <- (dim(exp)['sday'] + 1)/2
       hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'),
@@ -235,30 +235,39 @@ Crossval_anomalies <- function(recipe, data) {
   res$lims_ano_obs_tr <- lims_tmp_obs
   rm(lims_tmp_hcst, lims_tmp_obs)
   gc()
-
-  recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
-                                  "/", "outputs/Anomalies/")
+  # Subseasonal: merge sample dimensions and select central week
+  if (horizon %in% c('subseasonal')) {
+    hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'),
+                      rename_dim = 'syear', na.rm = FALSE)
+    hcst <- Subset(hcst, along = 'sweek',
+                   indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
+    obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'),
+                     rename_dim = 'syear', na.rm = FALSE)
+    obs <- Subset(obs, along = 'sweek',
+                  indices = (dim(data$obs$data)['sweek'] + 1) / 2)
+    fcst <- Subset(data$fcst$data, along = 'sday', indices = 1,
+                   drop = 'selected')
+  } else {
+    # we need to keep original objects (to later compute bias)
+    hcst <- data$hcst$data
+    obs <- data$obs$data
+    fcst <- data$fcst$data
+  }
+
+  # Limits calculation with full hindcast period
+  lims <- list()
+  lims <- Apply(obs, target_dims = c('syear', 'ensemble'),
+                fun = function(x, prob_lims) {
+                  lapply(prob_lims, function(ps) {
+                    as.array(quantile(as.vector(x), ps,
+                                      na.rm = TRUE))})},
+                output_dims = lapply(categories, function(x) {'bin'}),
+                prob_lims = categories,
+                ncores = ncores)
 
   # Forecast anomalies:
   if (!is.null(data$fcst)) {
-    if (horizon %in% c('subseasonal')) {
-      # merge sample dimensions and select central week
-      hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'),
-                        rename_dim = 'syear', na.rm = FALSE)
-      hcst <- Subset(hcst, along = 'sweek',
-                     indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
-      obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'),
-                       rename_dim = 'syear', na.rm = FALSE)
-      obs <- Subset(obs, along = 'sweek',
-                    indices = (dim(data$obs$data)['sweek'] + 1) / 2)
-      fcst <- Subset(data$fcst$data, along = 'sday', indices = 1)
-    } else {
-      # we need to keep original objects (to later compute bias)
-      hcst <- data$hcst$data
-      obs <- data$obs$data
-      fcst <- data$fcst$data
-    }
     clim_hcst <- Apply(hcst,
                        target_dims = c('syear', 'ensemble'),
                        mean,
@@ -281,36 +290,17 @@ Crossval_anomalies <- function(recipe, data) {
                       na.rm = na.rm,
                       ncores = ncores)$output1
     obs_ano <- Ano(data = obs, clim = clim_obs)
-    lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'),
-                  fun = function(x, prob_lims) {
-                    lapply(prob_lims, function(ps) {
-                      as.array(quantile(as.vector(x), ps,
-                                        na.rm = TRUE))})},
-                  output_dims = lapply(categories, function(x) {'bin'}),
-                  prob_lims = categories,
-                  ncores = ncores)
-    tmp_lims2 <- list()
-    for (ps in 1:length(categories)) {
-      tmp_lims3 <- .drop_dims(lims[[ps]])
-      for (l in 1:dim(lims[[ps]])['bin']) {
-        tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3,
-                                          along = "bin",
-                                          indices = l,
-                                          drop = "selected")
-        tmp_lims2 <- append(tmp_lims2, list(tmp_lims))
-        names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l])
-      }
-      if (recipe$Analysis$Workflow$Probabilities$save %in% c("all", "percentiles_only")) {
-        save_percentiles(recipe = recipe, percentiles = tmp_lims2,
-                         data_cube = data$obs,
-                         agg = "global", outdir = NULL)
-      }
-    }
-  }
-  ## TODO: Should this happen here?
-  # Make categories rounded number to use as names:
-  categories <- lapply(categories, round, digits = 2)
+    # Percentile limits for the forecast, from the full-period hindcast
+    # anomalies:
+    hcst_ano <- Ano(data = hcst, clim = clim_hcst)
+    lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'),
+                       fun = function(x, prob_lims) {
+                         lapply(prob_lims, function(ps) {
+                           as.array(quantile(as.vector(x), ps,
+                                             na.rm = TRUE))})},
+                       output_dims = lapply(categories, function(x) {'bin'}),
+                       prob_lims = categories,
+                       ncores = ncores)
+  }
 
   # Compute Probabilities
   fcst_probs <- lapply(categories, function(x){NULL})
@@ -348,103 +338,28 @@ Crossval_anomalies <- function(recipe, data) {
   ano_obs <- data$obs
   ano_obs$data <- res$ano_obs_ev
 
-  info(recipe$Run$logger,
-       "#### Anomalies and Probabilities Done #####")
-  if (recipe$Analysis$Workflow$Anomalies$save != 'none') {
-    info(recipe$Run$logger, "##### START SAVING ANOMALIES #####")
-    # recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
-    #                                 "/", "outputs/Anomalies/")
-    # Save forecast
-    if ((recipe$Analysis$Workflow$Anomalies$save %in%
-         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
-      save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst',
-                    agg = agg)
-    }
-    # Save hindcast
-    if (recipe$Analysis$Workflow$Anomalies$save %in%
-        c('all', 'exp_only')) {
-      save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst',
-                    agg = agg)
-    }
-    # Save observation
-    if (recipe$Analysis$Workflow$Anomalies$save == 'all') {
-      save_observations(recipe = recipe, data_cube = ano_obs, agg = agg)
-    }
-  }
-  # Save probability bins
-  probs_hcst <- list()
-  probs_fcst <- list()
-  probs_obs <- list()
-  all_names <- NULL
-
-  for (ps in 1:length(categories)) {
-    for (perc in 1:(length(categories[[ps]]) + 1)) {
-      if (perc == 1) {
-        name_elem <- paste0("below_", categories[[ps]][perc])
-      } else if (perc == length(categories[[ps]]) + 1) {
-        name_elem <- paste0("above_", categories[[ps]][perc-1])
-      } else {
-        name_elem <- paste0("from_", categories[[ps]][perc-1],
-                            "_to_", categories[[ps]][perc])
-      }
-      probs_hcst <- append(probs_hcst,
-                           list(Subset(hcst_probs_ev[[ps]],
-                                       along = 'bin',
-                                       indices = perc,
-                                       drop = 'selected')))
-      probs_obs <- append(probs_obs,
-                          list(Subset(obs_probs_ev[[ps]],
-                                      along = 'bin',
-                                      indices = perc,
-                                      drop = 'selected')))
-      if (!is.null(data$fcst)) {
-        probs_fcst <- append(probs_fcst,
-                             list(Subset(fcst_probs[[ps]],
-                                         along = 'bin',
-                                         indices = perc,
-                                         drop = 'selected')))
-      }
-      all_names <- c(all_names, name_elem)
-    }
-  }
-  ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move
-  ## reorganizing and renaming to save_probabilities()
-  names(probs_hcst) <- all_names
-  probs_hcst <- lapply(probs_hcst, .drop_dims)
-  names(probs_obs) <- all_names
-  probs_obs <- lapply(probs_obs, .drop_dims)
-  if (!is.null(data$fcst)) {
-    names(probs_fcst) <- all_names
-    probs_fcst <- lapply(probs_fcst, .drop_dims)
-  }
-
-  if (recipe$Analysis$Workflow$Probabilities$save %in%
-      c('all', 'bins_only')) {
-    # save_probabilities(recipe = recipe, probs = probs_hcst,
-    #                    data_cube = data$hcst, agg = agg,
-    #                    type = "hcst")
-    # save_probabilities(recipe = recipe, probs = probs_obs,
-    #                    data_cube = data$hcst, agg = agg,
-    #                    type = "obs")
-    if (length(probs_fcst) > 0) {
-      save_probabilities(recipe = recipe, probs = probs_fcst,
-                         data_cube = data$fcst, agg = agg,
-                         type = "fcst")
-    }
-  }
   # Subset dimension 'sday' for subseasonal for mean bias:
   if (horizon == 'subseasonal') {
     central_day <- (dim(data$hcst$data)['sday'] + 1)/2
     data$hcst <- CST_Subset(data$hcst, along = 'sday', indices = central_day)
     data$obs <- CST_Subset(data$obs, along = 'sday', indices = central_day)
+    ano_hcst <- CST_Subset(ano_hcst, along = 'sday', indices = central_day)
+    ano_obs <- CST_Subset(ano_obs, along = 'sday', indices = central_day)
   }
-  return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst,
-              hcst.full_val = data$hcst, obs.full_val = data$obs,
-              cat_lims = list(obs = res$lims_ano_obs_tr),
-              probs = list(hcst = hcst_probs_ev,
-                           obs = obs_probs_ev,
-                           fcst = fcst_probs),
-              ref_obs = res$ano_obs_tr))
+
+  data <- list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst,
+               hcst.full_val = data$hcst, obs.full_val = data$obs,
+               cat_lims = list(obs = lims),
+               probs = list(hcst = hcst_probs_ev,
+                            obs = obs_probs_ev,
+                            fcst = fcst_probs),
+               ref_obs = res$ano_obs_tr)
+
+  # Save Crossval module outputs
+  source("modules/Saving/Saving_crossval.R")
+  Saving(recipe, data, module = "Anomalies", agg = agg)
+
+  return(data)
 }
diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R
index 48f250c955f7e3375d7e4ee6a4cd50952457c652..2aef46fcd927e9a8d5ea48b46ba73f4db2d0176c 100644
--- a/modules/Crossval/Crossval_calibration.R
+++ b/modules/Crossval/Crossval_calibration.R
@@ -44,14 +44,14 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
     cross <- cross[[indices]]
    print(paste("Crossval Index:", cross$eval.dexes))
     obs_tr <- Subset(obs, along = 'syear',
-                    indices = cross$train.dexes)
+                     indices = cross$train.dexes)
     hcst_tr <- Subset(exp, along = 'syear',
-                     indices = cross$train.dexes)
+                      indices = cross$train.dexes)
     ## evaluation indices
     hcst_ev <- Subset(exp, along = 'syear',
-                     indices = cross$eval.dexes)
+                      indices = cross$eval.dexes)
     obs_ev <- Subset(obs, along = 'syear',
-                    indices = cross$eval.dexes)
+                     indices = cross$eval.dexes)
 
     if (horizon == 'subseasonal') {
       central_day <- (dim(exp)['sday'] + 1)/2
@@ -67,20 +67,20 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
     }
     if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) {
       cal_hcst_tr <- QuantileMapping(exp = hcst_tr, obs = obs_tr,
-                                    method = cal_method,
-                                    memb_dim = 'ensemble',
-                                    sdate_dim = 'syear',
-                                    window_dim = NULL,
-                                    na.rm = na.rm, qstep = 0.1,
-                                    wet.day = FALSE,
-                                    ncores = ncores)
+                                     method = cal_method,
+                                     memb_dim = 'ensemble',
+                                     sdate_dim = 'syear',
+                                     window_dim = NULL,
+                                     na.rm = na.rm, qstep = 0.1,
+                                     wet.day = FALSE,
+                                     ncores = ncores)
       cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr,
-                                    exp_cor = hcst_ev,
-                                    method = cal_method,
+                                     exp_cor = hcst_ev,
+                                     method = cal_method,
                                      memb_dim = 'ensemble',
                                      sdate_dim = 'syear',
                                      window_dim = NULL,
-                                    wet.day = FALSE,
+                                     wet.day = FALSE,
                                      na.rm = na.rm, qstep = 0.1,
                                      ncores = ncores)
     } else {
@@ -92,9 +92,9 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
                                          apply_to = NULL, alpha = NULL,
                                          ncores = ncores)
       cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev,
-                                         cal.method = cal_method,
+                                          cal.method = cal_method,
                                           memb_dim = 'ensemble', sdate_dim = 'syear',
-                                         eval.method = 'in-sample',
+                                          eval.method = 'in-sample',
                                           na.fill = TRUE, na.rm = na.rm,
                                           apply_to = NULL, alpha = NULL,
                                           ncores = ncores)
@@ -118,15 +118,15 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
                              prob_lims = unlist(categories),
                              ncores = ncores)$output1
     names(dim(lims_obs_tr))[1] <- 'bin'
-    if (horizon == 'subseasonal') {
-    cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear',
-                            new_dim_name = 'sday',
-                            indices = rep(1:dim(data$hcst$data)['sday'],
-                                          length(cross$train.dexes)))
-    obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday',
-                       indices = rep(1:dim(data$hcst$data)['sday'],
-                                     length(cross$train.dexes)))
-    }
+    if (horizon == 'subseasonal') {
+      cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear',
+                              new_dim_name = 'sday',
+                              indices = rep(1:dim(data$hcst$data)['sday'],
+                                            length(cross$train.dexes)))
+      obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday',
+                         indices = rep(1:dim(data$hcst$data)['sday'],
+                                       length(cross$train.dexes)))
+    }
     # check order before storing:
     # Add back 'sday' dim if needed or select central 'sday'
     if ('sday' %in% names(dim(cal_hcst_ev))) {
@@ -134,14 +134,14 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
       # To fail because it should not appear
       }
     } else {
-      cal_hcst_ev <- InsertDim(cal_hcst_ev, 'sday', len = 1, pos = 1) 
+      cal_hcst_ev <- InsertDim(cal_hcst_ev, 'sday', len = 1, pos = 1)
     }
     # remove syear dim from ev objects
     if ('syear' %in% names(dim(cal_hcst_ev))) {
       if (dim(cal_hcst_ev)['syear'] == 1) {
         cal_hcst_ev <- Subset(cal_hcst_ev, along = 'syear',
                               indices = 1, drop = 'selected')
-      } 
+      }
     }
     # ----
     if ('sday' %in% names(dim(obs_tr))) {
@@ -178,7 +178,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
     # lims cannot have ensemble dim:
     if ('ensemble' %in% names(dim(lims_obs_tr))) {
       if (dim(lims_obs_tr)['ensemble'] == 1) {
-        lims_obs_tr <- Subset(lims_obs_tr, along = 'ensemble', 
+        lims_obs_tr <- Subset(lims_obs_tr, along = 'ensemble',
                               indices = 1, drop = 'selected')
       } else {
         # error:
@@ -204,7 +204,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
       # error:
       stop("Why it has memb_dim")
     }
-  } 
+  }
   return(list(cal_hcst_ev = cal_hcst_ev,
               obs_tr = obs_tr,
               lims_cal_hcst_tr = lims_cal_hcst_tr,
@@ -246,69 +246,81 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
   rm(lims_tmp_hcst, lims_tmp_obs)
   gc()
 
-  # Forecast calibration:
+  if (horizon %in% c('subseasonal')) {
+    # merge sample dimensions and select central week
+    hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'),
+                      rename_dim = 'syear', na.rm = FALSE)
+    hcst <- Subset(hcst, along = 'sweek',
+                   indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
+    obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'),
+                     rename_dim = 'syear', na.rm = FALSE)
+    obs <- Subset(obs, along = 'sweek',
+                  indices = (dim(data$obs$data)['sweek'] + 1) / 2)
+    fcst <- Subset(data$fcst$data, along = 'sday', indices = 1,
+                   drop = 'selected')
+  } else {
+    # we need to keep original objects (to later compute bias)
+    hcst <- data$hcst$data
+    obs <- data$obs$data
+    fcst <- data$fcst$data
+  }
+
+  # Limits calculation with full hindcast period
   lims <- list()
+  lims <- Apply(obs, target_dims = c('syear', 'ensemble'),
+                fun = function(x, prob_lims) {
+                  lapply(prob_lims, function(ps) {
+                    as.array(quantile(as.vector(x), ps,
+                                      na.rm = TRUE))})},
+                output_dims = lapply(categories, function(x) {'bin'}),
+                prob_lims = categories,
+                ncores = ncores)
+
+  # Forecast calibration
   if (!is.null(data$fcst)) {
-    if (horizon %in% c('subseasonal')) {
-      # merge sample dimensions and select central week
-      hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'),
-                        rename_dim = 'syear', na.rm = FALSE)
-      hcst <- Subset(hcst, along = 'sweek',
-                     indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
-      obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'),
-                       rename_dim = 'syear', na.rm = FALSE)
-      obs <- Subset(obs, along = 'sweek',
-                    indices = (dim(data$obs$data)['sweek'] + 1) / 2)
-      fcst <- Subset(data$fcst$data, along = 'sday', indices = 1,
-                     drop = 'selected')
-    } else {
-      # we need to keep original objects (to later compute bias)
-      hcst <- data$hcst$data
-      obs <- data$obs$data
-      fcst <- data$fcst$data
-    }
     if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) {
       data$fcst$data <- QuantileMapping(exp = hcst,
-                                        obs = obs,
-                                        exp_cor = fcst,
+                                         obs = obs,
+                                         exp_cor = fcst,
                                          method = cal_method,
                                          memb_dim = 'ensemble',
                                          sdate_dim = 'syear',
                                          window_dim = NULL,
                                          na.rm = na.rm, qstep = 0.1,
-                                        wet.day = FALSE,
+                                         wet.day = FALSE,
                                          ncores = ncores)
       hcst_cal <- QuantileMapping(exp = hcst,
-                                  obs = obs,
-                                  method = cal_method,
+                                   obs = obs,
+                                   method = cal_method,
                                    memb_dim = 'ensemble',
                                    sdate_dim = 'syear',
                                    window_dim = NULL,
-                                  wet.day = FALSE,
+                                   wet.day = FALSE,
                                    na.rm = na.rm, qstep = 0.1,
                                    ncores = ncores)
     } else {
-      data$fcst$data <- CSTools::Calibration(exp = hcst,
-                                             obs = obs,
-                                             exp_cor = fcst,
-                                             cal.method = cal_method,
-                                             multi.model = FALSE,
-                                             na.fill = TRUE, na.rm = na.rm,
-                                             apply_to = NULL,
-                                             alpha = NULL, memb_dim = 'ensemble',
-                                             sdate_dim = 'syear',
-                                             dat_dim = NULL, ncores = ncores)
-      hcst_cal <- CSTools::Calibration(exp = hcst,
-                                       obs = obs,
-                                       cal.method = cal_method,
-                                       eval.method = 'in-sample',
-                                       multi.model = FALSE,
-                                       na.fill = TRUE, na.rm = na.rm,
-                                       apply_to = NULL,
-                                       alpha = NULL, memb_dim = 'ensemble',
-                                       sdate_dim = 'syear',
-                                       dat_dim = NULL, ncores = ncores)
+      data$fcst$data <- CSTools::Calibration(exp = hcst,
+                                              obs = obs,
+                                              exp_cor = fcst,
+                                              cal.method = cal_method,
+                                              multi.model = FALSE,
+                                              na.fill = TRUE, na.rm = na.rm,
+                                              apply_to = NULL,
+                                              alpha = NULL, memb_dim = 'ensemble',
+                                              sdate_dim = 'syear',
+                                              dat_dim = NULL, ncores = ncores)
+      hcst_cal <- CSTools::Calibration(exp = hcst,
+                                        obs = obs,
+                                        cal.method = cal_method,
+                                        eval.method = 'in-sample',
+                                        multi.model = FALSE,
+                                        na.fill = TRUE, na.rm = na.rm,
+                                        apply_to = NULL,
+                                        alpha = NULL, memb_dim = 'ensemble',
+                                        sdate_dim = 'syear',
+                                        dat_dim = NULL, ncores = ncores)
     }
+
     if (!('sday' %in% names(dim(data$fcst$data)))) {
       data$fcst$data <- InsertDim(data$fcst$data,
                                   len = 1, pos = 3, name = 'sday')
@@ -317,7 +329,7 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
       hcst_cal[hcst_cal < 0] <- 0
       data$fcst$data[data$fcst$data < 0] <- 0
     }
-    # Terciles limits using the whole hindcast period:
+    # Percentile limits for the forecast:
     lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'),
                        fun = function(x, prob_lims) {
                          lapply(prob_lims, function(ps) {
@@ -326,32 +338,14 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
                        output_dims = lapply(categories, function(x) {'bin'}),
                        prob_lims = categories,
                        ncores = ncores)
-    lims <- Apply(obs, target_dims = c('syear', 'ensemble'),
-                  fun = function(x, prob_lims) {
-                    lapply(prob_lims, function(ps) {
-                      as.array(quantile(as.vector(x), ps,
-                                        na.rm = TRUE))})},
-                  output_dims = lapply(categories, function(x) {'bin'}),
-                  prob_lims = categories,
-                  ncores = ncores)
   }
-  tmp_lims2 <- list()
-  # SAVING 'lims' which are the observed category limits:
-  # TODO saving:
-  recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/",
-                                  "outputs/Calibration/")
-
-  # Make categories rounded number to use as names:
-  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
-  categories <- lapply(categories, function (x) {
-    sapply(x, function(y) {
-      round(eval(parse(text = y)),2)})})
-
+  ## TODO: Function?
   # Compute Probabilities
   hcst_probs_ev <- list()
   obs_probs_ev <- list()
   fcst_probs <- list()
+
   for (ps in 1:length(categories)) {
     hcst_probs_ev[[ps]] <- GetProbs(res$cal_hcst_ev,
                                     time_dim = 'syear',
                                     prob_thresholds = NULL,
@@ -379,118 +373,40 @@ Crossval_calibration <- function(recipe, data, correct_negative = FALSE) {
                                     ncores = ncores)
     }
   }
-  if (recipe$Analysis$Workflow$Probabilities$save == 'all') {
-    for (ps in 1:length(categories)) {
-      ## TODO: use .drop_dims()
-      tmp_lims3 <- .drop_dims(lims[[ps]])
-      for (l in 1:dim(lims[[ps]])['bin']) {
-        tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3,
-                                          along = "bin",
-                                          indices = l,
-                                          drop = "selected")
-        tmp_lims2 <- append(tmp_lims2, list(tmp_lims))
-        names(tmp_lims2)[length(tmp_lims2)] <-
-          paste0("p_", as.character(categories[[ps]][l]*100))
-      }
-    }
-    info(recipe$Run$logger,
-         "SAVING OBSERVED CATEGORY LIMITS")
-    save_percentiles(recipe = recipe, percentiles = tmp_lims2,
-                     data_cube = data$obs,
-                     agg = "global", outdir = NULL)
-    #}
-  }
   # Convert to s2dv_cubes the resulting calibrated
   hcst <- data$hcst
   hcst$data <- res$cal_hcst_ev
 
+  # Subseasonal case: keep only the central sday position from the dimension
   if (horizon == 'subseasonal') {
-    # Keep only de central sday position from the dimension
     if (dim(hcst$data)['sday'] > 1) {
       hcst <- CST_Subset(hcst, along = 'sday',
                          indices = (dim(hcst$data)['sday'] + 1) / 2)
     }
     if (dim(data$obs$data)['sday'] > 1) {
       data$obs <- CST_Subset(data$obs, along = 'sday',
-                            indices = (dim(data$obs$data)['sday'] + 1) / 2)
+                             indices = (dim(data$obs$data)['sday'] + 1) / 2)
     }
     if (dim(data$hcst$data)['sday'] > 1) {
       data$hcst <- CST_Subset(data$hcst, along = 'sday',
-                             indices = (dim(data$hcst$data)['sday'] + 1) / 2)
+                              indices = (dim(data$hcst$data)['sday'] + 1) / 2)
     }
   }
-  info(recipe$Run$logger,
-       "#### Calibrated and Probabilities Done #####")
-  if (recipe$Analysis$Workflow$Calibration$save != FALSE) {
-    info(recipe$Run$logger, "##### START SAVING CALIBRATED #####")
-    # Save forecast
-    if ((recipe$Analysis$Workflow$Calibration$save %in%
-         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
-      save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst')
-    }
-  }
-  # Save probability bins
-  probs_hcst <- list()
-  probs_fcst <- list()
-  probs_obs <- list()
-  all_names <- NULL
-  for (ps in 1:length(categories)) {
-    for (perc in 1:(length(categories[[ps]]) + 1)) {
-      if (perc == 1) {
-        name_elem <- paste0("below_", categories[[ps]][perc]*100)
-      } else if (perc == length(categories[[ps]]) + 1) {
-        name_elem <- paste0("above_", categories[[ps]][perc-1]*100)
-      } else {
-        name_elem <- paste0("from_", categories[[ps]][perc-1]*100,
-                            "_to_", categories[[ps]][perc]*100)
-      }
-      probs_hcst <- append(probs_hcst,
-                           list(Subset(hcst_probs_ev[[ps]],
-                                       along = 'bin',
-                                       indices = perc,
-                                       drop = 'selected')))
-      probs_obs <- append(probs_obs,
-                          list(Subset(obs_probs_ev[[ps]],
-                                      along = 'bin',
-                                      indices = perc,
-                                      drop = 'selected')))
-      if (!is.null(data$fcst)) {
-        probs_fcst <- append(probs_fcst,
-                             list(Subset(fcst_probs[[ps]],
-                                         along = 'bin',
-                                         indices = perc,
-                                         drop = 'selected')))
-      }
-      all_names <- c(all_names, name_elem)
-    }
-  }
-  ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move
-  ## reorganizing and renaming to save_probabilities()
-  names(probs_hcst) <- all_names
-  probs_hcst <- lapply(probs_hcst, .drop_dims)
-  names(probs_obs) <- all_names
-  probs_obs <- lapply(probs_obs, .drop_dims)
-  if (!is.null(data$fcst)) {
-    names(probs_fcst) <- all_names
-    probs_fcst <- lapply(probs_fcst, .drop_dims)
-  }
-  if (recipe$Analysis$Workflow$Probabilities$save %in%
-      c('all', 'bins_only')) {
-    save_probabilities(recipe = recipe, probs = probs_fcst,
-                       data_cube = data$fcst, agg = agg,
-                       type = "fcst")
-  }
+  data <- list(hcst = hcst, obs = data$obs, fcst = data$fcst,
+               hcst.full_val = data$hcst, obs.full_val = data$obs,
+               cat_lims = list(obs = lims),
+               probs = list(hcst = hcst_probs_ev,
+                            obs = obs_probs_ev,
+                            fcst = fcst_probs),
+               ref_obs = res$obs_tr)
 
-  return(list(hcst = hcst, obs = data$obs, fcst = data$fcst,
-              hcst.full_val = data$hcst, obs.full_val = data$obs,
-              cat_lims = list(obs = lims),
-              probs = list(hcst = hcst_probs_ev,
-                           obs = obs_probs_ev,
-                           fcst = fcst_probs),
-              ref_obs = res$obs_tr))
+  # Save Crossval module outputs
+  source("modules/Saving/Saving_crossval.R")
+  Saving(recipe, data, module = "Calibration", agg = agg)
+  return(data)
 }
 
 # Outputs:
diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
index 0026dc0c15c7def4960c7285cfca0147d8d666fc..762bc7cdd922bc57cf79fa7e06657b843f39c083 100644
--- a/modules/Crossval/Crossval_metrics.R
+++ b/modules/Crossval/Crossval_metrics.R
@@ -279,11 +279,11 @@ Crossval_metrics <- function(recipe,
   for (ps in 1:length(categories)) {
     if (horizon == 'subseasonal') {
       data$probs$hcst[[ps]] <- MergeDims(data$probs$hcst[[ps]],
-                                        merge_dims = c('sweek', 'syear'),
-                                        rename_dim = 'syear', na.rm = FALSE)
+                                         merge_dims = c('sweek', 'syear'),
+                                         rename_dim = 'syear', na.rm = FALSE)
       data$probs$obs[[ps]] <- MergeDims(data$probs$obs[[ps]],
-                                       merge_dims = c('sweek', 'syear'),
-                                       rename_dim = 'syear', na.rm = FALSE)
+                                        merge_dims = c('sweek', 'syear'),
+                                        rename_dim = 'syear', na.rm = FALSE)
     }
     if ('rps' %in% requested_metrics) {
       rps <- RPS(exp = data$probs$hcst[[ps]],
diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R
index d7fe542d8a337d43cfa23c1edb1fe77b123e52eb..807a6bcb2b663922a0872ea41ddefe678182c07e 100644
--- a/modules/Saving/R/save_forecast.R
+++ b/modules/Saving/R/save_forecast.R
@@ -141,14 +141,14 @@ save_forecast <- function(recipe,
         ArrayToNc(append(country, times, fcst), outfile)
       } else if (tolower(agg) == "region") {
         region <- .get_region(data_cube)
-        vars <- c(region, times, fcst)
+        vars <- c(times, region, fcst)
         ArrayToNc(vars, outfile)
       } else {
         latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
         longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
         latlon <- .get_latlon(latitude, longitude)
         # Compile variables into a list and export to netCDF
-        vars <- c(latlon, times, fcst)
+        vars <- c(times, latlon, fcst)
         ArrayToNc(vars, outfile)
       }
     }
diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R
index d5fe91bd3e64005e88dd2a41d13352caf7429cb1..42aa5a672bc71e18b9d31c297b9198cfe96ec97e 100644
--- a/modules/Saving/R/save_metrics.R
+++ b/modules/Saving/R/save_metrics.R
@@ -162,14 +162,14 @@ save_metrics <- function(recipe,
        ArrayToNc(append(country, times$time, subset_metric), outfile)
      } else if (tolower(agg) == "region") {
        region <- .get_region(data_cube)
-       vars <- c(region, times, subset_metric)
+       vars <- c(times, region, subset_metric)
        ArrayToNc(vars, outfile)
      } else {
        latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
        longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
        latlon <- .get_latlon(latitude, longitude)
        # Compile variables into a list and export to netCDF
-       vars <- c(latlon, times, subset_metric)
+       vars <- c(times, latlon, subset_metric)
        ArrayToNc(vars, outfile)
      }
    }
diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R
index b6ed41418cd14c50426f40ee673bbb25c0724b78..b357d5c3cd56c2ae00d41080b1ba94535159cf32 100644
--- a/modules/Saving/R/save_observations.R
+++ b/modules/Saving/R/save_observations.R
@@ -29,15 +29,22 @@ save_observations <- function(recipe,
    #                      cal = calendar)
     init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1,
                                  '-01-01'), cal = calendar)
-  } else {
+  } else if (fcst.horizon == 'seasonal') {
     init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start,
                                  recipe$Analysis$Time$sdate),
                           format = '%Y%m%d', cal = calendar)
+  } else if (fcst.horizon == 'subseasonal') {
+    init_date <- as.PCICt(as.character(recipe$Analysis$Time$sdate),
+                          format = '%Y%m%d', cal = calendar)
   }
   syears <- seq(1:dim(data_cube$data)['syear'][[1]])
   ## expect dim = [sday = 1, sweek = 1, syear, time]
-  syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1])
+  if (fcst.horizon == "subseasonal") {
+    syears_val <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end
+  } else {
+    syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1])
+  }
 
   # Loop over variables
   for (var in 1:data_cube$dims[['var']]) {
@@ -99,12 +106,11 @@ save_observations <- function(recipe,
     # the same name pattern.
     ## Because observations are loaded differently in the daily vs. monthly
     ## cases, different approaches are necessary.
-    if (fcst.horizon == 'decadal') {
+    if (fcst.horizon %in% c('subseasonal', 'decadal')) {
       # init_date is like "1990-11-01"
       init_date <- as.POSIXct(init_date)
       fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date))
     } else {
-
       if (store.freq == "monthly_mean") {
         fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i]
         fcst.sdate <- as.Date(paste0(fcst.sdate, "01"), '%Y%m%d')
@@ -135,14 +141,14 @@ save_observations <- function(recipe,
       ArrayToNc(append(country, times$time, fcst), outfile)
     } else if (tolower(agg) == "region") {
       region <- .get_region(data_cube)
-      vars <- c(region, times, fcst)
+      vars <- c(times, region, fcst)
       ArrayToNc(vars, outfile)
     } else {
       latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
       longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
       latlon <- .get_latlon(latitude, longitude)
       # Compile variables into a list and export to netCDF
-      vars <- c(latlon, times, fcst)
+      vars <- c(times, latlon, fcst)
       ArrayToNc(vars, outfile)
     }
   }
diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R
index d5b4ae6188e3d4239b7b92436d7c8f8123136c96..905be2fb385e0904ec1c607260c23ed883b8dc99 100644
--- a/modules/Saving/R/save_percentiles.R
+++ b/modules/Saving/R/save_percentiles.R
@@ -120,14 +120,14 @@ save_percentiles <- function(recipe,
       ArrayToNc(append(country, time, subset_percentiles), outfile)
     } else if (tolower(agg) == "region") {
       region <- .get_region(data_cube)
-      vars <- c(region, times, subset_percentiles)
+      vars <- c(times, region, subset_percentiles)
       ArrayToNc(vars, outfile)
     } else {
       latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
       longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
       latlon <- .get_latlon(latitude, longitude)
       # Compile variables into a list and export to netCDF
-      vars <- c(latlon, times, subset_percentiles)
+      vars <- c(times, latlon, subset_percentiles)
       ArrayToNc(vars, outfile)
     }
   }
diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R
index e3df5a1e154ae07b3dbf00480bc031cace126ada..1f6a597a9c624316582d3d950bebe27898b37023 100644
--- a/modules/Saving/R/save_probabilities.R
+++ b/modules/Saving/R/save_probabilities.R
@@ -124,14 +124,14 @@ save_probabilities <- function(recipe,
     ## TODO: Move this segment outside of the loop
     if (tolower(agg) == "region") {
       region <- .get_region(data_cube)
-      vars <- c(region, times, probs_syear)
+      vars <- c(times, region, probs_syear)
       ArrayToNc(vars, outfile)
     } else {
       latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)]
       longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)]
       latlon <- .get_latlon(latitude, longitude)
       # Compile variables into a list and export to netCDF
-      vars <- c(latlon, times, probs_syear)
+      vars <- c(times, latlon, probs_syear)
       ArrayToNc(vars, outfile)
     }
   }
diff --git a/modules/Saving/Saving_crossval.R b/modules/Saving/Saving_crossval.R
new file mode 100644
index 0000000000000000000000000000000000000000..edd48351f418eb765920925a10865e397bb45975
--- /dev/null
+++ b/modules/Saving/Saving_crossval.R
@@ -0,0 +1,150 @@
+source("modules/Saving/R/get_dir.R")
+source("modules/Saving/R/get_filename.R")
+source("modules/Saving/R/Utils.R")
+source("modules/Saving/R/save_forecast.R")
+source("modules/Saving/R/save_observations.R")
+source("modules/Saving/R/save_metrics.R")
+source("modules/Saving/R/save_probabilities.R")
+source("modules/Saving/R/save_percentiles.R")
+source("modules/Saving/R/get_times.R")
+source("modules/Saving/R/get_latlon.R")
+source("modules/Saving/R/get_global_attributes.R")
+source("modules/Saving/R/drop_dims.R")
+## TODO: Remove when packages are released
+source("modules/Saving/R/tmp/ArrayToNc.R")
+source("modules/Saving/R/tmp/CST_SaveExp.R")
+
+
+
+Saving <- function(recipe, data, module, metrics = NULL, agg = "global") {
+
+  original_output_dir <- recipe$Run$output_dir
+  recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", module)
+
+  # Make categories rounded number to use as names:
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      round(eval(parse(text = y)), 2)})})
+
+  # 1. Save percentiles
+  limits <- list()
+  ## TODO: Review if
+  if (recipe$Analysis$Workflow$Probabilities$save == 'all') {
+    for (ps in 1:length(categories)) {
+      limits_ps <- .drop_dims(data$cat_lims$obs[[ps]])
+      for (l in 1:dim(data$cat_lims$obs[[ps]])['bin']) {
+        limits_subset <- ClimProjDiags::Subset(x = limits_ps,
+                                               along = "bin",
+                                               indices = l,
+                                               drop = "selected")
+        limits <- append(limits, list(limits_subset))
+        names(limits)[length(limits)] <-
+          paste0("p_", as.character(categories[[ps]][l]*100))
+      }
+    }
+    info(recipe$Run$logger,
+         "SAVING OBSERVED CATEGORY LIMITS")
+    save_percentiles(recipe = recipe, percentiles = limits,
+                     data_cube = data$obs,
+                     agg = agg, outdir = NULL)
+  }
+
+  # 2. Save probabilities
+  probs_hcst <- list()
+  probs_fcst <- list()
+  probs_obs <- list()
+  all_names <- NULL
+
+  # Convert arrays to named list of lists
+  for (ps in 1:length(categories)) {
+    for (perc in 1:(length(categories[[ps]]) + 1)) {
+      if (perc == 1) {
+        name_elem <- paste0("below_", categories[[ps]][perc]*100)
+      } else if (perc == length(categories[[ps]]) + 1) {
+        name_elem <- paste0("above_", categories[[ps]][perc-1]*100)
+      } else {
+        name_elem <- paste0("from_", categories[[ps]][perc-1]*100,
+                            "_to_", categories[[ps]][perc]*100)
+      }
+      probs_hcst <- append(probs_hcst,
+                           list(Subset(data$probs$hcst[[ps]],
+                                       along = 'bin',
+                                       indices = perc,
+                                       drop = 'selected')))
+      probs_obs <- append(probs_obs,
+                          list(Subset(data$probs$obs[[ps]],
+                                      along = 'bin',
+                                      indices = perc,
+                                      drop = 'selected')))
+      if (!is.null(data$fcst)) {
+        probs_fcst <- append(probs_fcst,
+                             list(Subset(data$probs$fcst[[ps]],
+                                         along = 'bin',
+                                         indices = perc,
+                                         drop = 'selected')))
+      }
+      all_names <- c(all_names, name_elem)
+    }
+  }
+  names(probs_hcst) <- all_names
+  probs_hcst <- lapply(probs_hcst, .drop_dims)
+  names(probs_obs) <- all_names
+  probs_obs <- lapply(probs_obs, .drop_dims)
+  if (!is.null(data$fcst)) {
+    names(probs_fcst) <- all_names
+    probs_fcst <- lapply(probs_fcst, .drop_dims)
+  }
+  if (recipe$Analysis$Workflow$Probabilities$save %in%
+      c('all', 'bins_only')) {
+    ## TODO: Add hcst and obs
+    ## Saving only forecast probabilities for now
+    # save_probabilities(recipe = recipe, probs = probs_hcst,
+    #                    data_cube = data$hcst, agg = agg,
+    #                    type = "hcst")
+    # save_probabilities(recipe = recipe, probs = probs_obs,
+    #                    data_cube = data$obs, agg = agg,
+    #                    type = "obs")
+    if (!is.null(data$fcst)) {
+      save_probabilities(recipe = recipe, probs = probs_fcst,
+                         data_cube = data$fcst, agg = agg,
+                         type = "fcst")
+    }
+  }
+
+  # 3. Save hindcast, fcst and obs
+  if (recipe$Analysis$Workflow[[module]]$save != "none") {
+    # Save forecast
+    if ((recipe$Analysis$Workflow[[module]]$save %in%
+         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+      save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst')
+    }
+    # Save hindcast
+    if (recipe$Analysis$Workflow[[module]]$save %in%
+        c('all', 'exp_only')) {
+      if (dim(data$hcst$data)['sweek'] > 1) {
+        data$hcst <- CST_Subset(data$hcst, along = 'sweek',
+                                indices = (dim(data$hcst$data)['sweek'] + 1) / 2)
+      }
+      save_forecast(recipe = recipe, data_cube = data$hcst, type = 'hcst')
+    }
+    # Save observations
+    if (recipe$Analysis$Workflow[[module]]$save == 'all') {
+      if (dim(data$obs$data)['sweek'] > 1) {
+        data$obs <- CST_Subset(data$obs, along = 'sweek',
+                               indices = (dim(data$obs$data)['sweek'] + 1) / 2)
+      }
+      save_observations(recipe = recipe, data_cube = data$obs)
+    }
+  }
+
+  # 4. Save metrics
+  if (!is.null(metrics)) {
+    recipe$Run$output_dir <- file.path(original_output_dir, "outputs", "Skill")
+    save_metrics(recipe = recipe,
+                 metrics = metrics,
+                 data_cube = data$obs, agg = agg,
+                 outdir = recipe$Run$output_dir)
+  }
+  recipe$Run$output_dir <- original_output_dir
+}
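
Usage sketch (illustrative, not part of the patch): with this refactor, each
Crossval module builds its output list (hcst, obs, fcst, cat_lims, probs,
ref_obs) and delegates all writing to the new Saving() entry point. The first
two calls below mirror the ones added in the diff; the hand-off of a named
list of metric arrays through the optional `metrics` argument is an assumption
about how Crossval_metrics() output would be wired in.

    source("modules/Saving/Saving_crossval.R")

    # As added inside Crossval_anomalies() / Crossval_calibration():
    Saving(recipe, data, module = "Anomalies", agg = agg)
    Saving(recipe, data, module = "Calibration", agg = agg)

    # Assumed wiring for skill metrics: a non-NULL `metrics` list is routed to
    # <output_dir>/outputs/Skill via save_metrics() in section 4 of Saving().
    Saving(recipe, data, module = "Calibration",
           metrics = skill_metrics, agg = "global")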