diff --git a/modules/Crossval/Crossval_calibration.R b/modules/Crossval/Crossval_calibration.R new file mode 100644 index 0000000000000000000000000000000000000000..394a3de5a1a78b71c002d646055b623e07773b4a --- /dev/null +++ b/modules/Crossval/Crossval_calibration.R @@ -0,0 +1,504 @@ +# take the output of Flor/s2s/subseasonal_loading.R + +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Crossval/R/tmp/CST_MergeDims.R") + +Crossval_calibration <- function(recipe, data, correct_negative = FALSE) { + + cal_method <- recipe$Analysis$Workflow$Calibration$method + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + .cross_calibration <- function(indices, exp, obs, cross, categories, recipe) { + ## Objects to return: + # lims_cal_hcst_tr: hcst percentiles (per category) + # lims_cal_obs_tr: obs percentiles (per category) + # cal_hcst_ev: hcst cross-validated calibrated + # obs_tr: obs training + ## training indices + cross <- cross[[indices]] + print(paste("Crossval Index:", cross$eval.dexes)) + obs_tr <- Subset(obs, along = 'syear', + indices = cross$train.dexes) + hcst_tr <- Subset(exp, along = 'syear', + indices = cross$train.dexes) + ## evaluation indices + hcst_ev <- Subset(exp, along = 'syear', + indices = cross$eval.dexes) + obs_ev <- Subset(obs, along = 'syear', + indices = cross$eval.dexes) + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + central_day <- (dim(exp)['sday'] + 1)/2 + hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = na.rm) + + obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = na.rm) + # 'sday' dim to select the central day + hcst_ev <- Subset(hcst_ev, along = 'sday', indices = central_day, + drop = 'selected') + + } + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + cal_hcst_tr <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + exp_cor = hcst_ev, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) + } else { + cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + } + # If precipitation is negative: + if (correct_negative) { + cal_hcst_tr[cal_hcst_tr < 0] <- 0 + cal_hcst_ev[cal_hcst_ev < 0] <- 0 + } + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + as.array(quantile(as.vector(x), prob_lims, + na.rm = TRUE))}, + prob_lims = unlist(categories), + ncores = ncores)$output1 + names(dim(lims_cal_hcst_tr))[1] <- 'bin' + lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + as.array(quantile(as.vector(x), prob_lims, + na.rm = TRUE))}, + prob_lims = unlist(categories), + ncores = ncores)$output1 + names(dim(lims_obs_tr))[1] <- 'bin' + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross$train.dexes))) + obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross$train.dexes))) + } + # check order before storing: + # Add back 'sday' dim if needed or select central 'sday' + if ('sday' %in% names(dim(cal_hcst_ev))) { + if (dim(cal_hcst_ev)['sday'] != 1) { + # To fail because it should not appear + } + } else { + cal_hcst_ev <- InsertDim(cal_hcst_ev, 'sday', len = 1, pos = 1) + } + # remove syear dim from ev objects + if ('syear' %in% names(dim(cal_hcst_ev))) { + if (dim(cal_hcst_ev)['syear'] == 1) { + cal_hcst_ev <- Subset(cal_hcst_ev, along = 'syear', indices = 1, + drop = 'selected') + } + } + # ---- + if ('sday' %in% names(dim(obs_tr))) { + if (dim(obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + obs_tr <- Subset(obs_tr, along = 'sday', + indices = central_day) + } + } else { + obs_tr <- InsertDim(obs_tr, 'sday', len = 1, pos = 1) + } + # rename dimension to be ensemble + if ('ensemble' %in% names(dim(obs_tr))) { + if (dim(obs_tr)['ensemble'] == 1) { + obs_tr <- Subset(obs_tr, along = 'ensemble', indices = 1, + drop = 'selected') + } else { + stop("why it is longer than 1?") + } + } + if ('syear' %in% names(dim(obs_tr))) { # should be true + names(dim(obs_tr))[which(names(dim(obs_tr)) == 'syear')] <- 'ensemble' + } + # ---- + if ('sday' %in% names(dim(lims_obs_tr))) { + if (dim(lims_obs_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_obs_tr <- Subset(lims_obs_tr, along = 'sday', + indices = central_day) + } + } else { + lims_obs_tr <- InsertDim(lims_obs_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_obs_tr))) { + if (dim(lims_obs_tr)['ensemble'] == 1) { + lims_obs_tr <- Subset(lims_obs_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("Why it has memb_dim") + } + } + # ---- + if ('sday' %in% names(dim(lims_cal_hcst_tr))) { + if (dim(lims_cal_hcst_tr)['sday'] > 1) { + # 'sday' dim to select the central day + lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'sday', + indices = central_day) + } + } else { + lims_cal_hcst_tr <- InsertDim(lims_cal_hcst_tr, 'sday', len = 1, pos = 1) + } + # lims cannot have ensemble dim: + if ('ensemble' %in% names(dim(lims_cal_hcst_tr))) { + if (dim(lims_cal_hcst_tr)['ensemble'] == 1) { + lims_cal_hcst_tr <- Subset(lims_cal_hcst_tr, along = 'ensemble', + indices = 1, drop = 'selected') + } else { + # error: + stop("Why it has memb_dim") + } + } + return(list(cal_hcst_ev = cal_hcst_ev, + obs_tr = obs_tr, + lims_cal_hcst_tr = lims_cal_hcst_tr, + lims_obs_tr = lims_obs_tr)) + } + + indices <- array(1:length(cross), dim = c(syear = length(cross))) + res <- Apply(data = list(indices = indices, + exp = data$hcst$data, + obs = data$obs$data), + target_dims = list(indices = NULL, + obs = names(dim(data$obs$data)), + exp = names(dim(data$hcst$data))), + fun = .cross_calibration, + ncores = 1, ## TODO: Explore options for core distribution + cross = cross, + categories = categories, + recipe = recipe) + info(recipe$Run$logger, + "#### Calibration Cross-validation loop ended #####") + # Rearrange limits into a list: + lims_tmp_hcst <- list() + lims_tmp_obs <- list() + last_index <- 0 + for (ps in 1:length(categories)) { + indices <- last_index + 1:length(categories[[ps]]) + lims_category_hcst <- Subset(res$lims_cal_hcst_tr, along = "bin", + indices = list(indices), + drop = FALSE) + lims_tmp_hcst[[ps]] <- lims_category_hcst + lims_category_obs <- Subset(res$lims_obs_tr, along = "bin", + indices = list(indices), + drop = FALSE) + lims_tmp_obs[[ps]] <- lims_category_obs + last_index <- indices[length(indices)] + } + res$lims_cal_hcst_tr <- lims_tmp_hcst + res$lims_obs_tr <- lims_tmp_obs + rm(lims_tmp_hcst, lims_tmp_obs) + gc() + + # Forecast calibration: + lims <- list() + if (!is.null(data$fcst)) { + if (tolower(recipe$Analysis$Horizon) %in% c('subseasonal')) { + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, + drop = 'selected') + } else { + # we need to keep original objects (to later compute bias) + hcst <- data$hcst$data + obs <- data$obs$data + fcst <- data$fcst$data + } + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = hcst, + obs = obs, + exp_cor = fcst, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = hcst, + obs = obs, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + } + if (!('sday' %in% names(dim(data$fcst$data)))) { + data$fcst$data <- InsertDim(data$fcst$data, + len = 1, pos = 3, name = 'sday') + } + if (correct_negative) { + hcst_cal[hcst_cal < 0] <- 0 + data$fcst$data[data$fcst$data < 0] <- 0 + } + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims <- Apply(obs, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } + tmp_lims2 <- list() + + # SAVING 'lims' which are the observed category limits: + # TODO saving: + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "/", + "outputs/Calibration/") + + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + # Compute Probabilities + hcst_probs_ev <- list() + obs_probs_ev <- list() + fcst_probs <- list() + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(res$cal_hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_cal_hcst_tr[[ps]], + ncores = ncores) + central_day <- (dim(data$obs$data)['sday'] + 1)/2 + tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) + obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = res$lims_obs_tr[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['bin']) { + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l, + drop = "selected") + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- + paste0("p_", as.character(categories[[ps]][l]*100)) + } + } + info(recipe$Run$logger, + "SAVING OBSERVED CATEGORY LIMITS") + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + #} + } + + # Convert to s2dv_cubes the resulting calibrated + hcst <- data$hcst + hcst$data <- res$cal_hcst_ev + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + # Keep only de central sday position from the dimension + if (dim(hcst$data)['sday'] > 1) { + hcst <- CST_Subset(hcst, along = 'sday', + indices = (dim(hcst$data)['sday'] + 1) / 2) + } + if (dim(data$obs$data)['sday'] > 1) { + data$obs <- CST_Subset(data$obs, along = 'sday', + indices = (dim(data$obs$data)['sday'] + 1) / 2) + } + if (dim(data$hcst$data)['sday'] > 1) { + data$hcst <- CST_Subset(data$hcst, along = 'sday', + indices = (dim(data$hcst$data)['sday'] + 1) / 2) + } + } + info(recipe$Run$logger, + "#### Calibrated and Probabilities Done #####") + if (recipe$Analysis$Workflow$Calibration$save != FALSE) { + info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") + # Save forecast + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]*100) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]*100) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1]*100, + "_to_", categories[[ps]][perc]*100) + } + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + if (!is.null(data$fcst)) { + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + } + all_names <- c(all_names, name_elem) + } + } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() + names(probs_hcst) <- all_names + probs_hcst <- lapply(probs_hcst, .drop_dims) + names(probs_obs) <- all_names + probs_obs <- lapply(probs_obs, .drop_dims) + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + probs_fcst <- lapply(probs_fcst, .drop_dims) + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + + return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(obs = lims), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, + fcst = fcst_probs), + ref_obs_tr = res$obs_tr)) + +} + +# Outputs: +# hcst is the calibrated hcst for eval indices +# obs are the original obs +# fcst is the calibrated forecast +# .full_val are the original data to calculate bias (although it may be 0 in calibrated data) +# hcst_ev and obs_ev probabilities are needed for the RPS +# fcst_probs for an operaitonal forecast +# ref_obs_tr is needed for the CRPSS +# observerd category limits for the operational forecast + diff --git a/modules/Visualization/R/get_plot_time_labels.R b/modules/Visualization/R/get_plot_time_labels.R index 2dfc420244cd58e4d6493698be4d1fa2d757120a..4cdc23f541cee704031fd16aa7dd0dc5f0f40e87 100644 --- a/modules/Visualization/R/get_plot_time_labels.R +++ b/modules/Visualization/R/get_plot_time_labels.R @@ -11,7 +11,8 @@ .get_plot_time_labels <- function(dates, horizon, init_date, start_date, i_syear, time_bounds = NULL) { - years <- lubridate::year(dates[1, 1, which(start_date == i_syear), ]) + years <- lubridate::year(Subset(dates, along = 'syear', + indices = which(start_date == i_syear))) if (is.null(attributes(time_bounds))) { if (horizon == "subseasonal") { time_labels <- dates[1, 1, which(start_date == i_syear), ] diff --git a/modules/Visualization/var_long_names.yml b/modules/Visualization/var_long_names.yml index a6293406d9d790cedd11619ce53e0736f42ea363..9f670abc77730e88f9479f4623895b2781b36e0d 100644 --- a/modules/Visualization/var_long_names.yml +++ b/modules/Visualization/var_long_names.yml @@ -5,4 +5,4 @@ Mycase: prlr: "Total precipitation" sfcWind: "Wind speed" rsds: "Solar radiation" - + z500: "Geopotential height at 500 hPa"