From 9108a1771b7363846cc13f94bf31c1b2ad0fdd05 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 2 Jun 2022 09:23:48 +0200 Subject: [PATCH 01/11] add comments and TODOs --- modules/Saving/Save_data.R | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index ad6906f1..a1e38ef4 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -1,4 +1,3 @@ -library(easyNCDF) get_times <- function(fcst.type, leadtimes, sdate) { @@ -11,7 +10,7 @@ get_times <- function(fcst.type, leadtimes, sdate) { ## TODO: Remove "sub_obs"? "seasonal" = {len <- length(leadtimes); ref <- 'months since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "sub_obs" = {len <- 52; ref <- 'week of the year '; + "sub_obs" = {len <- 52; ref <- 'week of the year '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "subseasonal" = {len <- 4; ref <- 'weeks since '; stdname <- ''} @@ -42,21 +41,20 @@ get_times <- function(fcst.type, leadtimes, sdate) { } -get_latlon <- function(lat, lon) { - - longitude <- lon - dim(longitude) <- length(longitude) - metadata <- list(longitude = list(units = 'degrees_east')) - attr(longitude, 'variables') <- metadata - names(dim(longitude)) <- 'longitude' - - latitude <- lat - dim(latitude) <- length(latitude) - metadata <- list(latitude = list(units = 'degrees_north')) - attr(latitude, 'variables') <- metadata - names(dim(latitude)) <- 'latitude' +get_latlon <- function(latitude, longitude) { + ## TODO: Verify if this is needed or if it needs a change now that we are + ## using s2dv_cube objects + dim(longitude) <- length(longitude) + metadata <- list(longitude = list(units = 'degrees_east')) + attr(longitude, 'variables') <- metadata + names(dim(longitude)) <- 'longitude' + + dim(latitude) <- length(latitude) + metadata <- list(latitude = list(units = 'degrees_north')) + attr(latitude, 'variables') <- metadata + names(dim(latitude)) <- 'latitude' - return(list(lat=latitude, lon=longitude)) + return(list(lat=latitude, lon=longitude)) } @@ -111,5 +109,6 @@ save_metrics <- function(variable, vars <- c(vars, skill, list(time_step)) ArrayToNc(vars, outfile) } + } -- GitLab From a1c3a859364928ddc780d33bc599a532d6182c17 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Jun 2022 09:32:37 +0200 Subject: [PATCH 02/11] Remove old S2S4E Calibration scripts --- modules/Calibration/Calibration.R | 238 ------------------------ modules/Calibration/Calibration_fcst3.R | 186 ------------------ modules/Calibration/Calibration_fcst4.R | 157 ---------------- 3 files changed, 581 deletions(-) delete mode 100644 modules/Calibration/Calibration.R delete mode 100644 modules/Calibration/Calibration_fcst3.R delete mode 100644 modules/Calibration/Calibration_fcst4.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R deleted file mode 100644 index 2463fcbd..00000000 --- a/modules/Calibration/Calibration.R +++ /dev/null @@ -1,238 +0,0 @@ -## Code to apply any correction method: -## simple bias adjustment -## variance inflation -## quantile mapping -# -### Which parameter are required? -#if (!("obs" %in% ls()) || is.null(obs)) { -# error(logger, -# "There is no object 'obs' in the global environment or it is NULL") -# stop("EXECUTION FAILED") -#} -#if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { -# error(logger, -# "There is no object 'fcst' in the global environment or it is NULL") -# stop("EXECUTION FAILED") -#} -#if (!("hcst" %in% ls()) || is.null(hcst)) { -# error(logger, -# "There is no object 'hcst' in the global environment or it is NULL") -# stop("EXECUTION FAILED") -#} -#if (!("method" %in% ls()) || is.null(method)) { -# warn(logger, -# "Calibration method not found and it is set as 'SBC'.") -# method <- 'SBC' -#} -#if (method %in% c('SBC', 'SimpleBiasCorrection')) { -# cal_fun <- "CSTools::Calibration" -# cal_method <- "bias" -#} else if (method %in% c("Inflation", "VarianceInflation")) { -# cal_fun <- "CSTools::Calibration" -# cal_method <- "evmos" -#} else if (method %in% c("QM", "QuantileMapping")) { -# cal_fun <- "CSTools::QuantileMapping" -#} else { -# error(logger, "Unknown calibration method definde in the recipe.") -# stop("EXECUTION FAILED") -#} -#info(logger, paste("#-------------------------- ", "\n", -# "running Calibration module", "\n", -# "it can call", cal_fun )) - -#' Function that bias-adjust the S2S hindcast -# -#' this function bias adjust the hindcast data using -#' the leave-one-out method and the specified fun -# -#'@param obs A numeric array with named dimensions, -#' representing the observational data used -#' in the bias-adjustment method. -#' 'sday','syear','ensemble' are mandatory dims - -#'@param hcst A numeric array with named dimensions, -#' representing the system hindcast data used -#' in the bias-adjustment method. -#' 'sday','syear','ensemble' are mandatory dims - -#'@param fun bias adjustment function - -#'@param mm TRUE if the experiment data is conformed by -#' multiple systems (dat > 1) - -#'@param ncores An integer indicating the number of cores to -#' use for parallel computation. The default value is NULL. - -#'@param target_dims Dims needed to do the calibration -#'@param output_dims Output dims from the calib fun - -#'@return A numeric array with the bias adjusted hindcast, -#' - -CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - -hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, - split_factor=1, ncores=32) -{ - - # Replicates observations for bias adjusting each - # system of the multi-model separately - ## TODO: Adapt code for multi-model case - if(mm){ - obs.mm <- obs - for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ - obs.mm <- abind(obs2, obs, - along = which(names(dim(obs)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(obs)) - obs <- obs.mm - remove(obs.mm) - } - - if (method %in% CST_CALIB_METHODS) { - # Hcst calibration - hcst <- CSTools::CST_Calibration(hcst, obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - - hcst$data[!is.finite(hcst$data)] <- NA - - } else if (method %in% c("s2s4e")) { - hcst <- Apply(data=list(obs = obs$data, hcst = hcst$data), - extra_info = list(na.rm = na.rm), - target_dims = c('sday', 'syear', 'ensemble'), - output_dims = c('sday', 'syear', 'ensemble'), - na.rm = na.rm, - ncores = ncores, - fun = .cal)[[1]] - - hcst[!is.finite(hcst)] <- NA - } else { - ## TODO: Implement other calibration methods, implement logger - stop("Calibration method is not implemented in CSTools") - # error(logger, - # "Calibration method is not implemented in CSTools") - } - - # hcst$data[!is.finite(hcst$data)] <- NA - ## TODO: Is this line necessary? - # remove(hcst) - - # Merges all the ensembles from the different systems into - # one single ensemble - if(mm){ - hcst <- MergeDims(hcst, - merge_dims = c('dat', 'ensemble'), - rename_dim = 'ensemble') - hcst <- drop_na_dim(hcst, 'ensemble') - } - -# # removes dat and var dimensions if needed -# try(hcst<-Subset(hcst, -# c('var'), list(1), drop='selected')) -# try(hcst<-Subset(hcst, -# c('dat'), list(1), drop='selected')) - - return(hcst) - -} - -#' Function that bias-adjust the S2S forecast -# -#' this function bias adjust the forecast data using -#' hindcast and observational data -# -#'@param obs A numeric array with named dimensions, -#' representing the observational data used -#' in the bias-adjustment method. -#' 'sday','syear','ensemble' are mandatory dims - -#'@param hcst A numeric array with named dimensions, -#' representing the system hindcast data used -#' in the bias-adjustment method. -#' 'sday','syear','ensemble' are mandatory dims - -#'@param fcst A numeric array with named dimensions, -#' representing the system forecast to be -#' bias-adjusted. -#' 'sday','syear','ensemble' are mandatory dims - -#'@param fun bias adjustment function - -#'@param mm TRUE if the experiment data is conformed by -#' multiple systems (dat > 1) - -#'@param ncores An integer indicating the number of cores to -#' use for parallel computation. The default value is NULL. - -#'@param target_dims Dims needed to do the calibration -#'@param output_dims Output dims from the calib fun - -#'@return A numeric array with the bias adjusted forecast, -#' - -## TODO: -## needs and update: look at hcst version -fcst_calib <- function(obs, hcst, fcst, fun, mm=F, - na.rm=T, split_factor=1, ncores=32, - ## TODO: establish target dims and output dims - target_dims=c('sday', 'syear', 'ensemble'), - output_dims=c('sday', 'syear', 'ensemble')) -{ - - # Replicates observations for bias adjusting each - # system of the multi-model separately - ## TODO: Adapt multi-model case - if(mm){ - obs.mm <- obs - for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ - obs.mm <- abind(obs2,obs, - along=which(names(dim(obs)) == 'dat')) - } - names(dim(obs.mm)) <- names(dim(obs)) - obs <- obs.mm - remove(obs.mm) - } - - # Fcst Calibration - ## TODO: Return s2dv_cube - calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data), - extra_info = list(na.rm = na.rm), - target_dims = target_dims, - output_dims = output_dims, - na.rm = na.rm, - ncores = ncores, - fun = .fcstcal)[[1]] - - ## TODO: Restore this line - # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA - calibrated_fcst[!is.finite(calibrated_fcst)] <- NA - - # Merges all the ensembles from the different systems into - # one single ensemble - ## TODO: Adapt multi-model case - if(mm){ - calibrated_fcst <- MergeDims(calibrated_fcst, - merge_dims=c('dat', 'ensemble'), - rename_dim = 'ensemble') - calibrated_fcst <- drop_na_dim(calibrated_fcst, 'ensemble') - } - -# # removes dat and var dimensions if needed -# try(calibrated_fcst<-Subset(calibrated_fcst, -# c('var'),list(1),drop='selected')) -# try(calibrated_fcst<-Subset(calibrated_fcst, -# c('dat'),list(1),drop='selected')) - - return(calibrated_fcst) - - -} diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R deleted file mode 100644 index 3d24226b..00000000 --- a/modules/Calibration/Calibration_fcst3.R +++ /dev/null @@ -1,186 +0,0 @@ - -# Performs the hcst calibration for a centered time -# window of sdays. Objects must have the dimensions; -# sday, ensemble & syear. -# If sday dim = 1; no time window calibration is performed. -.cal <- function(var_obs, var_exp, na.rm=F) { - - ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] - nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] - ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] - - # selects only the centered day of time window - if ( ndays == 1 ){ - fcst.index <- 1 - } else { - fcst.index <- (ndays + 1)/2 - } - - ind <- 1:ntime - - climObs <- sapply(ind, function(x) {mean( - Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) - - climObs2 <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) - - climPred <- sapply(ind, function(x) {mean( - Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) - - climPred2 <- sapply(ind, function(x) {mean(var_exp, na.rm=na.rm)}) - - var_obs.ano <- var_obs - climObs2 - var_fcst.ano <- var_exp - climPred - var_exp.ano <- var_exp - climPred2 - - calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), - c(fcst.index, 0)) - # loop over the hindcast years - for (t in 1 : ntime) { - - # defining forecast,hindcast in cross-validation - fcst.ano <- Subset(var_fcst.ano, along=c('syear', 'sday'), - indices=list(t, fcst.index)) - - hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) - - obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) - - # computes ensemble mean of the fcst & hcst - em_fcst <- mean(fcst.ano, na.rm=na.rm) - em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, - na.rm=na.rm)$output1 - - # Changes the correlation method in case of NAs - if (na.rm){ - corr.method <- "pairwise.complete.obs" - } else { - corr.method <- "everything" - } - corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) - sd_obs <- sd(obs.ano, na.rm=na.rm) - sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) - - # Computes difference between fcst anomaly and ensemble mean - fcst_diff <- fcst.ano - em_fcst - hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) - names(dim(hcst_diff)) <- names(dim(hcst.ano)) - for (n in 1 : nmembers) { - diff <- Subset(hcst.ano, along='ensemble', indices=n, - drop='selected') - em_hcst - hcst_diff <- abind(hcst_diff, diff, - along=which(names(dim(var_exp)) == 'ensemble'), - hier.names=F) - } - - sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) - - a <- abs(corr) * (sd_obs / sd_em_hcst) - b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) - # print(paste0("climObs: ", climObs[t])) # - # print(paste0("sd_obs :", sd_obs)) # - # print(paste0("sd_hcst_diff :", sd_hcst_diff)) - # print(paste0("sd_em_hcst :", sd_em_hcst)) # - # print(paste0("em_hcst :", em_hcst)) - # print(paste0("Alpha - Lluís:", a)) # - # print(paste0("Beta - Lluís:", b)) # - # print(paste0("fcst_diff: ", fcst_diff)) - # print(paste0("climPred: ", climPred)) - # print(paste0("climCross: ", climCross[t])) - # print(paste0("em_fcst :", em_fcst)) - - # Calibrates hcst sdate - calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) - # Appends calibrated sdate to the hcst array - calib.hcst <- abind(calib.hcst, calib.syear, - along=which(names(dim(var_exp)) == 'syear'), - hier.names=F) - } - - names(dim(calib.hcst)) <- names(dim(calib.syear)) - # removes NAs added in array init - calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) - - return(calib.hcst) - -} - -# Performs the fcst calibration for a centered time -# window of sdays. Objects must have the dimensions; -# sday, ensemble & syear. -# If sday dim = 1; no time window calibration is performed. -.fcstcal <- function(obs, hcst, fcst, na.rm=F) { - - ntime <- dim(hcst)['syear'] - nmembers <- dim(hcst)['ensemble'] - fcst.nmembers <- dim(fcst)['ensemble'] - ndays <- dim(hcst)['sday'] - - # selects only the centered day of time window - if ( ndays == 1 ){ - fcst.index <- 1 - } else { - fcst.index <- (ndays + 1)/2 - } - - ind <- 1:ntime - climObs <- mean(obs, na.rm=na.rm) - climPred <- mean(hcst, na.rm=na.rm) - - obs.ano <- obs - climObs - hcst.ano <- hcst - climPred - fcst.ano <- fcst - climPred - - calib.fcst <- NA * fcst - - # computes ensemble mean of the fcst & hcst - em_fcst <- mean(fcst.ano, na.rm=na.rm) - em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, - na.rm=na.rm)$output1 - - # Changes the correlation method in case of NAs allowed - if (na.rm){ - corr.method <- "pairwise.complete.obs" - } else { - corr.method <- "everything" - } - - corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) - sd_obs <- sd(obs.ano, na.rm=na.rm) - sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) - - # Computes difference between fcst anomaly and ensemble mean - fcst_diff <- fcst.ano - em_fcst - hcst_diff <- array(numeric(), c(ndays,ntime,0)) - names(dim(hcst_diff)) <- names(dim(hcst.ano)) - for (n in 1 : nmembers) { - diff <- Subset(hcst.ano, along='ensemble', indices=n, - drop='selected') - em_hcst - hcst_diff <- abind(hcst_diff, diff, - along=which(names(dim(hcst)) == 'ensemble'), - hier.names=F) - } - - sd_hcst_diff <- sd(hcst_diff, na.rm) - # print(paste0("Sigma e:", sd_hcst_diff)) - a <- abs(corr) * (sd_obs / sd_em_hcst) - b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) - ## print(paste0("climObs: ", climObs)) - ## print(paste0("sd_obs :", sd_obs)) - # print(paste0("sd_hcst_diff :", sd_hcst_diff)) - # print(paste0("sd_em_hcst :", sd_em_hcst)) - # print(paste0("em_hcst :", em_hcst)) - ## print(paste0("Alpha - Lluís:", a)) - ## print(paste0("Beta - Lluís:", b)) - # print(paste0("climObs: ", climObs)) - # print(paste0("fcst_diff: ", fcst_diff)) - # print(paste0("em_fcst :", em_fcst)) - - # Calibrates hcst sdate - fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) - # fcst_cube$data <- fcst.calib - return(fcst.calib) - # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) - -} - - diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R deleted file mode 100644 index 9359c6e8..00000000 --- a/modules/Calibration/Calibration_fcst4.R +++ /dev/null @@ -1,157 +0,0 @@ - -# Performs the hcst calibration for a centered time -# window of sdays. Objects must have the dimensions; -# sday, ensemble & syear. -# If sday dim = 1; no time window calibration is performed. -.cal <- function(var_obs, var_exp, na.rm=F) { - - ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] - nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] - ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] - - # selects only the centered day of time window - if ( ndays == 1 ){ - fcst.index <- 1 - } else { - fcst.index <- (ndays + 1)/2 - } - - ind <- 1:ntime - - climObs <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) - climPred <- sapply(ind, function(x) {mean( - Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) - - var_obs.ano <- var_obs - climObs - var_exp.ano <- var_exp - climPred - - calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), - c(fcst.index, 0)) - # loop over the hindcast years - for (t in 1 : ntime) { - - # defining forecast,hindcast in cross-validation - fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'), - indices=list(t, fcst.index)) - hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) - - obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) - - # computes ensemble mean of the fcst & hcst - em_fcst <- mean(fcst.ano, na.rm=na.rm) - em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, - na.rm=na.rm)$output1 - - # Changes the correlation method in case of NAs - if (na.rm){ - corr.method <- "pairwise.complete.obs" - } else { - corr.method <- "everything" - } - corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) - sd_obs <- sd(obs.ano, na.rm=na.rm) - sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) - - # Computes difference between fcst anomaly and ensemble mean - fcst_diff <- fcst.ano - em_fcst - hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) - names(dim(hcst_diff)) <- names(dim(hcst.ano)) - for (n in 1 : nmembers) { - diff <- Subset(hcst.ano, along='ensemble', indices=n, - drop='selected') - em_hcst - hcst_diff <- abind(hcst_diff, diff, - along=which(names(dim(var_exp)) == 'ensemble'), - hier.names=F) - } - - sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) - - a <- abs(corr) * (sd_obs / sd_em_hcst) - b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) - - # Calibrates hcst sdate - calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) - # Appends calibrated sdate to the hcst array - calib.hcst <- abind(calib.hcst, calib.syear, - along=which(names(dim(var_exp)) == 'syear'), - hier.names=F) - } - - names(dim(calib.hcst)) <- names(dim(calib.syear)) - # removes NAs added in array init - calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) - - return(calib.hcst) - -} - -# Performs the fcst calibration for a centered time -# window of sdays. Objects must have the dimensions; -# sday, ensemble & syear. -# If sday dim = 1; no time window calibration is performed. -.fcstcal <- function(obs, hcst, fcst, na.rm=F) { - - ntime <- dim(hcst)['syear'] - nmembers <- dim(hcst)['ensemble'] - fcst.nmembers <- dim(fcst)['ensemble'] - ndays <- dim(hcst)['sday'] - - # selects only the centered day of time window - if ( ndays == 1 ){ - fcst.index <- 1 - } else { - fcst.index <- (ndays + 1)/2 - } - - ind <- 1:ntime - climObs <- mean(obs, na.rm=na.rm) - climPred <- mean(hcst, na.rm=na.rm) - - obs.ano <- obs - climObs - hcst.ano <- hcst - climPred - fcst.ano <- fcst - climPred - - calib.fcst <- NA * fcst - - # computes ensemble mean of the fcst & hcst - em_fcst <- mean(fcst.ano, na.rm=na.rm) - em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, - na.rm=na.rm)$output1 - - # Changes the correlation method in case of NAs allowed - if (na.rm){ - corr.method <- "pairwise.complete.obs" - } else { - corr.method <- "everything" - } - - corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) - sd_obs <- sd(obs.ano, na.rm=na.rm) - sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) - - # Computes difference between fcst anomaly and ensemble mean - fcst_diff <- fcst.ano - em_fcst - hcst_diff <- array(numeric(), c(ndays,ntime,0)) - names(dim(hcst_diff)) <- names(dim(hcst.ano)) - for (n in 1 : nmembers) { - diff <- Subset(hcst.ano, along='ensemble', indices=n, - drop='selected') - em_hcst - hcst_diff <- abind(hcst_diff, diff, - along=which(names(dim(hcst)) == 'ensemble'), - hier.names=F) - } - - sd_hcst_diff <- sd(hcst_diff, na.rm) - - a <- abs(corr) * (sd_obs / sd_em_hcst) - b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) - - # Calibrates hcst sdate - fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) - # fcst_cube$data <- fcst.calib - return(fcst.calib) - # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) - -} - - -- GitLab From a6df1c321f62ada0b382cb1e9014ee604e28bdd7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Jun 2022 12:25:37 +0200 Subject: [PATCH 03/11] move data_summary() to separate file, add first and last day to the summary for daily data --- modules/data_load/load.R | 28 +++------------------------- tools/data_summary.R | 27 +++++++++++++++++++++++++++ tools/libs.R | 3 ++- 3 files changed, 32 insertions(+), 26 deletions(-) create mode 100644 tools/data_summary.R diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 570788ee..aaadf553 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -1,6 +1,5 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") -source("/esarchive/scratch/vagudets/repos/cstools/R/s2dv_cube.R") # Load required libraries/funs source("modules/data_load/dates2load.R") source("tools/libs.R") @@ -278,31 +277,10 @@ load_datasets <- function(recipe_file) { obs <- as.s2dv_cube(obs) # Print a summary of the loaded data for the user, for each object - # (hcst, obs and fcst). - ## TODO: Incorporate into logger - ## TODO: Adapt to daily/subseasonal cases - ## TODO: Add check for missing files/NAs - data_summary <- function(object) { - # Get name and start dates - object_name <- deparse(substitute(object)) - sdate_min <- format(min(as.Date(object$Dates[[1]])), format = '%b %Y') - sdate_max <- format(max(as.Date(object$Dates[[1]])), format = '%b %Y') - - print("DATA SUMMARY:") - print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) - print(paste0(object_name, " dimensions: ")) - print(dim(object$data)) - print(paste0("Statistical summary of the data in ", object_name, ":")) - print(summary(object$data)) - print("---------------------------------------------") - - } - - - data_summary(hcst) - data_summary(obs) + data_summary(hcst, store.freq) + data_summary(obs, store.freq) if (!is.null(fcst)) { - data_summary(fcst) + data_summary(fcst, store.freq) } print("##### DATA LOADING COMPLETED SUCCESSFULLY #####") diff --git a/tools/data_summary.R b/tools/data_summary.R new file mode 100644 index 00000000..625bbef1 --- /dev/null +++ b/tools/data_summary.R @@ -0,0 +1,27 @@ +# Print a summary of the loaded data for the user, for each object. +# object: hindcast, forecast or reference data in s2dv_cube format. +## TODO: Incorporate into logger +## TODO: Adapt to daily/subseasonal cases +## TODO: Add check for missing files/NAs by dimension + +data_summary <- function(object, frequency) { + # Get name and start dates + object_name <- deparse(substitute(object)) + if (tolower(frequency) == "monthly_mean") { + date_format <- '%b %Y' + } else if (tolower(frequency) == "daily_mean") { + date_format <- '%b %d %Y' + } + sdate_min <- format(min(as.Date(object$Dates[[1]])), format = date_format) + sdate_max <- format(max(as.Date(object$Dates[[1]])), format = date_format) + + print("DATA SUMMARY:") + print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) + print(paste0(object_name, " dimensions: ")) + print(dim(object$data)) + print(paste0("Statistical summary of the data in ", object_name, ":")) + print(summary(object$data)) + print("---------------------------------------------") + +} + diff --git a/tools/libs.R b/tools/libs.R index 2470225f..31728650 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -26,5 +26,6 @@ library(CSTools) # # To be removed when new package is done by library(CSOperational) source("tools/check_recipe.R") source("tools/prepare_outputs.R") -source("tools/divide_recipe.R") +source("tools/divide_recipe.R") +source("tools/data_summary.R") # source("tools/add_dims.R") # Not sure if necessary yet -- GitLab From 56d4678cf8771c9cacf68e24f8608f43064f3499 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Jun 2022 12:26:25 +0200 Subject: [PATCH 04/11] Add recipe for circular sort testing (issue #15) --- .../recipe_circular-sort-test.yml | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 modules/data_load/testing_recipes/recipe_circular-sort-test.yml diff --git a/modules/data_load/testing_recipes/recipe_circular-sort-test.yml b/modules/data_load/testing_recipes/recipe_circular-sort-test.yml new file mode 100644 index 00000000..d6cbae8f --- /dev/null +++ b/modules/data_load/testing_recipes/recipe_circular-sort-test.yml @@ -0,0 +1,44 @@ +Description: + Author: V. Agudetse + Info: For testing the behavior of the loading module when loading data + that crosses the date line or the Greenwich meridian. +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: + fcst_syear: + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '1995' + leadtimemin: 1 + leadtimemax: 1 + Region: + latmin: -10 + latmax: 10 + lonmin: 170 + lonmax: -170 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: BSS90 + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 1d6896f72f2da892737a8cd48b80d5594b570c07 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Jun 2022 16:41:39 +0200 Subject: [PATCH 05/11] Add recipe to test circular longitude sorting --- .../data_load/testing_recipes/recipe_circular-sort-test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data_load/testing_recipes/recipe_circular-sort-test.yml b/modules/data_load/testing_recipes/recipe_circular-sort-test.yml index d6cbae8f..3c49f807 100644 --- a/modules/data_load/testing_recipes/recipe_circular-sort-test.yml +++ b/modules/data_load/testing_recipes/recipe_circular-sort-test.yml @@ -18,14 +18,14 @@ Analysis: fcst_syear: fcst_sday: '1101' hcst_start: '1993' - hcst_end: '1995' + hcst_end: '2003' leadtimemin: 1 leadtimemax: 1 Region: latmin: -10 latmax: 10 - lonmin: 170 - lonmax: -170 + lonmin: 320 + lonmax: 350 Regrid: method: bilinear type: to_system -- GitLab From baace7c7e3bab41ebee8bb88ded1aff4e25da611 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 9 Jun 2022 16:45:40 +0200 Subject: [PATCH 06/11] Minor changes and comments --- modules/Saving/Save_data.R | 68 +++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index a1e38ef4..3ace35b8 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -1,20 +1,19 @@ +## TODO: Change names of functions to make sure they don't clash +## with others in the tool - -get_times <- function(fcst.type, leadtimes, sdate) { +get_times <- function(fcst.horizon, leadtimes, sdate) { ## TODO: Adapt to Auto-S2S recipe format - ## TODO: Is all of this necessary? Is some of it already computed by - ## dates2load? - # fcst.type <- recipe$Analysis$Horizon - switch(tolower(fcst.type), + switch(fcst.horizon, ## TODO: Remove "sub_obs"? + ## TODO: daily case? "seasonal" = {len <- length(leadtimes); ref <- 'months since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "sub_obs" = {len <- 52; ref <- 'week of the year '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "subseasonal" = {len <- 4; ref <- 'weeks since '; stdname <- ''} - ) + ) time <- 1:len dim(time) <- length(time) @@ -44,30 +43,50 @@ get_times <- function(fcst.type, leadtimes, sdate) { get_latlon <- function(latitude, longitude) { ## TODO: Verify if this is needed or if it needs a change now that we are ## using s2dv_cube objects + # Adds dimensions and metadata to lat and lon + # latitude: array containing the latitude values + # longitude: array containing the longitude values + ## the inputs are simply the arrays with the values? dim(longitude) <- length(longitude) + ## TODO: Extract metadata from s2dv_cube metadata <- list(longitude = list(units = 'degrees_east')) + # Add metadata to attributes attr(longitude, 'variables') <- metadata + # Add longitude dimension names(dim(longitude)) <- 'longitude' dim(latitude) <- length(latitude) + ## TODO: Extract metadata from s2dv_cube metadata <- list(latitude = list(units = 'degrees_north')) + # Add metadata to attributes attr(latitude, 'variables') <- metadata + # Add latitude dimension names(dim(latitude)) <- 'latitude' return(list(lat=latitude, lon=longitude)) } -save_metrics <- function(variable, - skill, - fcst.sdate, +## TODO: Place inside a function somewhere +# if (tolower(agg) == "country"){ +# load(mask.path) +# grid <- europe.countries.iso +# } else { +# grid <- list(lon=attr(var.obs, 'Variables')$dat1$longitude, +# lat=attr(var.obs, 'Variables')$dat1$latitude) +# } + +save_metrics <- function(skill, + recipe, grid, outfile, - monthnames, - fcst.type, - agg) { + leadtimes, + agg="global") { + + fcst.horizon <- tolower(recipe$Analysis$Horizon) + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) - ## TODO: Remove variable as entry parameter lalo <- c('longitude', 'latitude') @@ -93,22 +112,19 @@ save_metrics <- function(variable, names(dim(skill[[i]])) <- dims } - times <- get_times(fcst.type, monthnames, fcst.sdate) + times <- get_times(fcst.horizon, leadtimes, fcst.sdate) time <- times$time time_step <- times$time_step - if (tolower(agg) == "country") { - - country <- get_countries(grid) - ArrayToNc(append(country, time, skill, time_step), outfile) + # if (tolower(agg) == "country") { + # country <- get_countries(grid) + # ArrayToNc(append(country, time, skill, time_step), outfile) + # } else { - } else { - - latlon <- get_latlon(grid$lat, grid$lon) - vars <- list(latlon$lat, latlon$lon, time) - vars <- c(vars, skill, list(time_step)) - ArrayToNc(vars, outfile) - } + latlon <- get_latlon(grid$lat, grid$lon) + vars <- list(latlon$lat, latlon$lon, time) + vars <- c(vars, skill, list(time_step)) + ArrayToNc(vars, outfile) } -- GitLab From f0bd7ddf81cfd7dc39df953bdeb2c0841dd24c72 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 10 Jun 2022 16:49:00 +0200 Subject: [PATCH 07/11] Renamed test_calib.R to Calibration.R --- modules/Calibration/{test_calib.R => Calibration.R} | 0 modules/test_victoria.R | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename modules/Calibration/{test_calib.R => Calibration.R} (100%) diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/Calibration.R similarity index 100% rename from modules/Calibration/test_calib.R rename to modules/Calibration/Calibration.R diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 58d9be6a..b5e026a3 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -3,7 +3,7 @@ recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" source("modules/data_load/load.R") -source("modules/Calibration/test_calib.R") +source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") # Load datasets -- GitLab From 45a45b26306b522c26df13ee6862bca4de5fc6f1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 13 Jun 2022 15:46:01 +0200 Subject: [PATCH 08/11] Remove gitlab links when sourcing functions so the code can be run in nord3v2 --- modules/Calibration/Calibration.R | 2 +- modules/Skill/Skill.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 8c6600f3..1478ecd5 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -1,6 +1,6 @@ ## TODO: Remove once Alba's fun is merged in CSTools -source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") +source("tools/tmp/CST_Calibration.R") ## Entry params data and recipe? calibrate_datasets <- function(data, recipe) { diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index f641fa6a..caf4939d 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -9,9 +9,9 @@ source("modules/Skill/s2s.metrics.R") ## TODO: Remove once the new version of s2dv is released -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPS.R") -source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPSS.R") +source("tools/tmp/RandomWalkTest.R") +source("tools/tmp/RPS.R") +source("tools/tmp/RPSS.R") ## TODO: Implement this in the future ## Which parameter are required? -- GitLab From 53f96a9b3e91513949b968858726bcf2cf35fad2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 13 Jun 2022 15:53:58 +0200 Subject: [PATCH 09/11] Add temporary directory for functions that need to be sourced --- tools/tmp/CST_Calibration.R | 563 ++++++++++++++++++++++++++++++++++++ tools/tmp/RPS.R | 193 ++++++++++++ tools/tmp/RPSS.R | 223 ++++++++++++++ tools/tmp/RandomWalkTest.R | 82 ++++++ 4 files changed, 1061 insertions(+) create mode 100644 tools/tmp/CST_Calibration.R create mode 100644 tools/tmp/RPS.R create mode 100644 tools/tmp/RPSS.R create mode 100644 tools/tmp/RandomWalkTest.R diff --git a/tools/tmp/CST_Calibration.R b/tools/tmp/CST_Calibration.R new file mode 100644 index 00000000..79b41951 --- /dev/null +++ b/tools/tmp/CST_Calibration.R @@ -0,0 +1,563 @@ +#'Forecast Calibration +#' +#'@author Verónica Torralba, \email{veronica.torralba@bsc.es} +#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} +#'@description Equivalent to function \code{Calibration} but for objects of class \code{s2dv_cube}. +#' +#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal hindcast experiment data in the element named \code{$data}. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. +#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}. +#'@param exp_cor an optional object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. +#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. +#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. +#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. +#'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. +#' +#'@importFrom s2dv InsertDim +#'@import abind +#' +#'@seealso \code{\link{CST_Load}} +#' +#'@examples +#'# Example 1: +#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) +#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) +#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'lon <- seq(0, 30, 5) +#'lat <- seq(0, 25, 5) +#'exp <- list(data = mod1, lat = lat, lon = lon) +#'obs <- list(data = obs1, lat = lat, lon = lon) +#'attr(exp, 'class') <- 's2dv_cube' +#'attr(obs, 'class') <- 's2dv_cube' +#'a <- CST_Calibration(exp = exp, obs = obs, cal.method = "mse_min", eval.method = "in-sample") +#'str(a) +#' +#'# Example 2: +#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) +#'mod2 <- 1 : (1 * 3 * 1 * 5 * 6 * 7) +#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'dim(mod2) <- c(dataset = 1, member = 3, sdate = 1, ftime = 5, lat = 6, lon = 7) +#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) +#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'lon <- seq(0, 30, 5) +#'lat <- seq(0, 25, 5) +#'exp <- list(data = mod1, lat = lat, lon = lon) +#'obs <- list(data = obs1, lat = lat, lon = lon) +#'exp_cor <- list(data = mod2, lat = lat, lon = lon) +#'attr(exp, 'class') <- 's2dv_cube' +#'attr(obs, 'class') <- 's2dv_cube' +#'attr(exp_cor, 'class') <- 's2dv_cube' +#'a <- CST_Calibration(exp = exp, obs = obs, exp_cor = exp_cor, cal.method = "evmos") +#'str(a) +#'@export + +CST_Calibration <- function(exp, obs, exp_cor = NULL, cal.method = "mse_min", + eval.method = "leave-one-out", multi.model = FALSE, + na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { + + if(!missing(multi.model) & !(cal.method == "mse_min")){ + warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) + } + + if(is.null(exp_cor)){ #exp will be used to calibrate and will also be calibrated: "calibrate hindcast" + if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { + stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + exp$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = NULL, + cal.method = cal.method, + eval.method = eval.method, + multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + memb_dim = memb_dim, sdate_dim = sdate_dim, + ncores = ncores) + exp$Datasets <- c(exp$Datasets, obs$Datasets) + exp$source_files <- c(exp$source_files, obs$source_files) + + return(exp) + + }else{ #if exp_cor is provided, it will be calibrated: "calibrate forecast instead of hindcast" + eval.method = "hindcast-vs-forecast" #if exp_cor is provided, eval.method is overrruled (because if exp_cor is provided, the train data will be all data of "exp" and the evalutaion data will be all data of "exp_cor"; no need for "leave-one-out" or "in-sample") + if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube") || !inherits(exp_cor, "s2dv_cube")) { + stop("Parameter 'exp', 'obs' and 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + exp_cor$data <- Calibration(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + cal.method = cal.method, + eval.method = eval.method, + multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + memb_dim = memb_dim, sdate_dim = sdate_dim, + ncores = ncores) + exp_cor$Datasets <- c(exp_cor$Datasets, obs$Datasets) + exp_cor$source_files <- c(exp_cor$source_files, exp$source_files, obs$source_files) + + return(exp_cor) + + } +} + + + +#'Forecast Calibration +#' +#'@author Verónica Torralba, \email{veronica.torralba@bsc.es} +#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} +#'@description Five types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). +#'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. +#'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 +#' +#'@param exp a multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal hindcast experiment data. The hindcast is used to calibrate the forecast in case the forecast is provided; if not, the same hindcast will be calibrated instead. +#'@param obs a multidimensional array with named dimensions (at least 'sdate') containing the observed data. +#'@param exp_cor an optional multidimensional array with named dimensions (at least 'sdate' and 'member') containing the seasonal forecast experiment data. If the forecast is provided, it will be calibrated using the hindcast and observations; if not, the hindcast will be calibrated instead. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. +#'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. In case the forecast is provided, any chosen eval.method is over-ruled and a third option is used. +#'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. +#'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. +#'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. +#'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. +#' +#'@importFrom s2dv InsertDim MeanDims Reorder +#'@import abind +#'@import multiApply +#'@importFrom s2dverification Subset +#' +#'@seealso \code{\link{CST_Load}} +#' +#'@details +#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observations pairs to perform the computation. In case there are three or less pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == TRUE}). On the other hand, \code{na.rm} is used to indicate the function whether to remove the missing values during the computation of the parameters needed to perform the calibration. +#' +#'@examples +#'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7) +#'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'obs1 <- 1 : (1 * 1 * 4 * 5 * 6 * 7) +#'dim(obs1) <- c(dataset = 1, member = 1, sdate = 4, ftime = 5, lat = 6, lon = 7) +#'a <- Calibration(exp = mod1, obs = obs1) +#'str(a) +#'@export +Calibration <- function(exp, obs, exp_cor=NULL, cal.method = "mse_min", + eval.method = "leave-one-out", + multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { + + dim.exp <- dim(exp) + amt.dims.exp <- length(dim.exp) + dim.obs <- dim(obs) + amt.dims.obs <- length(dim.obs) + dim.names.exp <- names(dim.exp) + dim.names.obs <- names(dim.obs) + if(!is.null(exp_cor)){ + dim.exp_cor <- dim(exp_cor) + amt.dims.exp_cor <- length(dim.exp_cor) + dim.names.exp_cor <- names(dim.exp_cor) + } + if (is.null(memb_dim) || !is.character(memb_dim)) { + stop("Parameter 'memb_dim' should be a character string indicating the", + "name of the dimension where members are stored in 'exp'.") + } + if (length(memb_dim) > 1) { + memb_dim <- memb_dim[1] + warning("Parameter 'memb_dim' has length greater than 1 and only", + " the first element will be used.") + } + + if (is.null(sdate_dim) || !is.character(sdate_dim)) { + stop("Parameter 'sdate_dim' should be a character string indicating the", + "name of the dimension where start dates are stored in 'exp'.") + } + if (length(sdate_dim) > 1) { + sdate_dim <- sdate_dim[1] + warning("Parameter 'sdate_dim' has length greater than 1 and only", + " the first element will be used.") + } + target.dim.names.exp <- c(memb_dim, sdate_dim) + target.dim.names.obs <- sdate_dim + + if (!all(target.dim.names.exp %in% dim.names.exp)) { + stop("Parameter 'exp' must have the dimensions defined in memb_dim ", + "and sdate_dim.") + } + + if(!is.null(exp_cor)){ + if (!all(target.dim.names.exp %in% dim.names.exp_cor)) { + stop("Parameter 'exp_cor' must have the dimensions defined in memb_dim ", + "and sdate_dim.") + } + } + + if (!all(c(sdate_dim) %in% dim.names.obs)) { + stop("Parameter 'obs' must have the dimension defined in sdate_dim ", + "parameter.") + } + + if (any(is.na(exp))) { + warning("Parameter 'exp' contains NA values.") + } + + if(!is.null(exp_cor)){ + if (any(is.na(exp_cor))) { + warning("Parameter 'exp_cor' contains NA values.") + } + } + + if (any(is.na(obs))) { + warning("Parameter 'obs' contains NA values.") + } + + if (memb_dim %in% names(dim(obs))) { + obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected") + } + data.set.sufficiently.large.out <- + Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), + ncores = ncores, + fun = .data.set.sufficiently.large)$output1 + + if(!all(data.set.sufficiently.large.out)){ + if(na.fill){ + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with NA values") + } else { + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with uncorrected values") + } + } + + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE.") + } + if (cal.method == 'rpc-based') { + if (is.null(apply_to)) { + apply_to <- 'sign' + warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).") + } else if (!apply_to %in% c('all','sign')) { + stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") + } + if (apply_to == 'sign') { + if (is.null(alpha)) { + alpha <- 0.1 + warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. (2014).") + } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { + stop("'alpha' must be a number between 0 and 1.") + } + } + } + + if(is.null(exp_cor)){ + calibrated <- Apply(data = list(exp = exp, obs = obs), + cal.method = cal.method, + eval.method = eval.method, + multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), + ncores = ncores, output_dims = target.dim.names.exp, + fun = .cal)$output1 + dexes <- match(names(dim(exp)), names(dim(calibrated))) + calibrated <- aperm(calibrated, dexes) + dimnames(calibrated) <- dimnames(exp)[dexes] + }else{ + calibrated <- Apply(data = list(exp = exp, obs = obs, exp_cor = exp_cor), + cal.method = cal.method, + eval.method = eval.method, + multi.model = multi.model, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs, exp_cor = target.dim.names.exp), + ncores = ncores, output_dims = target.dim.names.exp, + fun = .cal)$output1 + dexes <- match(names(dim(exp_cor)), names(dim(calibrated))) + calibrated <- aperm(calibrated, dexes) + dimnames(calibrated) <- dimnames(exp_cor)[dexes] + } + + return(calibrated) +} + + +.data.set.sufficiently.large <- function(exp, obs){ + amt.min.samples <- 3 + amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) + return(amt.good.pts > amt.min.samples) +} + +.make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor){ + if(eval.method == "leave-one-out"){ + dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x, + train.dexes = seq(1, amt.points)[-x]))) + } else if (eval.method == "in-sample"){ + dexes.lst <- list(list(eval.dexes = seq(1, amt.points), + train.dexes = seq(1, amt.points))) + } else if (eval.method == "hindcast-vs-forecast"){ + dexes.lst <- list(list(eval.dexes = seq(1,amt.points_cor), + train.dexes = seq(1, amt.points))) + } else { + stop(paste0("unknown sampling method: ",eval.method)) + } + return(dexes.lst) +} + +.cal <- function(exp, obs, exp_cor = NULL, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { + if(is.null(exp_cor)){ + exp_cor <- exp ## generate a copy of exp so that the same function can run + ## when exp_cor is provided and when it's not + } + obs <- as.vector(obs) + dims.fc <- dim(exp) + dims.fc_cor <- dim(exp_cor) ## new line + amt.mbr <- dims.fc[1] + amt.sdate <- dims.fc[2] + amt.sdate_cor <- dims.fc_cor[2] ## new line + var.cor.fc <- NA * exp_cor ## modified line (exp_cor instead of exp); + ## in case of exp_cor not provided, at this point exp_cor + ## is already the same as exp so no change + names(dim(var.cor.fc)) <- dims.fc + + if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ + if(na.fill){ + return(var.cor.fc) + } else { + var.cor.fc[] <- exp[] + return(var.cor.fc) + } + } + eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate, + amt.points_cor = amt.sdate_cor) + amt.resamples <- length(eval.train.dexeses) + for (i.sample in seq(1, amt.resamples)) { + # defining training (tr) and evaluation (ev) subsets + eval.dexes <- eval.train.dexeses[[i.sample]]$eval.dexes + train.dexes <- eval.train.dexeses[[i.sample]]$train.dexes + + fc.ev <- exp_cor[ , eval.dexes, drop = FALSE] ## modified line (exp_cor instead of exp) + ## fc.ev is used to evaluate (not train; train should be done with exp (hindcast)) + fc.tr <- exp[ , train.dexes] + obs.tr <- obs[train.dexes , drop = FALSE] + + if(cal.method == "bias"){ + var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + } else if(cal.method == "evmos"){ # forecast correction implemented + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + #calculate value for regression parameters + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + #calculate value for regression parameters + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "crps_min"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) + #calculate initial value for regression parameters + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) + init.par[3] <- sqrt(init.par[3]) + #calculate regression parameters on training dataset + optim.tmp <- optim(par = init.par, + fn = .calc.crps.opt, + gr = .calc.crps.grad.opt, + quant.obs.fc = quant.obs.fc.tr, + na.rm = na.rm, + method = "BFGS") + + mbm.par <- optim.tmp$par + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + ens_mean.ev <- multiApply::Apply(data = fc.ev, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean + ens_mean.tr <- multiApply::Apply(data = fc.tr, target_dims = names(amt.mbr), fun = mean, na.rm = na.rm)$output1 ## Ensemble mean + ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) + var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean + if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(dims.fc)) + dim(var.cor.fc) <- dims.fc + } else { ## no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) + } + } else { + stop("unknown calibration method: ",cal.method) + } + } + return(var.cor.fc) +} + +.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations + amt.mbr <- dim(fc)[1] + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) + cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) + return( + append( + .calc.fc.quant(fc = fc, na.rm = na.rm), + list( + obs.per.ens = obs.per.ens, + cor.obs.fc = cor.obs.fc, + obs.av = obs.av, + obs.sd = obs.sd + ) + ) + ) +} + +.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations + amt.mbr <- dim(fc)[1] + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) + cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) + + return( + append( + .calc.fc.quant.ext(fc = fc, na.rm = na.rm), + list( + obs.per.ens = obs.per.ens, + cor.obs.fc = cor.obs.fc, + obs.av = obs.av, + obs.sd = obs.sd + ) + ) + ) +} + + +.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts + amt.mbr <- dim(fc)[1] + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) + fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) + fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) + fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) + fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) + fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) + fc.dev <- fc - fc.ens.av.per.ens + fc.dev.sd <- sd(fc.dev, na.rm = na.rm) + fc.av <- mean(fc, na.rm = na.rm) + fc.sd <- sd(fc, na.rm = na.rm) + return( + list( + fc.ens.av = fc.ens.av, + fc.ens.av.av = fc.ens.av.av, + fc.ens.av.sd = fc.ens.av.sd, + fc.ens.av.per.ens = fc.ens.av.per.ens, + fc.ens.sd = fc.ens.sd, + fc.ens.var.av.sqrt = fc.ens.var.av.sqrt, + fc.dev = fc.dev, + fc.dev.sd = fc.dev.sd, + fc.av = fc.av, + fc.sd = fc.sd + ) + ) +} + +.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts + + amt.mbr <- dim(fc)[1] + repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) + repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) + spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) + spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) + + return( + append(.calc.fc.quant(fc, na.rm = na.rm), + list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens)) + ) +} + +#Below are the core or elementary functions to calculate the regression parameters for the different methods +.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ + par.out <- rep(NA, 3) + + if(multi.model){ + par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.ens.var.av.sqrt) + } else { + par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) + } + par.out[2] <- with(quant.obs.fc, abs(cor.obs.fc) * obs.sd / fc.ens.av.sd) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) + + return(par.out) +} +.calc.evmos.par <- function(quant.obs.fc, na.rm){ + par.out <- rep(NA, 2) + par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) + return(par.out) +} +#Below are the core or elementary functions to calculate the functions necessary for the minimization of crps +.calc.crps.opt <- function(par, quant.obs.fc, na.rm){ + return( + with(quant.obs.fc, + mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) - + mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm) + ) + ) +} + +.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ + sgn1 <- with(quant.obs.fc,sign(obs.per.ens - + (par[1] + par[2] * fc.ens.av.per.ens + + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) + sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) + sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) + deriv.par1 <- mean(sgn1, na.rm = na.rm) + deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm)) + deriv.par3 <- with(quant.obs.fc, + mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) - + mean(spr.abs * sgn3, na.rm = na.rm) / 2.) + deriv.par4 <- with(quant.obs.fc, + mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) - + mean(sgn3, na.rm = na.rm) / 2.) + return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) +} + +#Below are the core or elementary functions to correct the evaluation set based on the regression parameters +.correct.evmos.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) + return(with(quant.fc.mp, par[1] + par[2] * fc)) +} +.correct.mse.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) + return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3])) +} +.correct.crps.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm) + return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs))) +} + +# Function to calibrate the individual members with the RPC-based method +.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){ + member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal + return(member_cal) +} diff --git a/tools/tmp/RPS.R b/tools/tmp/RPS.R new file mode 100644 index 00000000..84956ac8 --- /dev/null +++ b/tools/tmp/RPS.R @@ -0,0 +1,193 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +#'and 1. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of RPS with the same dimensions as "exp" except the +#''time_dim' and 'memb_dim' dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs), + output_dims = time_dim, + fun = .RPS, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + ncores = ncores)$output1 + + # Return only the mean RPS + rps <- MeanDims(rps, time_dim, na.rm = FALSE) + + return(rps) +} + + +.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate, (memb)] + + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # exp_probs: [bin, sdate] + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # obs_probs: [bin, sdate] + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum) + # rps: [sdate] + + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + R_new <- Inf + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- apply(adjustment, 2, sum) + rps <- rps + adjustment + } + + return(rps) +} + +.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { + # if exp: [sdate, memb] + # if obs: [sdate, (memb)] + + # Add dim [memb = 1] to obs if it doesn't have memb_dim + if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) + + # Absolute thresholds + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + + # Probabilities + probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + } else { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } + } + + return(probs) +} + diff --git a/tools/tmp/RPSS.R b/tools/tmp/RPSS.R new file mode 100644 index 00000000..04c31378 --- /dev/null +++ b/tools/tmp/RPSS.R @@ -0,0 +1,223 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. +#'The statistical significance is obtained based on a Random Walk test at the +#'95% confidence level (DelSole and Tippett, 2016). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array of the RPSS with the same dimensions as "exp" except the +#' 'time_dim' and 'memb_dim' dimensions. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs, + ref = c(time_dim, memb_dim)) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs) + } + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + ncores = ncores) + + return(output) +} + +.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE) { + # exp: [sdate, memb] + # obs: [sdate, (memb)] + # ref: [sdate, memb] or NULL + + # RPS of the forecast + rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair) + + # RPS of the reference forecast + if (is.null(ref)) { ## using climatology as reference forecast + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds) + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + # rps_ref: [sdate] + +# if (Fair) { # FairRPS +# ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] +# R <- dim(exp)[2] #memb +# R_new <- Inf +# adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) +# adjustment <- apply(adjustment, 2, sum) +# rps_ref <- rps_ref + adjustment +# } + + } else { # use "ref" as reference forecast + rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair) + } + + # RPSS + rpss <- 1 - mean(rps_exp) / mean(rps_ref) + + # Significance + sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif + + return(list(rpss = rpss, sign = sign)) +} + diff --git a/tools/tmp/RandomWalkTest.R b/tools/tmp/RandomWalkTest.R new file mode 100644 index 00000000..adeadc1e --- /dev/null +++ b/tools/tmp/RandomWalkTest.R @@ -0,0 +1,82 @@ +#'Random walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). +#' +#'@param skill_A A numerical array of the time series of the skill with the +#' forecaster A's. +#'@param skill_B A numerical array of the time series of the skill with the +#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of 2: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill positively oriented). If $score is positive +#' forecaster A is better than forecaster B, and if $score is negative +#' forecaster B is better than forecaster B. +#'} +#'\item{$signif}{ +#' A logical array with the same dimensions as the input arrays except +#' 'time_dim'. Whether the difference is significant or not at the 5% +#' significance level. +#'} +#' +#'@examples +#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' skill_A <- abs(fcst_A - reference) +#' skill_B <- abs(fcst_B - reference) +#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) +#' +#'@import multiApply +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ + + ## Check inputs + if (is.null(skill_A) | is.null(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if(!is.numeric(skill_A) | !is.numeric(skill_B)){ + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A),dim(skill_B))){ + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + if(!is.character(time_dim)){ + stop("Parameter 'time_dim' must be a character string.") + } + if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + if (!is.null(ncores)){ + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- multiApply::Apply(data = list(skill_A, skill_B), + target_dims = time_dim, + fun = .RandomWalkTest, + ncores = ncores) + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B){ + + score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) + + ## TRUE if significant (if last value is above or below 2*sqrt(N)) + signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), + yes = TRUE, no = FALSE) + + return(list("score"=score[length(skill_A)],"signif"=signif)) +} -- GitLab From 5c15a66abc86c4042c2a6060c3aed5714283927e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 13 Jun 2022 16:11:08 +0200 Subject: [PATCH 10/11] Add basic information to README.md --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index e69de29b..5b28ef5d 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,21 @@ + +ESS Verification Suite +====================== + +This is the Git project for the ESS Verification Suite, which will serve as a tool for research projects and operational workflows involving subseasonal to seasonal to decadal forecast verification. + +The main developers of the tool are Victòria Agudetse (@vagudets), An-Chi Ho (@aho), Lluís Palma (@lpalma) and Núria Pérez-Zanón (@nperez). + +Resources +--------- + +Here you can access a presentation containing information relevant to the tool: +[ESS Verification Suite](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1) + +Branching strategy +------------------ + +Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. + +Users may have their own branches for personal use. These branches should start with "user-", which can optionally be followed by a brief description. + -- GitLab From b3d1fb1ba674e220e39ef41131f82f02a59ebea3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Jun 2022 10:56:26 +0200 Subject: [PATCH 11/11] Update Readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b28ef5d..e540516a 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,12 @@ Resources --------- Here you can access a presentation containing information relevant to the tool: -[ESS Verification Suite](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1) +[ESS Verification Suite](https://docs.google.com/presentation/d/1R8Gcz5R_NTgcBQvXBkCPG3jY31BVPDur/edit#slide=id.p1?target=_blank) Branching strategy ------------------ Branches containing developments that are to be merged into the tool must contain "dev-" at the beginning of the name, followed by a short, meaningful description of the development in question. E.g. "dev-loading-subseasonal" for the branch containing developments related to the loading of subseasonal datasets. -Users may have their own branches for personal use. These branches should start with "user-", which can optionally be followed by a brief description. +Users may have their own branches for personal use. These branches should start with "user-\", which can optionally be followed by a brief description. E.g. "user-vagudets-FOCUS". -- GitLab