diff --git a/Corr.R b/Corr.R
new file mode 100644
index 0000000000000000000000000000000000000000..f1621240ede50f4b0eb35f6e99b25ee98fe6ab7f
--- /dev/null
+++ b/Corr.R
@@ -0,0 +1,485 @@
+#'Compute the correlation coefficient between an array of forecasts and their
+#'corresponding observations
+#'
+#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for
+#'an array of forecasts and an array of observations. The correlations are
+#'computed along 'time_dim', which usually refers to the start date dimension. If
+#''comp_dim' is given, the correlations are computed only if obs along the
+#'comp_dim dimension are complete between limits[1] and limits[2], i.e., there
+#'is no NA between limits[1] and limits[2]. This option can be activated if the
+#'user wants to account only for the forecasts for which the corresponding
+#'observations are available at all leadtimes.\cr
+#'The confidence interval is computed by the Fisher transformation and the
+#'significance level relies on a one-sided Student's t-distribution.\cr
+#'The function can calculate the ensemble mean before the correlation if
+#''memb_dim' is specified and 'memb = FALSE'. If the ensemble mean is not
+#'calculated, the correlation is calculated for each member.
+#'If there is only one dataset for exp and obs, you can simply use cor() to
+#'compute the correlation.
+#'
+#'@param exp A named numeric array of experimental data, with at least dimension
+#'  'time_dim'.
+#'@param obs A named numeric array of observational data, with the same
+#'  dimensions as parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param time_dim A character string indicating the name of the dimension along
+#'  which the correlations are computed. The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of the dataset
+#'  (nobs/nexp) dimension. The default value is NULL (no dataset).
+#'@param comp_dim A character string indicating the name of the dimension along
+#'  which obs is taken into account only if it is complete. The default value
+#'  is NULL.
+#'@param limits A vector of two integers indicating the range along comp_dim to
+#'  be completed. The default is c(1, length of the comp_dim dimension).
+#'@param method A character string indicating the type of correlation:
+#'  'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+#'  member dimension, set NULL. The default value is NULL.
+#'@param memb A logical value indicating whether to keep the 'memb_dim'
+#'  dimension (TRUE) or do the ensemble mean over 'memb_dim' (FALSE). Only
+#'  functional when 'memb_dim' is not NULL. The default value is TRUE.
+#'@param pval A logical value indicating whether to return or not the p-value
+#'  of the test Ho: Corr = 0. The default value is TRUE.
+#'@param conf A logical value indicating whether to return or not the confidence
+#'  intervals. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: Corr = 0 based on 'alpha'. The default value is
+#'  FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
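+#'
+#'@details
+#'For reference, the confidence bounds follow the Fisher z-transformation used
+#'later in this function. A minimal sketch of that computation (assuming a
+#'correlation \code{r} and an effective sample size \code{n}; not an exported
+#'helper of this package):
+#'\preformatted{
+#'  z <- atanh(r)
+#'  tanh(z + qnorm(c(alpha / 2, 1 - alpha / 2)) / sqrt(n - 3))
+#'}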
+#'
+#'@return
+#'A list containing the numeric arrays with dimension:\cr
+#'  c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
+#'  time_dim and memb_dim).\cr
+#'nexp is the number of experiments (i.e., 'dat_dim' in exp), and nobs is the
+#'number of observations (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and
+#'nobs are omitted. exp_memb is the number of members in the experiment (i.e.,
+#''memb_dim' in exp) and obs_memb is the number of members in the observation
+#'(i.e., 'memb_dim' in obs). If memb = FALSE, exp_memb and obs_memb are
+#'omitted.\cr\cr
+#'\item{$corr}{
+#'  The correlation coefficient.
+#'}
+#'\item{$p.val}{
+#'  The p-value. Only present if \code{pval = TRUE}.
+#'}
+#'\item{$conf.lower}{
+#'  The lower confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$conf.upper}{
+#'  The upper confidence interval. Only present if \code{conf = TRUE}.
+#'}
+#'\item{$sign}{
+#'  The statistical significance. Only present if \code{sign = TRUE}.
+#'}
+#'
+#'@examples
+#'# Case 1: Load sample data as in Load() example:
+#'example(Load)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'runmean_months <- 12
+#'
+#'# Smooth along lead-times
+#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times
+#'leadtimes_per_startdate <- 60
+#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
+#'             MeanDims(smooth_ano_obs, 'member'),
+#'             comp_dim = 'ftime', dat_dim = 'dataset',
+#'             limits = c(ceiling((runmean_months + 1) / 2),
+#'                        leadtimes_per_startdate - floor(runmean_months / 2)))
+#'
+#'# Case 2: Keep member dimension
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset')
+#'# ensemble mean
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE,
+#'             dat_dim = 'dataset')
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@importFrom stats cor pt qnorm
+#'@export
+Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL,
+                 comp_dim = NULL, limits = NULL, method = 'pearson',
+                 memb_dim = NULL, memb = TRUE,
+                 pval = TRUE, conf = TRUE, sign = FALSE,
+                 alpha = 0.05, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least two dimensions ",
+                "containing time_dim and dat_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string or NULL.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## comp_dim
+  if (!is.null(comp_dim)) {
+    if (!is.character(comp_dim) | length(comp_dim) > 1) {
+      stop("Parameter 'comp_dim' must be a character string.")
+    }
+    if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) {
+      stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## limits
+  if (!is.null(limits)) {
+    if (is.null(comp_dim)) {
+      stop("Parameter 'comp_dim' cannot be NULL if 'limits' is assigned.")
+    }
+    if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) |
+        length(limits) != 2 | any(limits > dim(exp)[comp_dim])) {
+      stop(paste0("Parameter 'limits' must be a vector of two positive ",
+                  "integers smaller than the length of parameter 'comp_dim'."))
+    }
+  }
+  ## method
+  if (!(method %in% c("kendall", "spearman", "pearson"))) {
+    stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. ",
+           "Set it as NULL if there is no member dimension.")
+    }
+    # Add [member = 1]
+    if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+      dim(obs) <- c(dim(obs), 1)
+      names(dim(obs))[length(dim(obs))] <- memb_dim
+    }
+    if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) {
+      dim(exp) <- c(dim(exp), 1)
+      names(dim(exp))[length(dim(exp))] <- memb_dim
+    }
+  }
+  ## memb
+  if (!is.logical(memb) | length(memb) > 1) {
+    stop("Parameter 'memb' must be one logical value.")
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## conf
+  if (!is.logical(conf) | length(conf) > 1) {
+    stop("Parameter 'conf' must be one logical value.")
+  }
+  ## sign
+  if (!is.logical(sign) | length(sign) > 1) {
+    stop("Parameter 'sign' must be one logical value.")
+  }
+  ## alpha
+  if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) {
+    stop("Parameter 'alpha' must be a numeric number between 0 and 1.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+    name_obs <- name_obs[-which(name_obs == memb_dim)]
+  }
+  if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have the same length for ",
                "all dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  if (dim(exp)[time_dim] < 3) {
+    stop("The length of time_dim must be at least 3 to compute correlation.")
+  }
+
+
+  ###############################
+  # Sort dimension
+  name_exp <- names(dim(exp))
+  name_obs <- names(dim(obs))
+  order_obs <- match(name_exp, name_obs)
+  obs <- Reorder(obs, order_obs)
+
+
+  ###############################
+  # Calculate Corr
+
+  # Remove data along comp_dim dim if there is at least one NA between limits
+  if (!is.null(comp_dim)) {
+    pos <- which(names(dim(obs)) == comp_dim)
+    if (is.null(limits)) {
+      obs_sub
<- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. 
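+# A hedged sketch for settling the TODO (hypothetical, base R only; 'exp',
+# 'obs', 'nexp', 'nobs' and 'method' as already defined in this scope):
+#   t_loop <- system.time(
+#     for (j in 1:nobs) for (y in 1:nexp)
+#       cor(exp[, y], obs[, j], use = "pairwise.complete.obs", method = method)
+#   )
+#   t_sapply <- system.time(
+#     sapply(1:nobs, function(i) sapply(1:nexp, function(x)
+#       cor(exp[, x], obs[, i], use = "pairwise.complete.obs", method = method)))
+#   )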
+# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { +# if (is.null(dat_dim) & !is.null(memb_dim)) { + + if (length(dim(CORR)) == 2) { + dim(CORR) <- NULL + if (pval) { + dim(p.val) <- NULL + } + if (conf) { + dim(conflow) <- NULL + dim(confhigh) <- NULL + } + if (sign) { + dim(signif) <- NULL + } + } else { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- 
dim(confhigh)[3:length(dim(confhigh))] + } + if (sign) { + dim(signif) <- dim(signif)[3:length(dim(signif))] + } + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} + diff --git a/DST_plot.py b/DST_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..c8ac1e6f3b0f8b84333be035ff5d03199180f3ff --- /dev/null +++ b/DST_plot.py @@ -0,0 +1,201 @@ +import numpy as np +import datetime +import xarray as xr +#import plotly.express as px +import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import cartopy.crs as ccrs +from mpl_toolkits.basemap import Basemap +import pandas as pd +import os +import sys, getopt +from dateutil.relativedelta import relativedelta + +def get_domain(region): + + if region == "cat": + domain = [0.2034, 3.3046, 42.87,40.54] # lats from bigger to smaller + else: + print(f'Region <{region}> NOT FOUND') + + return(domain) + +def get_data(probs_file,skill_file,region): + + domain = get_domain(region) + + probs = xr.open_dataset(probs_file, decode_times=False) + skill = xr.open_dataset(skill_file, decode_times=False) + data = xr.merge([probs,skill]) + del probs,skill + + data = data.transpose('longitude', 'latitude', 'time', 'time_step') + + data = data.sel(dict(latitude=slice(domain[3],domain[2]))) + data = data.sel(dict(longitude=slice(domain[0],domain[1]))) + + return(data) + + +def generate_plot(data,region,locations,leadtime,thrs,colors,subtitle,outfile): + + fig = plt.figure(figsize=(3, 3), dpi=300) + + domain = get_domain(region) + + m = Basemap(llcrnrlon=domain[0],llcrnrlat=domain[3],urcrnrlon=domain[1], + urcrnrlat=domain[2],projection='mill', resolution='h') + + m.drawmapboundary(linewidth=0) + m.fillcontinents(color=colors['land'], alpha=1) + m.drawcoastlines(linewidth=0,color=colors['land']) + + m.drawlsmask(land_color=colors['land'], + ocean_color=colors['ocean']) + + for category in ["above","normal","below"]: + + c = {"above":colors['red'],"normal":colors['grey'],"below":colors['blue']} + + plotglyph(m,filter_tercile(data,category,thrs['bigcircle'], + thrs['skill'],"small"), + leadtime,"lowcircle", c[category]) + + plotglyph(m,filter_tercile(data,category,thrs['bigcircle'], + thrs['skill'],"big"), + leadtime,"bigcircle", c[category]) + + plotglyph(m, filter_extreme(data,"prob_ap90",thrs['extreme'],thrs['skill']), + leadtime, "upextreme", colors['red']) + + plotglyph(m, filter_extreme(data,"prob_bp10",thrs['extreme'],thrs['skill']), + leadtime, "lowextreme", colors['blue']) + + # plot locations: + for location,point in locations.items(): + x,y = m(point[0],point[1]) + m.plot(x,y,linestyle='none', marker="+", markersize=5, + markeredgecolor='k', markeredgewidth=0.5, c="None",alpha=0.4) + + if region == "cat": + m.readshapefile('comarques/bm5mv21sh0tpc1_20200601_0', 'Comarca', + default_encoding='ISO8859-1',color=colors['ocean'],linewidth=0.5) + else: + m.drawcountries(color=colors['ocean'], linewidth=1) + m.drawrivers(color=colors['ocean'], linewidth=1) + + #plt.suptitle(title, fontsize=10, weight='bold') + plt.title(subtitle, fontsize=7, alpha=0.75) + + fig.tight_layout() + plt.savefig(outfile,dpi=300) + #plt.show() + plt.close() + +def plotglyph(m, data, time, ptype, c): + lons = data.longitude.to_pandas().values + lats = data.latitude.to_pandas().values + lons, lats = np.meshgrid(lons,lats) + + ext 
= data.rpss.isel(time=time).to_masked_array().mask.transpose() + lons = np.where(ext == True, False, lons) + lats = np.where(ext == True, False, lats) + + alp = 0.8 + + x,y = m(lons,lats) + if ptype == "upextreme": + m.plot(x,y, linestyle='none', marker="^", markersize=5, alpha=alp, + markeredgecolor=c, markeredgewidth=1, c="None") + elif ptype == "lowextreme": + m.plot(x,y, linestyle='none', marker="v", markersize=5, alpha=alp, + markeredgecolor=c, markeredgewidth=1, c="None") + elif ptype == "bigcircle": + m.plot(x,y, linestyle='none', marker="o", markersize=1.6, alpha=alp, + c="None", markeredgecolor=c, markeredgewidth=1.6) + elif ptype == "lowcircle": + m.plot(x,y, linestyle='none', marker="o", markersize=1, alpha=alp, + c="None", markeredgecolor=c, markeredgewidth=1) + +def filter_tercile(df,category,prob_thrs,skill_thrs,circle): + + if skill_thrs is not None: + df = df.where(df.rpss >= skill_thrs, drop=False) + + # most-likely-tercile + if category == "normal": + df = df.where((df.prob_n > df.prob_an) &(df.prob_n > df.prob_bn), drop=False) + if circle == "big": + df = df.where(df.prob_n >= prob_thrs, drop=False) + elif circle == "small": + df = df.where(df.prob_n < prob_thrs, drop=False) + elif category == "below": + df = df.where((df.prob_bn > df.prob_n) &(df.prob_bn > df.prob_an), drop=False) + if circle == "big": + df = df.where(df.prob_bn >= prob_thrs, drop=False) + elif circle == "small": + df = df.where(df.prob_bn < prob_thrs, drop=False) + elif category == "above": + df = df.where((df.prob_an > df.prob_n) &(df.prob_an > df.prob_bn), drop=False) + if circle == "big": + df = df.where(df.prob_an >= prob_thrs, drop=False) + elif circle == "small": + df = df.where(df.prob_an < prob_thrs, drop=False) + + return(df) + +def filter_extreme(df,category,prob_thrs,skill_thrs): + + skill_var = {"prob_bp10":"bsp10", "prob_ap90":"bsp90"} + + if skill_thrs is not None: + df = df.where(df[skill_var[category]] > skill_thrs, drop=False) + if prob_thrs is not None: + df = df.where(df[category] > prob_thrs, drop=False) + + return(df) + +def main(argv): + + usage = 'DST-plots.py -v -d -s -o ' + try: + opts, args = getopt.getopt(argv,"hv:d:s:o:",["help","variable=","sdate=", "system="]) + except getopt.GetoptError: + print(usage) + sys.exit(2) + + for opt, arg in opts: + if opt == '-h': + print(usage) + sys.exit() + elif opt in ("-s", "--system"): + system = str(arg) + elif opt in ("-d", "--sdate"): + date = arg + elif opt in ("-v", "--variable"): + var = arg + elif opt in ("-o", "--outdir"): + outdir = arg + + week = datetime.datetime.strptime(date, "%Y%m%d").strftime("%V") + + if system == "s2s": + src_dir = '/esarchive/oper/S2S4E-data/weekly_statistics/S2S/' + probs_file = src_dir + '/' + var + '/' + date + '/' + var + \ + '-prob_' + date + '_' + week + '.nc' + skill_file = src_dir + '/' + var + '/' + date + '/' + var + \ + '-skill_week' + week + '.nc' + else: + src_dir = '/esarchive/oper/S2S4E-data/weekly_statistics/' + probs_file = src_dir + '/' + var + '/' + var + \ + '-prob_' + date + '_' + week + '.nc' + skill_file = src_dir + '/' + var + '/' + var + \ + '-skill_week' + week + '.nc' + + for leadtime in range(4): + generate_map(probs_file, skill_file, date, leadtime, var, system, outdir) + +if __name__ == "__main__": + main(sys.argv[1:]) + + diff --git a/GetProbs.R b/GetProbs.R new file mode 100644 index 0000000000000000000000000000000000000000..6549e232477161ea9adec25d6432328b49c3acca --- /dev/null +++ b/GetProbs.R @@ -0,0 +1,346 @@ +#'Compute probabilistic forecasts or the corresponding 
observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
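+#'
+#'@details
+#'As a rough illustration of what the probabilities mean (a sketch assuming
+#'tercile thresholds, a single time step and no weights; not code from this
+#'function), the probability of each category is the fraction of members
+#'falling into it:
+#'\preformatted{
+#'  members <- rnorm(25)
+#'  thres <- quantile(members, c(1/3, 2/3), type = 8)
+#'  probs <- table(cut(members, c(-Inf, thres, Inf))) / length(members)
+#'}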
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) + } 
else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + + + diff --git a/RPS_clim.R b/RPS_clim.R new file mode 100644 index 0000000000000000000000000000000000000000..3f6c3697f47a98103930831d8b6a9d2e78badf84 --- /dev/null +++ b/RPS_clim.R @@ -0,0 +1,29 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = T, + bin_dim_abs = NULL, return_mean = TRUE) { + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } +} diff --git a/SprErr.R b/SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..3b6370f82ad30944260ec4b76abf67f9f0215d5b --- /dev/null +++ b/SprErr.R @@ -0,0 +1,222 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance is provided by a one-sided Fisher's test. 
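+#'
+#'@details
+#'Conceptually, and as a hedged sketch only (assuming a single dataset, no NAs,
+#''exp' with dimensions c(member, sdate) and 'obs' the ensemble-mean
+#'observation series), the statistic is:
+#'\preformatted{
+#'  spread <- sqrt(mean(apply(exp, 2, var)))       # spread around the ensemble mean
+#'  error  <- sqrt(mean((colMeans(exp) - obs)^2))  # RMSE of the ensemble mean
+#'  ratio  <- spread / error
+#'}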
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of the dataset
+#'  (nobs/nexp) dimension. The default value is NULL (no dataset).
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#'  is 'member'.
+#'@param time_dim A character string indicating the name of the dimension along
+#'  which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of the
+#'  test Ho: SD/RMSE = 1 or not. The default value is TRUE.
+#'@param sign A logical value indicating whether to retrieve the statistical
+#'  significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default
+#'  value is FALSE.
+#'@param alpha A numeric indicating the significance level for the statistical
+#'  significance test. The default value is 0.05.
+#'@param na.rm A logical value indicating whether to remove NA values. The
+#'  default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list of arrays with dimensions c(nexp, nobs, the rest of
+#'  dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#'  the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'.
+#'  If dat_dim is NULL, nexp and nobs are omitted.\cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p.val}{
+#'  The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only
+#'  present if \code{pval = TRUE}.
+#'}
+#'\item{$sign}{
+#'  The statistical significance. Only present if \code{sign = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- SprErr(sampleData$mod, sampleData$obs, dat_dim = 'dataset')
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\dontrun{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE)
+#'}
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member',
+                   time_dim = 'sdate', pval = TRUE, sign = FALSE,
+                   alpha = 0.05, na.rm = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. 
", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + res <- Apply(data = list(exp, obs), + target_dims = c(dat_dim, memb_dim, time_dim), + fun = .SprErr, + pval = pval, + sign = sign, + na.rm = na.rm, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(exp[jexp, , ] - InsertDim(ens_exp, 2, dim(exp)[2])[jexp, , ], + time_dim = names(dim(exp))[3])) + enodif <- 
Eno(ens_exp[jexp, ] - ens_obs[jobs, ], time_dim = names(dim(exp))[3])
+
+      if (pval) {
+        # 'spread', 'error', 'enospr' and 'enodif' are scalars within this
+        # loop iteration, so they are used without indexing. 'fstat' avoids
+        # masking the built-in F (FALSE).
+        fstat <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1))
+        if (!is.na(fstat) & !is.na(enospr) & !is.na(enodif) & enospr > 2 & enodif > 2) {
+          p.val[jexp, jobs] <- 1 - pf(fstat, enospr - 1, enodif - 1)
+        } else {
+          ratio[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+  res <- list(ratio = ratio)
+  if (pval) {res$p.val <- p.val}
+  if (sign) {res$sign <- p.val <= alpha / 2}
+
+  return(res)
+}
+
diff --git a/auto-s2s_dstplot.sh b/auto-s2s_dstplot.sh
new file mode 100644
index 0000000000000000000000000000000000000000..90697e57124ffa816533017b87a4f7b902cd608d
--- /dev/null
+++ b/auto-s2s_dstplot.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# AUTO INPUTS #################
+S2S4E_dir=%PROJDIR%
+SDATE=%SDATE%
+ARCHIVE=%ARCHIVE%
+###############################
+
+module load Python/3.7.3-foss-2015a basemap xarray
+
+outsubdir=${ARCHIVE}/weekly_statistics/S2S/plots/${SDATE}/
+
+set -vx
+cd ${S2S4E_dir}/visualization/DST-plots/
+python DST-plots.py -v tas -d ${SDATE} -s s2s -o ${outsubdir}
+
+
diff --git a/autosubmit/auto-scorecards.sh b/autosubmit/auto-scorecards.sh
index 4b5273725bed84811e1267048d035a0e2f712a28..c30f643f3be53f216ead66675a9545a0e159198a 100644
--- a/autosubmit/auto-scorecards.sh
+++ b/autosubmit/auto-scorecards.sh
@@ -2,8 +2,8 @@
 ############ AUTOSUBMIT INPUTS ############
 proj_dir=%PROJDIR%
-outdir=%OUTDIR%
-recipe=%RECIPE%
+outdir=%common.OUTDIR%
+recipe=%common.RECIPE%
 ###############################
 cd $proj_dir
diff --git a/bar_plot.py b/bar_plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..55ff60353832f87d9ce9f904ab426861a6577313
--- /dev/null
+++ b/bar_plot.py
@@ -0,0 +1,458 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import xarray as xr
+import yaml
+import datetime
+from dateutil.relativedelta import relativedelta
+
+
+
+def get_bardata(var, date, arch, location, coords, region, fcst_type):
+    """ Function that loads tercile, skill and probs data
+        from .nc files.
+
+        var: str with the variable name to be loaded (tas, prlr, rsds...)
+        date: str with the sdate in YYYYMMDD format
+        arch: str with the archive dir
+        location: str with the location
+        return: df with the tercile, skill and probs
+    """
+
+    if fcst_type == "seasonal":
+        skill_file = f"{arch}/{var}/{date}/{var}-skill_month{date[4:6]}.nc"
+        terciles_file = f"{arch}/{var}/{date}/{var}-percentiles_month{date[4:6]}.nc"
+        probs_file = f"{arch}/{var}/{date}/{var}-prob_{date}_{date[4:6]}.nc"
+    else:
+        week = datetime.datetime.strptime(date, "%Y%m%d").strftime("%V")
+        skill_file = f"{arch}/{var}/{date}/{var}-skill_week{week}.nc"
+        terciles_file = f"{arch}/{var}/{date}/{var}-percentiles_week{week}.nc"
+        probs_file = f"{arch}/{var}/{date}/{var}-prob_{date}_{week}.nc"
+
+
+    probs = xr.open_dataset(probs_file, decode_times=False, drop_variables='time_step')
+    skill = xr.open_dataset(skill_file, decode_times=False, drop_variables='time_step')
+    terciles = xr.open_dataset(terciles_file, decode_times=False, drop_variables='time_step')
+
+    data = xr.merge([probs, skill, terciles])
+    del probs, skill, terciles
+
+    data = data.transpose('longitude', 'latitude', 'time')
+
+    data = data.sel(dict(latitude=coords[1]), method='nearest')
+    data = data.sel(dict(longitude=coords[0]), method='nearest')
+    data.attrs['location'] = location
+    data.attrs['Variable'] = var  # was set to location, presumably a copy-paste slip
+
+    data['prob_bp10'] = data['prob_bp10'].where(data.rpss > 0, 0)
+    data['prob_ap90'] = data['prob_ap90'].where(data.rpss > 0, 0)
+
+    data['prob_bp10'] = data['prob_bp10'].where(data.bsp10 > 0, 0)
+    data['prob_ap90'] = data['prob_ap90'].where(data.bsp90 > 0, 0)
+
+    data['prob_bp10'] = data['prob_bp10'].where(data.prob_bp10 >= 40, 0)
+    data['prob_ap90'] = data['prob_ap90'].where(data.prob_ap90 >= 40, 0)
+
+    return(data)
+
+
+def df_to_pd(df):
+
+    """ Function that converts from xarray dataset to pandas object
+
+        df: xarray dataset from get_data
+        return: pandas dataframe for bar plot
+    """
+
+    location = df.attrs['location']
+    df = df.to_dataframe()
+    df = df.drop(['latitude', 'longitude', 'rpss', 'bsp10', 'bsp90'], axis=1)
+    df = df[['prob_bp10', 'prob_bn', 'prob_n', 'prob_an', 'prob_ap90']]
+    df = df.rename(columns={'prob_bp10': "percentile 10",
+                            'prob_bn': "below normal",
+                            'prob_n': "normal",
+                            'prob_an': "above normal",
+                            'prob_ap90': "percentile 90"})
+    ## TODO: change hardcoded
+    #df['Valid time'] = ['December','January','February']
+    #df = df.set_index('Valid time')
+    df['location'] = location
+
+    return(df)
+
+def load_colors(conf_file, var):
+    """ loads colors from conf file
+
+        conf_file: conf_file string with filename including path
+        var: string with variable name (diff colors for precipitation)
+        return: dict with color codes according to conf
+    """
+    with open(conf_file) as f:
+        conf = yaml.load(f, Loader=yaml.FullLoader)
+    colors = conf['colors'][var]
+    return(colors)
+
+def load_var_attrs(conf_file, var):
+    """ loads var attributes from conf file
+
+        conf_file: conf_file string with filename including path
+        var: string with variable name (diff colors for precipitation)
+        return: dict with var attributes (units, longname...) according to conf
+    """
+    with open(conf_file) as f:
+        conf = yaml.load(f, Loader=yaml.FullLoader)
+    var_attrs = conf['variables'][var]
+    return(var_attrs)
+
+
+def convert_to_float(frac_str):
+    """ converts fraction in string to float
+
+        frac_str: string with fraction ex.
"2/4" + return: float resulting from operation + """ + try: + return float(frac_str) + except ValueError: + num, denom = frac_str.split('/') + try: + leading, num = num.split(' ') + whole = float(leading) + except ValueError: + whole = 0 + frac = float(num) / float(denom) + return whole - frac if whole < 0 else whole + frac + +def set_alpha(colors,skills,alpha): + """ sets alpha channel of the rgba code according to skill + + colors: rgb code in tupple form + skills: list with the skill value for each lead time + alpha: int alpha value to set in case skill < 0 + return: color in rgba form for each lead time + """ + colors = tuple(convert_to_float(itup) for itup in colors) + + return([colors + (1,) if s >= 0 else colors + (alpha,) for s in skills]) + +def set_edge_color(skills): + """ sets bars edge color as function of skill + black if >= 0; grey if not + + skills: list with the skill value for each lead time + return: str with color for each lead time + """ + return(['k' if s >= 0 else 'grey' for s in skills]) + + +def bar_plot(var,date,location,pd,colors,var_attrs,skills,terciles,extremes,outdir): + + """ generates the bar plot for terciles and extremes with + skill mask + + var: string with the variable to plot + date: string with sdate to plot YYYYMMDD + location: string with the location + pd: pandas dataframe with probs + var_attrs: dict with variable attributes + skills: xarray dataset with skills scores + terciles: xarray dataset with terciles scores + extremes: xarray dataset with extremes scores + outdir: string with the path to export the files + return: saves plot in png format + """ + + # inits figure + fig = plt.figure() + ax = fig.add_axes([0,0,1,1]) + + # sets colors and alpha according to skill values + colors_bn = set_alpha(colors['bn']['rgba'],skills.rpss.values,0.2) + colors_n = set_alpha(colors['n']['rgba'],skills.rpss.values,0.2) + colors_an = set_alpha(colors['an']['rgba'],skills.rpss.values,0.2) + colors_p10 = set_alpha(colors['p10']['rgba'],skills.rpss.values,1) + colors_p90 = set_alpha(colors['p90']['rgba'],skills.rpss.values,1) + + # sets edge colors according to skill values + edge_colors = set_edge_color(skills.rpss.values) + + # plots bars for terciles + + X = np.arange(pd.shape[0])*2 + + pan = ax.bar(X + 0.3, pd['above normal'].to_numpy(), + color = colors_an, width = 0.3, label='Above normal', + edgecolor=edge_colors,zorder=3) + + pn = ax.bar(X + 0.0, pd['normal'].to_numpy(), + color = colors_n, width = 0.3, label='Normal', + edgecolor=edge_colors,zorder=3) + + pbn = ax.bar(X - 0.3, pd['below normal'].to_numpy(), + color = colors_bn, width = 0.3, label='Below normal', + edgecolor=edge_colors,zorder=3) + # plots bars for extremes + + p90 = ax.bar(X + 0.5, pd['percentile 90'].to_numpy(), + color = colors_p90, width = 0.1, + label='Upper extreme',edgecolor='k',zorder=3) + + p10 = ax.bar(X - 0.5, pd['percentile 10'].to_numpy(), + color = colors_p10, width = 0.1, + label='Lower extreme',edgecolor='k',zorder=3) + + for cont,rect in enumerate(pn): + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['n']['hex'],fontsize=12) + if skills.rpss[cont].values < 0: + plt.text(rect.get_x() + rect.get_width() / 2.0, 20, + 'Skill < 0%', ha='center', + va='bottom',c='k',fontsize=22,fontweight='roman') + + # if prob > 40 print text with prob value + for rect in pan: + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, 
height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['an']['hex'],fontsize=12) + for rect in pbn: + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['bn']['hex'],fontsize=12) + + # if extreme exists plots text with extreme value + for cont,rect in enumerate(p10): + height = rect.get_height() + if height != 0: + plt.text(rect.get_x() - rect.get_width(), height+1, + '{:.0f}%\n(< {:.1f}{})'.format(height,float(extremes.p10[cont]),var_attrs['units']), + ha='right', va='center',color=colors['p10']['hex'],fontsize=11) + + for cont,rect in enumerate(p90): + height = rect.get_height() + if height != 0: + plt.text(rect.get_x() + rect.get_width(), height+1, + '{:.0f}%\n(> {:.1f}{})'.format(height,float(extremes.p90[cont]),var_attrs['units']), + ha='left', va='center',color=colors['p90']['hex'],fontsize=11) + + location = pd.location.values[0] + sdate = datetime.datetime.strptime(date, "%Y%m%d") + + title = '{} ({})'.format(var_attrs['name'],location) + subtitle = f'Seasonal forecast issued on {sdate.strftime("%b %Y" )}' + + plt.text(x=-0.8, y=115, s=title, fontsize=20, weight='bold') + plt.text(x=-0.8, y=107, s=subtitle, fontsize=14) + + ax.set_ylabel('Probability (%)',fontsize=14) + #ax.set_xlabel('Valid time') + + # sets terciles ticks + tcks = [-0.15,0.15] + ax.set_xticks(tcks + [i+2 for i in tcks] + [i+4 for i in tcks]) + terciles_vals = list(terciles.isel(time=0).to_array().values) + list(terciles.isel(time=1).to_array().values) + list(terciles.isel(time=2).to_array().values) + + if var == 'acprec': + terciles_vals = [0 if ter < 0 else ter for ter in terciles_vals] + #terciles_vals = [np.nan for ter in terciles_vals] + + plt.text(x=5, y=-10, s='[{}]'.format(var_attrs['units']), fontsize=14,rotation=0, ha='center', + color='grey') + + ax.set_xticklabels([f'{i:.1f}' if i is not np.nan else '' for i in terciles_vals], + fontdict={'fontsize':'11'},rotation=45,color='grey', + rotation_mode='anchor',ha='right',va='top') + #ax.tick_params(length=8) + + x_labels = [(sdate + relativedelta(months=+leadtime+1)).strftime("%B") + for leadtime in range(3)]## TODO: hardcoded leadtimes + x_positions = [0,2,4] + for ind,label in enumerate(x_labels): + plt.text(x=x_positions[ind], y=-22, s=label, fontsize=14,rotation=0, ha='center') + + #ax.legend(bbox_to_anchor=(1, 1.35),frameon=False,fontsize=11) + ax.legend(bbox_to_anchor=(0.2,0.83,0.6,0.2), + loc="upper center", + mode="expand", borderaxespad=1.5, ncol=2, + frameon=False) + + leg = ax.get_legend() + + # sets legend colors, to avoid colors with low opacity + hex_colors = [tuple(convert_to_float(itup) + for itup in colors[i]['rgba']) + for i in ['an','n','bn','p90','p10']] + for cont,handle in enumerate(leg.legendHandles): + handle.set_color(hex_colors[cont]) + + ax.set_ylim(0,100) + ax.set_xlim(-1,5) + ax.grid(axis='y',alpha=0.4,zorder=0); + + #plt.show() + fig.savefig(f'{outdir}/bar_plot-{var}-{date}-{location}.png',dpi=130,bbox_inches='tight') + #fig.savefig('test.png',dpi=130,bbox_inches='tight') + plt.close() + + +def bar_plot_subseas(var,date,location,pd,colors,var_attrs,skills,terciles,extremes,outdir): + + """ generates the bar plot for terciles and extremes with + skill mask + + var: string with the variable to plot + date: string with sdate to plot YYYYMMDD + location: string with the location + pd: pandas dataframe with probs + var_attrs: dict with variable attributes + skills: xarray dataset with skills scores + terciles: 
xarray dataset with terciles scores + extremes: xarray dataset with extremes scores + outdir: string with the path to export the files + return: saves plot in png format + """ + + # inits figure + fig = plt.figure() + ax = fig.add_axes([0,0,1,1]) + + # sets colors and alpha according to skill values + colors_bn = set_alpha(colors['bn']['rgba'],skills.rpss.values,0.2) + colors_n = set_alpha(colors['n']['rgba'],skills.rpss.values,0.2) + colors_an = set_alpha(colors['an']['rgba'],skills.rpss.values,0.2) + colors_p10 = set_alpha(colors['p10']['rgba'],skills.rpss.values,1) + colors_p90 = set_alpha(colors['p90']['rgba'],skills.rpss.values,1) + + # sets edge colors according to skill values + edge_colors = set_edge_color(skills.rpss.values) + + # plots bars for terciles + + X = np.arange(pd.shape[0])*2 + + pan = ax.bar(X + 0.3, pd['above normal'].to_numpy(), + color = colors_an, width = 0.3, label='Above normal', + edgecolor=edge_colors,zorder=3) + + pn = ax.bar(X + 0.0, pd['normal'].to_numpy(), + color = colors_n, width = 0.3, label='Normal', + edgecolor=edge_colors,zorder=3) + + pbn = ax.bar(X - 0.3, pd['below normal'].to_numpy(), + color = colors_bn, width = 0.3, label='Below normal', + edgecolor=edge_colors,zorder=3) + # plots bars for extremes + + p90 = ax.bar(X + 0.5, pd['percentile 90'].to_numpy(), + color = colors_p90, width = 0.1, + label='Upper extreme',edgecolor='k',zorder=3) + + p10 = ax.bar(X - 0.5, pd['percentile 10'].to_numpy(), + color = colors_p10, width = 0.1, + label='Lower extreme',edgecolor='k',zorder=3) + + for cont,rect in enumerate(pn): + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['n']['hex'],fontsize=12) + if skills.rpss[cont].values < 0: + plt.text(rect.get_x() + rect.get_width() / 2.0, 20, + 'Skill < 0%', ha='center', + va='bottom',c='k',fontsize=18,fontweight='roman') + + # if prob > 40 print text with prob value + for rect in pan: + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['an']['hex'],fontsize=12) + for rect in pbn: + height = rect.get_height() + if height >= 40: + plt.text(rect.get_x() + rect.get_width() / 2.0, height+1, + f'{height:.0f}%', ha='center', + va='bottom',c=colors['bn']['hex'],fontsize=12) + + # if extreme exists plots text with extreme value + for cont,rect in enumerate(p10): + height = rect.get_height() + if height != 0: + plt.text(rect.get_x() - rect.get_width(), height+1, + '{:.0f}%\n(< {:.1f}{})'.format(height,float(extremes.p10[cont]),var_attrs['units']), + ha='right', va='center',color=colors['p10']['hex'],fontsize=11) + + for cont,rect in enumerate(p90): + height = rect.get_height() + if height != 0: + plt.text(rect.get_x() + rect.get_width(), height+1, + '{:.0f}%\n(> {:.1f}{})'.format(height,float(extremes.p90[cont]),var_attrs['units']), + ha='left', va='center',color=colors['p90']['hex'],fontsize=11) + + location = pd.location.values[0] + sdate = datetime.datetime.strptime(date, "%Y%m%d") + + title = '{} ({})'.format(var_attrs['name'],location) + subtitle = f'Subseasonal forecast issued on {sdate.strftime("%d %b %Y" )}' + + plt.text(x=-0.8, y=135, s=title, fontsize=20, weight='bold') + plt.text(x=-0.8, y=127, s=subtitle, fontsize=14) + + ax.set_ylabel('Probability (%)',fontsize=14) + #ax.set_xlabel('Valid time') + + # sets terciles ticks + tcks = [-0.15,0.15] + ax.set_xticks(tcks + [i+2 for i in tcks] + [i+4 
for i in tcks] + [i+6 for i in tcks])
+    terciles_vals = list(terciles.isel(time=0).to_array().values) + list(terciles.isel(time=1).to_array().values) + list(terciles.isel(time=2).to_array().values) + list(terciles.isel(time=3).to_array().values)
+
+    if var == 'acprec':
+        terciles_vals = [0 if ter < 0 else ter for ter in terciles_vals]
+        #terciles_vals = [np.nan for ter in terciles_vals]
+
+    plt.text(x=7, y=-10, s='[{}]'.format(var_attrs['units']), fontsize=14, rotation=0, ha='center',
+             color='grey')
+
+    ax.set_xticklabels([f'{i:.1f}' if i is not np.nan else '' for i in terciles_vals],
+                       fontdict={'fontsize': '11'}, rotation=45, color='grey',
+                       rotation_mode='anchor', ha='right', va='top')
+    #ax.tick_params(length=8)
+
+    x_labels = [f'{(sdate + relativedelta(days=+4) + relativedelta(days=+(leadtime)*7)).strftime("%d/%m")}-{(sdate + relativedelta(days=+4) + relativedelta(days=+(leadtime)*7) + relativedelta(days=+6)).strftime("%d/%m")}'
+                for leadtime in range(4)]  ## TODO: hardcoded leadtimes
+
+    x_positions = [0, 2, 4, 6]
+    for ind, label in enumerate(x_labels):
+        plt.text(x=x_positions[ind], y=-22, s=label, fontsize=10, rotation=0, ha='center')
+
+    #ax.legend(bbox_to_anchor=(1, 1.35),frameon=False,fontsize=11)
+    ax.legend(bbox_to_anchor=(0.2, 0.85, 0.6, 0.2),
+              loc="upper center",
+              mode="expand", borderaxespad=1.5, ncol=2,
+              frameon=False)
+
+    leg = ax.get_legend()
+
+    # sets legend colors, to avoid colors with low opacity
+    hex_colors = [tuple(convert_to_float(itup)
+                        for itup in colors[i]['rgba'])
+                  for i in ['an', 'n', 'bn', 'p90', 'p10']]
+    for cont, handle in enumerate(leg.legendHandles):
+        handle.set_color(hex_colors[cont])
+
+    ax.set_yticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
+    ax.set_yticklabels(["0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100"])
+    ax.set_ylim(0, 120)
+    ax.set_xlim(-1, 7)
+    ax.grid(axis='y', alpha=0.4, zorder=0)
+
+    #plt.show()
+    fig.savefig(f'{outdir}/bar_plot-{var}-{date}-{location}.png', dpi=130, bbox_inches='tight')
+    plt.close()
diff --git a/bigpredidata_fqa.R b/bigpredidata_fqa.R
new file mode 100644
index 0000000000000000000000000000000000000000..b99441e21c7c2c8ea250096ad380dc8023213c8b
--- /dev/null
+++ b/bigpredidata_fqa.R
@@ -0,0 +1,279 @@
+source("modules/Loading/Loading.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Skill/Skill.R")
+args = commandArgs(trailingOnly = TRUE)
+#recipe_file <- args[1]
+recipe_file <- "recipe_bigpredidata_fqa.yml"
+#recipe <- read_atomic_recipe(recipe_file)
+recipe <- prepare_outputs(recipe_file)
+# Load datasets
+data <- Loading(recipe)
+data <- Units(recipe, data)
+data_summary(data$hcst, recipe)
+data_summary(data$obs, recipe)
+
+# start from here after doing the previous steps with the SUNSET recipe:
+
+# Full cross-validation workflow
+sdate_dim <- dim(data$hcst$data)['syear']
+nmemb <- dim(data$hcst$data)['ensemble']
+nftime <- dim(data$hcst$data)['time']
+nlats <- dim(data$hcst$data)['latitude']
+nlons <- dim(data$hcst$data)['longitude']
+
+
+info(recipe$Run$logger,
+     paste("ftime", nftime))
+info(recipe$Run$logger,
+     paste(dim(data$obs$data)))
+info(recipe$Run$logger,
+     paste(as.vector(data$hcst$coords$longitude)))
+info(recipe$Run$logger,
+     paste(as.vector(data$obs$coords$longitude)))
+info(recipe$Run$logger,
+     paste(as.vector(data$hcst$coords$latitude)))
+info(recipe$Run$logger,
+     paste(as.vector(data$obs$coords$latitude)))
+
+
+
+cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL)
+# Parallelized:
+#loops <- array(1:length(cross),
c(loop = length(cross))) +#res <- Apply(list(loops), target = NULL, +# fun = function(t) { +hcst_ev_res <- array(NA, c(nftime, nlats, + nlons, nmemb, + sdate_dim)) +cal_hcst_ev_res <- array(NA, c(nftime, nlats, + nlons, nmemb, + sdate_dim)) +obs_ev_res <- array(NA, c(nftime, nlats, + nlons, ensemble = 1, sdate_dim)) +obs_tr_res <- array(NA, c(sample = sdate_dim - 1, nftime, + nlats, nlons, ensemble = 1, sdate_dim)) +lims_hcst_tr_res <- array(NA, c(probs = 2, nftime, nlats, + nlons, sdate_dim)) +lims_obs_tr_res <- array(NA, c(probs = 2, nftime, nlats, + nlons, sdate_dim)) +lims_cal_hcst_tr_res <- array(NA, c(probs = 2, nftime, nlats, + nlons, sdate_dim)) + + + +info(recipe$Run$logger, + paste(dim(data$hcst$data))) +info(recipe$Run$logger, + paste(names(dim(data$hcst$data)))) + + + +#crps_clim_res <- array(NA, c(nftime, nlats, nlons, ensemble = 1, sdate_dim)) +for (t in 1:sdate_dim) { + info(recipe$Run$logger, + paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + # training + obs_tr <- data$obs$data[1,1,1,1,cross[[t]]$train.dexes,,,,] + hcst_tr <- data$hcst$data[1,1,1,1,cross[[t]]$train.dexes,,,,] + # eval years + hcst_ev <- data$hcst$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] + obs_ev <- data$obs$data[1,1,1,1,cross[[t]]$eval.dexes,,,,] + dim(obs_tr) <- c(syear = (as.numeric(sdate_dim) - 1), nftime, nlats, nlons) + dim(obs_ev) <- c(syear = 1, nftime, nlats, nlons) + dim(hcst_tr) <- c(syear = (as.numeric(sdate_dim) - 1), + nftime, nlats, nlons, nmemb) + dim(hcst_ev) <- c(syear = 1, nftime, nlats, nlons, nmemb) + + # rm("obs_tr", "hcst_tr", "obs_ev", "hcst_ev") + cal_hcst_tr <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = recipe$Analysis$Workflow$Calibration$method, + eval.method = 'in-sample', sdate_dim = 'syear', + memb_dim = 'ensemble') + cal_hcst_ev <- CSTools::Calibration(exp = hcst_tr, obs = obs_tr, + exp_cor = hcst_ev, + cal.method = recipe$Analysis$Workflow$Calibration$method, + eval.method = 'in-sample', sdate_dim = 'syear', + memb_dim = 'ensemble') + + #Category limits + lims_hcst_tr <- Apply(hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'),#, 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + + gc() + hcst_ev_res[,,,,t] <- hcst_ev + cal_hcst_ev_res[,,,,t] <- cal_hcst_ev + obs_ev_res[,,,,t] <- obs_ev + obs_tr_res[,,,,,t] <- obs_tr + lims_hcst_tr_res[,,,,t] <- lims_hcst_tr + lims_cal_hcst_tr_res[,,,,t] <- lims_cal_hcst_tr + lims_obs_tr_res[,,,,t] <- lims_obs_tr + res <- list(hcst_ev = hcst_ev_res, + obs_ev = obs_ev_res, + obs_tr = obs_tr_res, #required as reference forecast for the CRPSS + cal_hcst_ev = cal_hcst_ev_res, + lims_hcst_tr = lims_hcst_tr_res, + lims_obs_tr = lims_obs_tr_res, + lims_cal_hcst_tr = lims_cal_hcst_tr_res) +} +# RPS +source("GetProbs.R") +hcst_probs_ev <- GetProbs(res$hcst_ev, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'probs', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_hcst_tr, + ncores = recipe$Analysis$ncores) +obs_probs_ev <- 
GetProbs(res$obs_ev, time_dim = 'syear',
+                        prob_thresholds = NULL,
+                        bin_dim_abs = 'probs',
+                        indices_for_quantiles = NULL,
+                        memb_dim = 'ensemble',
+                        abs_thresholds = res$lims_obs_tr,
+                        ncores = recipe$Analysis$ncores)
+rps <- RPS(exp = hcst_probs_ev, obs = obs_probs_ev, memb_dim = NULL,
+           cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear',
+           ncores = recipe$Analysis$ncores)
+source("RPS_clim.R")
+rps_clim <- Apply(list(obs_probs_ev),
+                  target_dims = c('probs', 'syear'),
+                  RPS_clim, bin_dim_abs = 'probs', cross.val = FALSE)$output1
+# RPSS
+rpss <- RPSS(exp = hcst_probs_ev, obs = obs_probs_ev,
+             time_dim = 'syear', memb_dim = NULL,
+             cat_dim = 'probs',
+             # We should use a line like this
+             #abs_threshold = res$lims_hcst_tr,
+             #prob_threshold = c(1/3, 2/3),
+             cross.val = FALSE,
+             ncores = recipe$Analysis$ncores)
+# CRPS
+crps <- CRPS(exp = res$hcst_ev, obs = res$obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             ncores = recipe$Analysis$ncores)
+# I don't know how this one should be computed:
+# It cannot be done here because it would include information from the other years
+#source("modules/Skill/R/CRPS_clim.R")
+# But if I do it with ano_obs_tr, it can be done here;
+# the result is the same as inside the loop.
+crps_clim <- CRPS(exp = res$obs_tr, obs = res$obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample.syear',
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - 1,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$hcst_ev, obs = res$obs_ev, ref = ref,
+               memb_dim = 'ensemble',
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+# Corr
+source("Corr.R")
+enscorr <- Corr(res$hcst_ev, res$obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$hcst_ev, res$obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = sdate_dim)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$hcst_ev, obs = res$obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS NA")
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign)
+# Add the leading 'var' dimension expected by save_metrics()
+skill_metrics <- lapply(skill_metrics, function(x) {
+  InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
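+# save_metrics() writes inside recipe$Run$output_dir, so it is pointed to
+# outputs/Skill/ here; 'original' is kept to restore it after saving.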
+# Compute save metrics +source("modules/Saving/Saving.R") +#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) + save_metrics(recipe = recipe, + skill = skill_metrics, + data_cube = data$hcst, agg = 'global', + outdir = recipe$Run$output_dir) + +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +if (data$hcst$coords$longitude[1] != 0) { + skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +} + info(recipe$Run$logger, + paste("lons:", data$hcst$coords$longitude)) + info(recipe$Run$logger, + paste("lons:", data$obs$coords$longitude)) + + +#data$hcst$coords$longitude <- -179:180 + +Visualization(recipe, data, skill_metrics, significance = TRUE) + +source("tools/add_logo.R") +add_logo(recipe, "rsz_rsz_bsc_logo.png") + + + diff --git a/conf.yml b/conf.yml new file mode 100644 index 0000000000000000000000000000000000000000..db40d892cf0402882d8c016f1c0cae2ee8f75ae6 --- /dev/null +++ b/conf.yml @@ -0,0 +1,77 @@ +conf: + colors: + acprec: + blue : '#FFAB38' + red : '#41CBC9' + grey : '#ACACAC' + land : '#DEDEDE' + ocean : '#F7F7F7' + default: + blue : '#2BA4B4' + red : '#FF764D' + grey : '#ACACAC' + land : '#DEDEDE' + ocean : '#F7F7F7' + thrs: + extreme: 40 + skill: 0 + bigcircle: 50 + +colors: + acprec: + p10: + rgba: !!python/tuple [177/255,105/255,4/255] + hex: "#B16904" #"#F09416" + p90: + rgba: !!python/tuple [10/255,136/255,134/255] + hex: "#0A8886" #"#2CAAA8" + an: + rgba: !!python/tuple [65/255, 203/255, 201/255] + hex: "#41CBC9" + n: + rgba: !!python/tuple [172/255,172/255,172/255] + hex: "#ACACAC" + bn: + rgba: !!python/tuple [255/255,171/255,56/255] + hex: "#FFAB38" + default: + p10: + rgba: !!python/tuple [5/255,99/255,112/255] + hex: "#056370" #"#2BA4B4" + p90: + rgba: !!python/tuple [141/255, 42/255, 5/255] + hex: "#8D2A05" #"#E94B11" + an: + rgba: !!python/tuple [255/255, 118/255, 77/255] + hex: "#FF764D" + n: + rgba: !!python/tuple [172/255,172/255,172/255] + hex: "#ACACAC" + bn: + rgba: !!python/tuple [44/255,170/255,168/255] + hex: "#33BFD1" + +locations: + cat: + Barcelona: [2.1, 41.39] + Granollers: [2.29, 41.61] + Viladecans: [2.02, 41.31] + None: "None" + +files: + dir: $arch$/$var$/$sdate$/ + probs: $var$-$region$-prob_$sdate$.nc + skill: $var$-$region$-skill_month$month$.nc + terciles: $var$-$region$-percentiles_$month$.nc + +variables: + t2: + name: 'Temperature' + units: '$^\circ$C' + t02min: + name: 'Minimum temperature' + units: '$^\circ$C' + t02max: + name: 'Maximum temperature' + units: '$^\circ$C' + diff --git a/crontab.sh b/crontab.sh new file mode 100644 index 0000000000000000000000000000000000000000..d44c16461e20c06a38b0a781458454553f9ae597 --- /dev/null +++ b/crontab.sh @@ -0,0 +1,15 @@ +# #!/bin/bash +module load R/4.1.2-foss-2015a-bare +module load Python/3.7.3-foss-2015a + +today=$(date +"%A") +if [ "$today" == "Thursday" ]; then + current_date=$(date +"%Y%m%d") +elif [ "$today" == "Friday" ]; then + current_date=$(date -d "yesterday" +"%Y%m%d") +fi +echo "Current date: $current_date" + +Rscript /esarchive/scratch/ptrascas/R/sunset/oper_heat_i4c_plotforecastpdf.R $current_date +python xxxxxx # add current date to launch + diff --git a/crontab.sh~ b/crontab.sh~ new file mode 100644 index 0000000000000000000000000000000000000000..74cb073514d10d9bc69d7ba0b8b1658d490d9d86 --- /dev/null +++ b/crontab.sh~ @@ -0,0 +1,7 @@ +# #!/bin/bash +module load R/4.1.2-foss-2015a-bare +module load Python/3.7.3-foss-2015a +current_date=$(date +"%Y-%m-%d") 
+echo "Current date: $current_date"
+Rscript /esarchive/scratch/ptrascas/R/sunset/oper_heat_i4c_plotforecastpdf.R $current_date
+python xxxxxx # add current date to launch (only works on Thursdays)
diff --git a/debug_scorecards_paloedits.Rmd b/debug_scorecards_paloedits.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..fab97078204d40722e8eec409610d65f60ad9949
--- /dev/null
+++ b/debug_scorecards_paloedits.Rmd
@@ -0,0 +1,135 @@
+### Debugging the Scorecards module without running the whole recipe
+
+First, you need to run SUNSET asking it to produce and save the metrics from the Skill module. In the recipe, you need to request the `scorecards` output format, as well as several models and/or initialisation dates and metrics. For instance, in the case of seasonal climate forecasts, the 12 initialisation dates can be requested. You can also include the Scorecards section in the recipe to see what the default result looks like.
+
+SUNSET will create an output directory (e.g. /esarchive/scratch/ptrascas/R/sunset_outputs/recipe_example_scorecards_20240213120100) that will contain a folder called `output` in which the numeric output files are saved (i.e. NetCDF files). Ensure it contains a folder `Skill` with the results of the verification (e.g. that all requested start dates, metrics and models are stored).
+
+
+To debug it, we will use an interactive session on the workstation (WS), from the SUNSET code folder:
+
+```
+source MODULES
+R
+```
+
+We will follow the first function, [execute_scorecards.R](https://earth.bsc.es/gitlab/es/sunset/-/blob/master/modules/Scorecards/execute_scorecards.R), in the Scorecards module.
+
+```r
+source('tools/libs.R')
+source('modules/Scorecards/Scorecards.R')
+```
+
+Skip lines 4 to 6 of the script; instead, adjust them to your case:
+
+```
+recipe_file <- "/esarchive/scratch/recipe_example_scorecards.yml"
+output_dir <- "/esarchive/scratch/ptrascas/R/sunset_outputs/recipe_example_scorecards_20240213120100" # Copy this folder to your directory to use the skill metrics previously calculated
+```
+
+
+Execute lines 10 to 14:
+
+```r
+recipe <- read_yaml(recipe_file)
+recipe$Run$output_dir <- output_dir
+
+## Loop over variables
+datasets <- recipe$Analysis$Datasets
+```
+
+In case we want to adjust some parameters of the final design, we can select the first system, reference and variable by running:
+
+```
+system <- 1
+reference <- 1
+variable <- 1
+```
+This allows us to load and run a small sample, so that the task does not take a long time.
+
+Now we execute the code from "execute_scorecards.R":
+
+```r
+  scorecard_recipe <- recipe
+  scorecard_recipe$Analysis$Datasets$System <-
+    recipe$Analysis$Datasets$System[[system]]
+  scorecard_recipe$Analysis$Datasets$Reference <-
+    recipe$Analysis$Datasets$Reference[[reference]]
+  scorecard_recipe$Analysis$Variables <-
+    recipe$Analysis$Variables[[variable]]
+  # Plot Scorecards
+  Scorecards(scorecard_recipe)
+```
+
+
+Check the results by opening the plot. Then adjust one parameter, for instance the colorbar size:
+
+```r
+scorecard_recipe$Analysis$Workflow$Scorecards$legend_width <- 300 # tip: the best value is 690
+```
+Re-run the code:
+```r
+Scorecards(scorecard_recipe)
+```
+
+Check the output again and keep making small changes until you obtain the desired result.
+
+Scorecards code workflow:
+
+1. execute_scorecards.R, which loops over variables, systems and references.
+2. Scorecards(), stored under `modules/Scorecards/Scorecards.R`, whose input is a recipe.
+3. Then it starts the workflow to create the scorecards:
+   3.1 LoadMetrics
+   3.2 Aggregate metrics
+   3.3 Read any parameters specified in the recipe (e.g. recipe$Analysis$Workflow$Scorecards$table_label)
+   3.4 Create the scorecards
+
+How to know the available parameters? Check SCPlotScorecard.R and the wiki:
+https://earth.bsc.es/gitlab/es/sunset/-/wikis/home#scorecards-module
+
+
+How to use browser()?
+
+The browser() command is very useful for debugging: the script stops running as soon as it reaches the command. This way you can execute your scripts in parts, making it easier to find and fix a bug.
+
+For example, let's introduce a bug, in this case a typo, in the Scorecards.R script and re-run the function.
+
+Open the modules/Scorecards/Scorecards.R script and, in line 40, add an "x" so it reads "namex":
+```r
+  var <- recipe$Analysis$Variables$namex
+```
+
+Save the file, close it and re-run everything, including sourcing the Scorecards.R script (otherwise the change will not be loaded). You will encounter this error:
+
+```r
+Error in LoadMetrics(system = system, reference = reference, var = var, :
+  Parameter 'var' must be a character vector with the var names.
+```
+
+Now let's place a browser() in the Scorecards.R script, for example in line 55: in the empty line, just write "browser()". Then run everything again, including loading the modules.
+
+As you can see, the code stops running at the line where you placed the browser(), and the R console shows the "Browse[1]" prompt.
+
+If you press Enter, it will execute the next line of code. Keep pressing Enter until it gives you an error: you will see that the error appears in line 79, in the function "LoadMetrics()", because of the typo we introduced. It indicates that 'var' should contain the names of the variables, but it is reading "namex" instead.
+
+Type Q to stop debugging, go to the Scorecards.R file and fix the bug.
+
+
+
+How to use traceback()?
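+
+traceback() prints the sequence of calls that led to the last error, from the innermost function (where the error was raised) to the outermost one. Unlike browser(), it does not require editing any script: you just run it right after the error appears. As a minimal sketch, reusing the "namex" typo from above (the exact call numbers and arguments depend on your session):
+
+```r
+Scorecards(scorecard_recipe)
+# Error in LoadMetrics(system = system, reference = reference, var = var, :
+#   Parameter 'var' must be a character vector with the var names.
+traceback()
+# 2: LoadMetrics(system = system, reference = reference, var = var, ...)
+# 1: Scorecards(scorecard_recipe)
+```
+
+The top entries point to the function where the error was raised, in this case LoadMetrics(), so that is the place to start looking.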
+
+
+
diff --git a/hw_casestudy_anomalies.R b/hw_casestudy_anomalies.R
new file mode 100644
index 0000000000000000000000000000000000000000..c725def098106eda33f69277fc590a3b1df1048b
--- /dev/null
+++ b/hw_casestudy_anomalies.R
@@ -0,0 +1,76 @@
+Sys.setenv(LANG = "en")
+Sys.setlocale("LC_TIME", "en_GB")
+library(startR)
+library(s2dverification)
+library(abind)
+library(CSTools)
+library(zeallot)
+library(s2dv)
+library(easyVerification)
+library(ClimProjDiags)
+library(ggplot2)
+
+
+
+Lat <- data0$hcst$coords$latitude
+Lon <- data0$hcst$coords$longitude
+
+PlotEquiMap(aug03, Lon, Lat, coast_width = 1.5, filled.continents = FALSE, brks = 21,
+            bar_limits = c(-10, 10), toptitle = ("blablabla"), width = 10, height = 10,
+            units = "Pa", country.borders = TRUE, bar_extra_margin = c(1, 0, 1, 0),
+            fileout = "tas_cat_130803.png", triangle_ends = c(TRUE, TRUE),
+            intylat = 10, intxlon = 10)
+
+
+#ggsave("subseasonal_BCN_heatwave_tasmax_bss90.png", plot, width = 7, height = 5)
+
+path <- "/esarchive/scratch/ptrascas/R/sunset/tas_clim/era5land_monthlymean.nc"
+path <- paste0(path)
+lons.min <- -50
+lons.max <- 5
+lats.min <- 35
+lats.max <- 45
+clim <- Start(dat = path,
+              var = 'tas',
+              lat = values(list(lats.min, lats.max)),
+              lat_reorder = Sort(decreasing = TRUE),
+              lon = values(list(lons.min, lons.max)),
+              lon_reorder = CircularSort(-180, 180),
+              synonims = list(lat = c('lat', 'latitude'),
+                              lon = c('lon', 'longitude')),
+              return_vars = list(lat = 'dat',
+                                 lon = 'dat'),
+              split_multiselected_dims = TRUE,
+              retrieve = TRUE)
+
+clim <- as.s2dv_cube(clim)
+clim$data <- clim$data[1,1,,] - 273.15
+
+path <- "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_202308.nc"
+path <- paste0(path)
+day <- Start(dat = path,
+             var = 'tas',
+             time = 'all',
+             lat = values(list(lats.min, lats.max)),
+             lat_reorder = Sort(decreasing = TRUE),
+             lon = values(list(lons.min, lons.max)),
+             lon_reorder = CircularSort(-180, 180),
+             synonims = list(lat = c('lat', 'latitude'),
+                             lon = c('lon', 'longitude')),
+             return_vars = list(lat = 'dat',
+                                lon = 'dat'),
+             split_multiselected_dims = TRUE,
+             retrieve = TRUE)
+
+day <- as.s2dv_cube(day)
+
+day$data <- day$data[1,1,23,,] - 273.15
+
+Lat <- clim$coords$lat
+Lon <- clim$coords$lon
+
+anomaly <- day$data - clim$data
+
+PlotEquiMap(anomaly, Lon, Lat, coast_width = 1.5, filled.continents = FALSE, brks = 21, bar_limits = c(-10, 10),
+            toptitle = ("Anomalia de temperatura 23/08/23"), width = 10, height = 10, units = "C",
+            country.borders = TRUE, bar_extra_margin = c(1, 0, 1, 0),
+            fileout = "tas_iberia_230823.png",
+            triangle_ends = c(TRUE, TRUE), intylat = 10, intxlon = 10)
+
diff --git a/hw_casestudy_vitigeoss.R b/hw_casestudy_vitigeoss.R
new file mode 100644
index 0000000000000000000000000000000000000000..e7d29b42fc5afa40ef8eb6c0db63fb0c61cc73f9
--- /dev/null
+++ b/hw_casestudy_vitigeoss.R
@@ -0,0 +1,176 @@
+rm(list = ls())
+library(ncdf4)
+library(abind)
+library(fields)
+library(CSTools)
+library(RColorBrewer)
+library(s2dv)
+library(ggplot2)
+library(startR)
+library(multiApply)
+
+# Load 4 start dates
+
+week <- c("20230727", "20230803", "20230810", "20230817")
+path <- "/esarchive/exp/VITIGEOSS/subseasonal/"
+var = 't2'
+region = 'cat'
+path <- paste0(path, var, "/$sdates$/", var, "_", region, "_$sdates$.nc")
+res <- Start(dat = path, var = 't2', sdates = week, latitude = 'all', longitude = 'all', ensemble = 'all', time = 'all', retrieve = T)
+res <- as.s2dv_cube(res)
+
+# Extract Barcelona
+
+bcn_lt4 <- res$data[1,1,1,15,32,,4]
+bcn_lt3 <- res$data[1,1,2,15,32,,3]
+bcn_lt2 <- res$data[1,1,3,15,32,,2]
+bcn_lt1 <- res$data[1,1,4,15,32,,1]
+
+# Load probabilities with the ERA5-Land climatologies
+
+# For this I'm only taking the week I'm interested in (week34)
+
+# Load probabilities
+# compute terciles with ERA5-Land over the hindcast period
+# For this I'm only taking the week prior to the one I'm interested in (20230817)
+week <- c("20230817")
+path <- "/esarchive/exp/VITIGEOSS/subseasonal/"
+var = c("p33", "p66", "p10", "p90")
+region = 'cat'
+path <- paste0(path, "t2/$sdates$/t2_", region, "_percentiles_week33.nc")
+per <- Start(dat = path, var = var, sdates = week, latitude = 'all', longitude = 'all', time = 'all', retrieve = T)
+per <- as.s2dv_cube(per)
+
+# take only the first timestep because I've loaded the target week?
+per33 <- per$data[1,1,1,15,32,1]
+per66 <- per$data[1,2,1,15,32,1]
+per10 <- per$data[1,3,1,15,32,1]
+per90 <- per$data[1,4,1,15,32,1]
+
+
+# Add the skill
+
+week <- c("20230727", "20230803", "20230810", "20230817")
+weekn <- c("30", "31", "32", "33")
+path <- "/esarchive/exp/VITIGEOSS/subseasonal/"
+var = c('rpss', 'bsp10', 'bsp90')
+region = 'cat'
+path <- paste0(path, "t2/$sdates$/t2_", region, "_skill_week$weekn$.nc")
+skill <- Start(dat = path, var = var, sdates = week, weekn = weekn, latitude = 'all', longitude = 'all', time = 'all', retrieve = T)
+skill <- as.s2dv_cube(skill)
+
+rpss4 <- skill$data[1,1,1,1,15,32,4]
+rpss3 <- skill$data[1,1,2,2,15,32,3]
+rpss2 <- skill$data[1,1,3,3,15,32,2]
+rpss1 <- skill$data[1,1,4,4,15,32,1]
+
+rpss4 <- round(rpss4/100, digits = 2)
+rpss3 <- round(rpss3/100, digits = 2)
+rpss2 <- round(rpss2/100, digits = 2)
+rpss1 <- round(rpss1/100, digits = 2)
+
+# Add observations:
+
+path_obs <- "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_202308.nc"
+variable <- "tas"
+regions <- c(lat = 41.39, lon = 2.1)
+obs <- Start(dat = path_obs, var = variable, time = 'all', lat = values(regions[1]), lon = values(regions[2]), return_vars = list(lat = NULL, lon = NULL), retrieve = T)
+#obs <- as.s2dv_cube(obs)
+
+as.vector(attributes(obs)$Variables$common$lat)
+as.vector(attributes(obs)$Variables$common$lon)
+
+# Extract Barcelona and average from day 21 to day 27
+obs <- obs[1,1,21:27,1,1]
+obs <- obs - 273.15
+obs <- mean(obs)
+
+# Plot forecast PDF
+
+fcst <- data.frame(fcst1 = bcn_lt4, fcst2 = bcn_lt3, fcst3 = bcn_lt2, fcst4 = bcn_lt1)
+
+# use the percentiles loaded above as tercile and extreme limits
+plot <- PlotForecastPDF(fcst, tercile.limits = c(per33, per66), extreme.limits = c(per10, per90),
+                        var.name = "Temperatura diaria (C)",
+                        fcst.names = c(paste0("ftime: days 26-32\n sdate: 27/07/23\nRPSS: ", rpss4),
+                                       paste0("ftime: days 19-25\n sdate: 03/08/23\nRPSS: ", rpss3),
+                                       paste0("ftime: days 12-18\n sdate: 10/08/23\nRPSS: ", rpss2),
+                                       paste0("ftime: days 5-11\n sdate: 17/08/23\nRPSS: ", rpss1)),
+                        obs = c(obs, obs, obs, obs),
+                        title = "Prediccion subestacional de la ola de calor de 2023 - Barcelona")
+
+ggsave("tsubseasonal_vit_BCN_aug23_t2_rpss.png", plot, width = 7, height = 5)
+
+
+##### SAME WITH SEASONAL ####
+
+# Load 3 start dates, max fcst time = 3?
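+# The seasonal case mirrors the subseasonal workflow above: load the forecasts,
+# the percentiles and the skill scores, then the observations, and finish with
+# PlotForecastPDF.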
+ +month <- c("20230501", "20230601", "20230701") +path <- "/esarchive/exp/VITIGEOSS/seasonal/" +var = 't2' +region = 'cat' +path <- paste0(path, var, "/$sdates$/", var, "_", region, "_$sdates$.nc") +res <- Start(dat = path, var='t2', sdates = month, latitude = 'all', longitude = 'all', ensemble = 'all', time = 'all', retrieve = T) +res <- as.s2dv_cube(res) + +# Extract Barcelona + +bcn_lt3 <- res$data[1,1,1,15,32,,3] +bcn_lt2 <- res$data[1,1,2,15,32,,2] +bcn_lt1 <- res$data[1,1,3,15,32,,1] + +# Load probabilities +# calcular terciles con ERA5 LAND con el periodo del hindcast +# For this I'm only taking the week I'm interested in (20230817), is that correct? +month <- c("202307") +path <- "/esarchive/exp/VITIGEOSS/seasonal/" +var = c("p33","p66","p10","p90") +region = 'cat' +path <- paste0(path,"t2/$sdates$/t2_", region, "_percentiles_month07.nc") +per <- Start(dat = path, var= var, sdates=month, latitude = 'all', longitude = 'all', time = 'all', retrieve = T) +per <- as.s2dv_cube(per) + +#take only first timestep because I've loaded the target week? +per33 <- per$data[1,1,1,15,32,1] +per66 <- per$data[1,2,1,15,32,1] +per10 <- per$data[1,3,1,15,32,1] +per90 <- per$data[1,4,1,15,32,1] + +# Add the skill + +month <- c("20230501", "20230601", "20230701") +monthn <- c("05","06","07") +path <- "/esarchive/exp/VITIGEOSS/seasonal/" +var = c('rpss', 'bsp10', 'bsp90') +region = 'cat' +path <- paste0(path, "t2/$sdates$/t2_", region, "_skill_month$monthn$.nc") +skill <- Start(dat = path, var = var, sdates = month, monthn = monthn, + latitude = 'all', longitude = 'all', time = 'all', retrieve = T) +skill <- as.s2dv_cube(skill) + +rpss3 <- skill$data[1,1,1,1,15,32,3] +rpss2 <- skill$data[1,1,2,2,15,32,2] +rpss1 <- skill$data[1,1,3,3,15,32,1] + +rpss3 <- round(rpss3/100,digits=2) +rpss2 <- round(rpss2/100,digits=2) +rpss1 <- round(rpss1/100,digits=2) + +# Add observations: + +path_obs <- "/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_202308.nc" +variable <- "tas" +regions <- c(lat = 41.39, lon = 2.1) +obs <- Start(dat = path_obs, var = variable, time = 'all', lat = values(regions[1]), + lon = values(regions[2]), return_vars = list(lat = NULL, lon = NULL), retrieve = T) +#obs <- as.s2dv_cube(obs) +obs <- obs -273.15 + +# PlotForecastPDF + +fcst <- data.frame(fcst1 = bcn_lt3, fcst2 = bcn_lt2, fcst3 = bcn_lt1) + +plot <-PlotForecastPDF(fcst,tercile.limits = c(per33,per66), extreme.limits = c(per10, per90), + var.name = "Temperatura diaria (C)",fcst.names = c(paste0("Valid for: Aug23\nIssued: May23\nRPSS: ",rpss3), + paste0("Valid for: Aug23\nIssued: June23\nRPSS: ",rpss2),paste0("Valid for: Aug23\nIssued: July23\nRPSS: ",rpss1)) , + obs = c(obs,obs,obs),title = "Prediccion estacional de la ola de calor de 2023 - Barcelona") + +ggsave("seasonal_vit_BCN_aug23_t2_rpss.png", plot, width = 7, height = 5) + diff --git a/hw_casestudy_weeklyevol.R b/hw_casestudy_weeklyevol.R new file mode 100644 index 0000000000000000000000000000000000000000..2cec57f6bb85708d63f3f163f3f5af48b2132117 --- /dev/null +++ b/hw_casestudy_weeklyevol.R @@ -0,0 +1,10 @@ +library(lubridate) +library(ggplot2) +library(RColorBrewer) +library(scales) +library(ClimProjDiags) +library(s2dv) +library(abind) +library(multiApply) +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/master/R/CST_SplitDim.R") +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-allow_plot_outside_reference/R/PlotWeeklyClim.R") diff --git a/modules/Calibration/Calibration_subseasonal.R b/modules/Calibration/Calibration_subseasonal.R 
new file mode 100644 index 0000000000000000000000000000000000000000..6c64b4a334cbc06e91b5da242f2dc6b306460ac3 --- /dev/null +++ b/modules/Calibration/Calibration_subseasonal.R @@ -0,0 +1,199 @@ +## TODO: Remove in the next release +source("modules/Calibration/calibrate_datasets.R") + +Calibration <- function(recipe, data) { + # Function that calibrates the hindcast using the method stated in the + # recipe. If the forecast is not null, it calibrates it as well. + # + # data: list of s2dv_cube objects containing the hcst, obs and fcst. + # Optionally, it may also have hcst.full_val and obs.full_val. + # recipe: object obtained when passing the .yml recipe file to read_yaml() + + method <- tolower(recipe$Analysis$Workflow$Calibration$method) + + if (method == "raw") { + warn(recipe$Run$logger, + paste("The Calibration module has been called, but the calibration", + "method in the recipe is 'raw'. The hcst and fcst will not be", + "calibrated.")) + fcst_calibrated <- data$fcst + hcst_calibrated <- data$hcst + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- data$hcst.full_val + } else { + hcst_full_calibrated <- NULL + } + CALIB_MSG <- "##### NO CALIBRATION PERFORMED #####" + + } else { + ## TODO: Calibrate full fields when present + # Calibration function params + mm <- recipe$Analysis$Datasets$Multimodel + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + CALIB_MSG <- "##### CALIBRATION COMPLETE #####" + # Replicate observation array for the multi-model case + ## TODO: Implement for obs.full_val + if (mm) { + obs.mm <- data$obs$data + for(dat in 1:(dim(data$hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, data$obs$data, + along=which(names(dim(data$obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(data$obs$dims) + data$obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + if (!(method %in% CST_CALIB_METHODS)) { + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "monthly data.")) + stop() + } else { + # Calibrate the hindcast + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "member", + sdate_dim = "syear", + ncores = ncores) + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_Calibration(data$hcst.full_val, + data$obs.full_val, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + memb_dim = "member", + sdate_dim = "syear", + ncores = ncores) + } else { + hcst_full_calibrated <- NULL + } + + # Calibrate the forecast + if (!is.null(data$fcst)) { + fcst_calibrated <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "member", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + } + } else if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { + # Daily data 
calibration using Quantile Mapping + if (!(method %in% c("qmap"))) { + error(recipe$Run$logger, + paste("Calibration method in the recipe is not available for", + "daily data. Only quantile mapping 'qmap is implemented.")) + stop() + } + # Calibrate the hindcast + dim_order <- names(dim(data$hcst$data)) + hcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "member", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order + hcst_calibrated$data <- Reorder(hcst_calibrated$data, dim_order) + # In the case where anomalies have been computed, calibrate full values + if (!is.null(data$hcst.full_val)) { + hcst_full_calibrated <- CST_QuantileMapping(data$hcst.full_val, + data$obs.full_val, + exp_cor = NULL, + sdate_dim = "syear", + memb_dim = "member", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + } else { + hcst_full_calibrated <- NULL + } + + if (!is.null(data$fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_QuantileMapping(data$hcst, data$obs, + exp_cor = data$fcst, + sdate_dim = "syear", + memb_dim = "member", + # window_dim = "time", + method = "QUANT", + ncores = ncores, + na.rm = na.rm, + wet.day = F) + # Restore dimension order + fcst_calibrated$data <- Reorder(fcst_calibrated$data, dim_order) + } else { + fcst_calibrated <- NULL + } + } + } + info(recipe$Run$logger, CALIB_MSG) + .log_memory_usage(recipe$Run$logger, "After calibration") + # Saving + if (recipe$Analysis$Workflow$Calibration$save != 'none') { + info(recipe$Run$logger, "##### START SAVING CALIBRATED DATA #####") + + ## TODO: What do we do with the full values? + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && (!is.null(data$fcst))) { + save_forecast(recipe = recipe, data_cube = fcst_calibrated, type = 'fcst') + } + if (recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = hcst_calibrated, type = 'hcst') + } + if (recipe$Analysis$Workflow$Calibration$save == 'all') { + save_observations(recipe = recipe, data_cube = data$obs) + } + } + + ## TODO: Sort out returns + return_list <- list(hcst = hcst_calibrated, + obs = data$obs, + fcst = fcst_calibrated) + if (!is.null(hcst_full_calibrated)) { + return_list <- append(return_list, + list(hcst.full_val = hcst_full_calibrated, + obs.full_val = data$obs.full_val)) + } + return(return_list) +} diff --git a/modules/Downscaling/Downscaling.R b/modules/Downscaling/Downscaling.R index 59233dc2ac02749e91b597e832c721a412915f55..bb5c2197ab6c3a698437da642d53a6e375871d7e 100644 --- a/modules/Downscaling/Downscaling.R +++ b/modules/Downscaling/Downscaling.R @@ -34,7 +34,7 @@ Downscaling <- function(recipe, data) { target_grid <- tolower(recipe$Analysis$Workflow$Downscaling$target_grid) nanalogs <- as.numeric(recipe$Analysis$Workflow$Downscaling$nanalogs) size <- recipe$Analysis$Workflow$Downscaling$size - + if (is.null(recipe$Analysis$ncores)) { ncores <- 1 } else { @@ -47,12 +47,10 @@ Downscaling <- function(recipe, data) { } else { loocv <- recipe$Analysis$loocv } - DOWNSCAL_TYPES <- c("none", "int", "intbc", "intlr", "analogs", "logreg") BC_METHODS <- c("quantile_mapping", "bias", "evmos", "mse_min", "crps_min", "rpc-based", "qm") LR_METHODS <- c("basic", "large-scale", "4nn") LOG_REG_METHODS <- c("ens_mean", "ens_mean_sd", 
"sorted_members") - if (!(type %in% DOWNSCAL_TYPES)) { stop("Downscaling type in the recipe is not available. Accepted types ", "are 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.") @@ -66,7 +64,7 @@ Downscaling <- function(recipe, data) { if (is.null(target_grid)) { stop("Please provide the target grid in the recipe.") } - + # Ensure that observations are in the same grid as experiments # Only needed for this method because the others already return the # observations diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index cd4252ab7365d038b9333f506604e2ad4d6b9afb..e02dc55ddf047afa01e9d1198e0803a5eec9dd77 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -128,7 +128,8 @@ save_metrics <- function(recipe, # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) vars <- c(vars, subset_skill) - ArrayToNc(vars, outfile) + + ArrayToNc(vars, outfile) } } info(recipe$Run$logger, "##### SKILL METRICS SAVED TO NETCDF FILE #####") diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R index 4373057b6e0d3901abcb3fc27c09006f5156131d..e24ffce753263b61aad5e20b174d8ff120abfa4f 100644 --- a/modules/Scorecards/R/tmp/SCPlotScorecard.R +++ b/modules/Scorecards/R/tmp/SCPlotScorecard.R @@ -280,7 +280,7 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time', } ## Check legend white space if (is.null(legend.white.space)){ - legend.white.space <- 6 + legend.white.space <- 3.5 } else { legend.white.space <- legend.white.space } @@ -407,7 +407,7 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time', table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% - column_spec(3:n.columns, width_min = "1.2cm") %>% + column_spec(3:n.columns, width_min = "1.5cm") %>% column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% column_spec(1, border_left = "2px solid black") %>% column_spec(n.columns, border_right = "2px solid black") %>% @@ -420,7 +420,6 @@ SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time', # White space for legend legend.white.space <- 37.8 * legend.white.space ## converting pixels to cm - # Create and save color bar legend scorecard_legend <- .SCLegend(legend.breaks = legend.breaks, palette = palette, diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 56f08204ad5443b94bbc281a31f3c75cf5cb7614..957d3261a00521f6402767c00f8f3dce598be4fe 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -45,7 +45,7 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, start.months, forecast.months, region.names, metrics, legend.breaks = NULL, table.label = NULL, fileout.label = NULL, - legend.white.space = NULL, + legend.white.space = NULL, legend.width = NULL, col1.width = NULL, col2.width = NULL, output.path){ @@ -129,7 +129,6 @@ ScorecardsSingle <- function(data, system, reference, var, start.year, end.year, ## Legend inputs plot.legend = TRUE label.scale = 1.4 - legend.width = 555 legend.height = 50 ## Data display inputs diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 0dbcd9210c9227200c872204526ea4e6df05adcb..2b1abecf5017392879c6b4dc803b60b84c742d41 100644 --- 
a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -69,6 +69,7 @@ Scorecards <- function(recipe) { inf.to.na <- recipe$Analysis$Workflow$Scorecards$inf_to_na table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + legend.width <- recipe$Analysis$Workflow$Scorecards$legend_width legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width @@ -141,6 +142,7 @@ Scorecards <- function(recipe) { region.names = names(regions), metrics = metrics.visualize, table.label = table.label, + legend.width = legend.width, fileout.label = fileout.label, legend.white.space = legend.white.space, col1.width = col1.width, diff --git a/modules/Skill/Skill_subseasonal.R b/modules/Skill/Skill_subseasonal.R new file mode 100644 index 0000000000000000000000000000000000000000..5f7f7c25bf3f961771c618ae77a54ea662e6af53 --- /dev/null +++ b/modules/Skill/Skill_subseasonal.R @@ -0,0 +1,465 @@ +# This module should calculate verification metrics at any time of the workflow +# It should implement different verification metrics: +# - FRPSS and RPSS +# - FCRPSS and CRPSS +# - enscorr +# - bias +# - reliability diagram +# - ask Carlos which decadal metrics he is currently using + +source("modules/Skill/R/compute_quants.R") +source("modules/Skill/R/compute_probs.R") +source("modules/Skill/R/s2s.metrics.R") +source("modules/Saving/R/drop_dims.R") +## TODO: Remove when they are included in s2dv +source("modules/Skill/R/RPS_clim.R") +source("modules/Skill/R/CRPS_clim.R") +source("modules/Skill/R/tmp/GetProbs.R") +## TODO: Remove in the next release +source("modules/Skill/compute_skill_metrics.R") +source("modules/Skill/compute_probabilities.R") + +Skill <- function(recipe, data, agg = 'global') { + + # data$hcst: s2dv_cube containing the hindcast + # obs: s2dv_cube containing the observations + # recipe: auto-s2s recipe as provided by read_yaml + + ## TODO: Adapt time_dims to subseasonal case + ## TODO: Add dat_dim + ## TODO: Refine error messages + ## TODO: Add check to see if anomalies are provided (info inside s2dv_cube) + +# if (recipe$Analysis$Workflow$Anomalies$compute) { +# if (is.null(clim_data$hcst) || is.null(clim_obs)) { +# warn(recipe$Run$logger, "Anomalies have been requested in the recipe, +# but the climatologies have not been provided in the +# compute_skill_metrics call. Be aware that some metrics like the +# Mean Bias may not be correct.") +# } +# } else { +# warn(recipe$Run$logger, "Anomaly computation was not requested in the +# recipe. 
Be aware that some metrics, such as the CRPSS may not be +# correct.") +# } + time_dim <- 'syear' + memb_dim <- 'member' + metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + if (is.null(recipe$Analysis$Workflow$Skill$cross_validation)) { + warn(recipe$Run$logger, + "cross_validation parameter not defined, setting it to FALSE.") + cross.val <- FALSE + } else { + cross.val <- recipe$Analysis$Workflow$Skill$cross_validation + } + skill_metrics <- list() + for (metric in strsplit(metrics, ", | |,")[[1]]) { + # Whether the fair version of the metric is to be computed + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90', + 'fcrps', 'fcrpss')) { + Fair <- T + } else { + Fair <- F + } + # Whether to compute correlation for the ensemble mean or for each member + if (metric == 'corr_individual_members') { + memb <- T + } else if (metric == 'enscorr') { + memb <- F + } + # Ranked Probability Score and Fair version + if (metric %in% c('rps', 'frps')) { + skill <- RPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + rps_clim <- Apply(list(data$obs$data), + target_dims = c(time_dim, memb_dim), + cross.val = cross.val, + RPS_clim)$output1 + rps_clim <- .drop_dims(rps_clim) + skill_metrics[[paste0(metric, "_clim")]] <- rps_clim + # Ranked Probability Skill Score and Fair version + } else if (metric %in% c('rpss', 'frpss')) { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + cross.val = cross.val, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Brier Skill Score - 10th percentile + } else if (metric == 'bss10') { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.1, + cross.val = cross.val, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Brier Skill Score - 90th percentile + } else if (metric == 'bss90') { + skill <- RPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + prob_thresholds = 0.9, + cross.val = cross.val, + Fair = Fair, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # CRPS and FCRPS + } else if (metric %in% c('crps', 'fcrps')) { + skill <- CRPS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + Fair = Fair, + ncores = ncores) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + crps_clim <- Apply(list(data$obs$data), target_dims = time_dim, + fun = CRPS_clim, memb_dim = memb_dim, + clim.cross.val = cross.val)$output1 + crps_clim <- .drop_dims(crps_clim) + skill_metrics[['crps_clim']] <- crps_clim + # CRPSS and FCRPSS + } else if (metric %in% c('crpss', 'fcrpss')) { + skill <- CRPSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + clim.cross.val = cross.val, + Fair = Fair, 
+ ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$crpss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'rms') { + source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RMS.R") + hcst_mean <- Apply(list(data$hcst$data), target_dims = memb_dim, + fun = mean, na.rm = na.rm, ncores = ncores)$output1 + hcst_mean <- InsertDim(hcst_mean, pos = 1, len = 1, name = memb_dim) + skill <- RMS(exp = hcst_mean, + obs = data$obs$data, + time_dim = time_dim, dat_dim = NULL, comp_dim = NULL, + limits = NULL, conf = FALSE, alpha = 0.05, ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rms + # Mean bias (climatology) + } else if (metric == 'mean_bias') { + ## TODO: Eliminate option to compute from anomalies + # Compute from full field + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- Bias(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- Bias(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # Mean bias skill score + } else if (metric == 'mean_bias_ss') { + if ((!is.null(data$hcst.full_val)) && (!is.null(data$obs.full_val)) && + (recipe$Analysis$Workflow$Anomalies$compute)) { + skill <- AbsBiasSS(data$hcst.full_val$data, data$obs.full_val$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } else { + skill <- AbsBiasSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + ncores = ncores) + } + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$biasSS + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + # Ensemble mean correlation + } else if (metric %in% c('enscorr', 'corr_individual_members')) { + ## TODO: Return significance + ## TODO: Implement option for Kendall and Spearman methods? 
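+      # 'memb' was set above: FALSE computes the correlation of the ensemble
+      # mean, TRUE computes the correlation for each individual member.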
+ skill <- s2dv::Corr(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + memb = memb, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$corr + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'rmsss') { + # Compute RMSSS + skill <- RMSSS(data$hcst$data, data$obs$data, + dat_dim = 'dat', + time_dim = time_dim, + memb_dim = memb_dim, + pval = FALSE, + sign = TRUE, + sig_method = 'Random Walk', + ncores = ncores) + # Compute ensemble mean and modify dimensions + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$rmsss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'mse') { + skill <- MSE(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = 'dat', + comp_dim = NULL, + conf = FALSE, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$mse + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'msss') { + skill <- MSSS(data$hcst$data, data$obs$data, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = 'dat', + sign = TRUE, + ncores = ncores) + skill <- lapply(skill, function(x) { + .drop_dims(x)}) + skill_metrics[[ metric ]] <- skill$msss + skill_metrics[[ paste0(metric, "_significance") ]] <- skill$sign + } else if (metric == 'enssprerr') { + # Remove ensemble dim from obs to avoid veriApply warning + obs_noensdim <- ClimProjDiags::Subset(data$obs$data, "ensemble", 1, + drop = "selected") + capture.output( + skill <- easyVerification::veriApply(verifun = 'EnsSprErr', + fcst = data$hcst$data, + obs = obs_noensdim, + tdim = which(names(dim(data$hcst$data))==time_dim), + ensdim = which(names(dim(data$hcst$data))==memb_dim), + na.rm = na.rm, + ncpus = ncores) + ) + remove(obs_noensdim) + skill <- .drop_dims(skill) + skill_metrics[[ metric ]] <- skill + # SpecsVerification metrics + } else if (grepl("specs", metric, fixed = TRUE)) { + # Compute SpecsVerification version of the metrics + ## Retain _specs in metric name for clarity + metric_name <- (strsplit(metric, "_"))[[1]][1] # Get metric name + if (!(metric_name %in% c('frpss', 'frps', 'bss10', 'bss90', 'enscorr', + 'rpss'))) { + warn(recipe$Run$logger, + "Some of the requested SpecsVerification metrics are not available.") + } + capture.output( + skill <- Compute_verif_metrics(data$hcst$data, data$obs$data, + skill_metrics = metric_name, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + ) + skill <- .drop_dims(skill) + if (metric_name == "frps") { + # Compute yearly mean for FRPS + skill <- colMeans(skill, dims = 1) + } + skill_metrics[[ metric ]] <- skill + } + } + info(recipe$Run$logger, "##### SKILL METRIC COMPUTATION COMPLETE #####") + .log_memory_usage(recipe$Run$logger, when = "After skill metric computation") + # Save outputs + #NURIA: I think change the output_dir is a problem for future savings + if (recipe$Analysis$Workflow$Skill$save != 'none') { + info(recipe$Run$logger, "##### START SAVING SKILL METRIC #####") + } + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Skill/") + # Separate 'corr' from the rest of the metrics because of extra 'ensemble' dim + if (recipe$Analysis$Workflow$Skill$save == 'all') { + corr_metric_names <- 
grep("^corr_individual_members", names(skill_metrics)) + if (length(corr_metric_names) == 0) { + save_metrics(recipe = recipe, skill = skill_metrics, + data_cube = data$hcst, agg = agg) + } else { + # Save corr + if (length(skill_metrics[corr_metric_names]) > 0) { + save_corr(recipe = recipe, skill = skill_metrics[corr_metric_names], + data_cube = data$hcst) + } + # Save other skill metrics + if (length(skill_metrics[-corr_metric_names]) > 0) { + save_metrics(recipe = recipe, skill = skill_metrics[-corr_metric_names], + data_cube = data$hcst, agg = agg) + } + } + } + # Return results + return(skill_metrics) +} + +Probabilities <- function(recipe, data) { + ## TODO: Do hcst and fcst at the same time + if (is.null(recipe$Analysis$ncores)) { + ncores <- 1 + } else { + ncores <- recipe$Analysis$ncores + } + + if (is.null(recipe$Analysis$remove_NAs)) { + na.rm = F + } else { + na.rm = recipe$Analysis$remove_NAs + } + + named_probs <- list() + named_probs_fcst <- list() + named_quantiles <- list() + + if (is.null(recipe$Analysis$Workflow$Probabilities$percentiles)) { + error(recipe$Run$logger, "Quantiles and probability bins have been + requested, but no thresholds are provided in the recipe.") + stop() + } else { + for (element in recipe$Analysis$Workflow$Probabilities$percentiles) { + # Parse thresholds in recipe + thresholds <- sapply(element, function (x) eval(parse(text = x))) + quants <- compute_quants(data$hcst$data, thresholds, + ncores = ncores, + na.rm = na.rm) + probs <- compute_probs(data$hcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(quants)['bin'][[1]])) { + named_quantiles <- append(named_quantiles, + list(ClimProjDiags::Subset(quants, + 'bin', i))) + names(named_quantiles)[length(named_quantiles)] <- paste0("percentile_", + as.integer(thresholds[i]*100)) + } + for (i in seq(1:dim(probs)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs <- append(named_probs, + list(ClimProjDiags::Subset(probs, + 'bin', i))) + names(named_probs)[length(named_probs)] <- name_i + } + + # Compute fcst probability bins + if (!is.null(data$fcst)) { + probs_fcst <- compute_probs(data$fcst$data, quants, + ncores = ncores, + na.rm = na.rm) + + for (i in seq(1:dim(probs_fcst)['bin'][[1]])) { + if (i == 1) { + name_i <- paste0("prob_b", as.integer(thresholds[1]*100)) + } else if (i == dim(probs_fcst)['bin'][[1]]) { + name_i <- paste0("prob_a", as.integer(thresholds[i-1]*100)) + } else { + name_i <- paste0("prob_", as.integer(thresholds[i-1]*100), "_to_", + as.integer(thresholds[i]*100)) + } + named_probs_fcst <- append(named_probs_fcst, + list(ClimProjDiags::Subset(probs_fcst, + 'bin', i))) + names(named_probs_fcst)[length(named_probs_fcst)] <- name_i + } + } + } + # Rearrange dimensions and return probabilities + named_probs <- lapply(named_probs, function(x) {.drop_dims(x)}) + named_quantiles <- lapply(named_quantiles, function(x) {.drop_dims(x)}) + if (!is.null(data$fcst)) { + fcst_years <- dim(data$fcst$data)[['syear']] + named_probs_fcst <- lapply(named_probs_fcst, function(x) {.drop_dims(x)}) + # function(x) {Subset(x, + # along = 'syear', + # indices = 1:fcst_years, + # drop = 'non-selected')}) + results <- list(probs = named_probs, + probs_fcst = named_probs_fcst, + percentiles = 
named_quantiles)
+  } else {
+    results <- list(probs = named_probs,
+                    percentiles = named_quantiles)
+  }
+
+  info(recipe$Run$logger,
+       "##### PERCENTILES AND PROBABILITY CATEGORIES COMPUTED #####")
+  .log_memory_usage(recipe$Run$logger, when = "After probability computation")
+  # Save outputs
+  if (recipe$Analysis$Workflow$Probabilities$save != 'none') {
+    info(recipe$Run$logger,
+         "##### START SAVING PERCENTILES AND PROBABILITY CATEGORIES #####")
+
+    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+                                    "/outputs/Skill/")
+    # Save percentiles
+    if (recipe$Analysis$Workflow$Probabilities$save %in%
+        c('all', 'percentiles_only')) {
+      save_percentiles(recipe = recipe, percentiles = results$percentiles,
+                       data_cube = data$hcst)
+    }
+    # Save probability bins
+    if (recipe$Analysis$Workflow$Probabilities$save %in%
+        c('all', 'bins_only')) {
+      save_probabilities(recipe = recipe, probs = results$probs,
+                         data_cube = data$hcst, type = "hcst")
+      if (!is.null(results$probs_fcst)) {
+        save_probabilities(recipe = recipe, probs = results$probs_fcst,
+                           data_cube = data$fcst, type = "fcst")
+      }
+    }
+  }
+
+  # Return results
+  return(results)
+  }
+}
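+
+# A minimal sketch (not run) of the bin-naming convention produced above,
+# assuming the default tercile request percentiles: [[1/3, 2/3]]:
+#   thresholds <- sapply(c("1/3", "2/3"), function(x) eval(parse(text = x)))
+#   as.integer(thresholds * 100)  # 33 66
+# The three probability bins are then named "prob_b33" (below the 33rd
+# percentile), "prob_33_to_66", and "prob_a66" (above the 66th percentile).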
diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R
index 3d00742dd4de5443c9cafddd13a7990ffafc1dd4..fdbf27ead80ccd2e947cf308d80bc6022d10353e 100644
--- a/modules/Visualization/R/plot_ensemble_mean.R
+++ b/modules/Visualization/R/plot_ensemble_mean.R
@@ -114,7 +114,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
                      "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear)
   months <- lubridate::month(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ],
-                             label = T, abb = F)
+                             label = T, abb = F, locale = "en_GB")
   years <- lubridate::year(fcst$attrs$Dates[1, 1, which(start_date == i_syear), ])
   if (recipe$Analysis$Workflow$Visualization$multi_panel) {
diff --git a/modules/Visualization/R/plot_skill_metrics_subseasonal.R b/modules/Visualization/R/plot_skill_metrics_subseasonal.R
new file mode 100644
index 0000000000000000000000000000000000000000..b4c2b273dffb70f3d413b33b9575b0c7e1662bc6
--- /dev/null
+++ b/modules/Visualization/R/plot_skill_metrics_subseasonal.R
@@ -0,0 +1,258 @@
+library(stringr)
+
+plot_skill_metrics <- function(recipe, data_cube, skill_metrics,
+                               outdir, significance = F, output_conf) {
+  # recipe: Auto-S2S recipe
+  # data_cube: s2dv_cube object with the corresponding hindcast data
+  # skill_metrics: list of named skill metric arrays
+  # outdir: output directory
+  # significance: T/F, whether to display the significance dots in the plots
+
+  # Abort if frequency is daily
+  if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) {
+    error(recipe$Run$logger,
+          "Visualization functions not yet implemented for daily data.")
+    stop()
+  }
+  # Abort if skill_metrics is not a named list
+  if (!is.list(skill_metrics) || is.null(names(skill_metrics))) {
+    stop("The element 'skill_metrics' must be a list of named arrays.")
+  }
+
+  latitude <- data_cube$coords$lat
+  longitude <- data_cube$coords$lon
+  archive <- get_archive(recipe)
+  system_name <- archive$System[[recipe$Analysis$Datasets$System$name]]$name
+  hcst_period <- paste0(recipe$Analysis$Time$hcst_start, "-",
+                        recipe$Analysis$Time$hcst_end)
+  if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+    init_month <- as.numeric(substr(recipe$Analysis$Time$sdate,
+                                    start = 1, stop = 2))
+  } else {
+    ## TODO: Sort out decadal initial month (is it always January?)
+    init_month <- 1
+  }
+  month_label <- tolower(month.name[init_month])
+  month_abbreviation <- month.abb[init_month]
+  # Get months
+  months <- lubridate::month(Subset(data_cube$attrs$Dates,
+                                    "syear", indices = 1),
+                             label = T, abb = F, locale = "en_GB")
+  if (!is.null(recipe$Analysis$Workflow$Visualization$projection)) {
+    projection <- tolower(recipe$Analysis$Workflow$Visualization$projection)
+  } else {
+    projection <- "cylindrical_equidistant"
+  }
+
+  # Define color palette and number of breaks according to output format
+  if (tolower(recipe$Analysis$Output_format) %in% c("scorecards", "cerise")) {
+    diverging_palette <- "purpleorange"
+    sequential_palette <- "Oranges"
+  } else {
+    diverging_palette <- "bluered"
+    sequential_palette <- "Reds"
+  }
+  # Group different metrics by type
+  skill_scores <- c("rpss", "bss90", "bss10", "frpss", "crpss", "mean_bias_ss",
+                    "enscorr", "rpss_specs", "bss90_specs", "bss10_specs",
+                    "enscorr_specs", "rmsss", "msss")
+  scores <- c("rps", "frps", "crps", "frps_specs", "mse")
+  # Loop over variables and assign colorbar and plot parameters to each metric
+  for (var in 1:data_cube$dims[['var']]) {
+    # Get the variable name here: it is needed to retrieve the units in the
+    # 'mean_bias' case below, before it is used again for the plot titles.
+    var_name <- data_cube$attrs$Variable$varName[[var]]
+    var_skill <- lapply(skill_metrics, function(x) {
+                          ClimProjDiags::Subset(x, along = 'var',
+                                                indices = var,
+                                                drop = 'selected')})
+    for (name in c(skill_scores, scores, "mean_bias", "enssprerr")) {
+      if (name %in% names(skill_metrics)) {
+        units <- NULL
+        # Define plot characteristics and metric name to display in plot
+        if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss",
+                        "rpss_specs", "bss90_specs", "bss10_specs",
+                        "rmsss", "msss")) {
+          display_name <- toupper(strsplit(name, "_")[[1]][1])
+          skill <- var_skill[[name]]
+          brks <- seq(-1, 1, by = 0.2)
+          colorbar <- clim.colors(length(brks) + 1, diverging_palette)
+          cols <- colorbar[2:(length(colorbar) - 1)]
+          col_inf <- colorbar[1]
+          col_sup <- NULL
+        } else if (name == "mean_bias_ss") {
+          display_name <- "Mean Bias Skill Score"
+          skill <- var_skill[[name]]
+          brks <- seq(-1, 1, by = 0.2)
+          colorbar <- clim.colors(length(brks) + 1, diverging_palette)
+          cols <- colorbar[2:(length(colorbar) - 1)]
+          col_inf <- colorbar[1]
+          col_sup <- NULL
+        } else if (name %in% c("enscorr", "enscorr_specs")) {
+          display_name <- "Ensemble Mean Correlation"
+          skill <- var_skill[[name]]
+          brks <- seq(-1, 1, by = 0.2)
+          cols <- clim.colors(length(brks) - 1, diverging_palette)
+          col_inf <- NULL
+          col_sup <- NULL
+        } else if (name %in% scores) {
+          skill <- var_skill[[name]]
+          display_name <- toupper(strsplit(name, "_")[[1]][1])
+          brks <- seq(0, 1, by = 0.1)
+          colorbar <- grDevices::hcl.colors(length(brks), sequential_palette)
+          cols <- colorbar[1:(length(colorbar) - 1)]
+          col_inf <- NULL
+          col_sup <- colorbar[length(colorbar)]
+        } else if (name == "enssprerr") {
+          skill <- var_skill[[name]]
+          display_name <- "Spread-to-Error Ratio"
+          brks <- c(0, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.4, 1.6, 1.8, 2)
+          colorbar <- clim.colors(length(brks), diverging_palette)
+          cols <- colorbar[1:(length(colorbar) - 1)]
+          col_inf <- NULL
+          col_sup <- colorbar[length(colorbar)]
+        } else if (name %in% "mean_bias") {
+          skill <- var_skill[[name]]
+          display_name <- "Mean Bias"
+          max_value <- max(abs(quantile(skill, 0.02, na.rm = T)),
+                           abs(quantile(skill, 0.98, na.rm = T)))
+          brks <- max_value * seq(-1, 1, by = 0.2)
+          colorbar <- clim.colors(length(brks) + 1, diverging_palette)
+          cols <- colorbar[2:(length(colorbar) - 1)]
+          col_inf <- colorbar[1]
+          col_sup <- colorbar[length(colorbar)]
+          units <-
data_cube$attrs$Variable$metadata[[var_name]]$units + } + # Reorder dimensions + skill <- Reorder(skill, c("time", "longitude", "latitude")) + # If the significance has been requested and the variable has it, + # retrieve it and reorder its dimensions. + significance_name <- paste0(name, "_significance") + if ((significance) && (significance_name %in% names(skill_metrics))) { + skill_significance <- var_skill[[significance_name]] + skill_significance <- Reorder(skill_significance, c("time", + "longitude", + "latitude")) + # Split skill significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + skill_significance <- ClimProjDiags::ArrayToList(skill_significance, + dim = "time", + level = "sublist", + names = "dots") + } else { + skill_significance <- NULL + } + # Define output file name and titles + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + outfile <- paste0(outdir[var], name, "-", month_label) + } else { + outfile <- paste0(outdir[var], name) + } + # Get variable name and long name + var_name <- data_cube$attrs$Variable$varName[[var]] + var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + # Multi-panel or single-panel plots + if (recipe$Analysis$Workflow$Visualization$multi_panel) { + # Define titles + toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + "\n", display_name, " / ", hcst_period) + titles <- as.vector(months) + ## TODO: Combine PlotLayout with PlotRobinson? + suppressWarnings( + PlotLayout(PlotEquiMap, c('longitude', 'latitude'), + asplit(skill, MARGIN=1), # Splitting array into a list + longitude, latitude, + special_args = skill_significance, + dot_symbol = 20, + toptitle = toptitle, + title_scale = 0.6, + titles = titles, + filled.continents = F, + brks = brks, + cols = cols, + col_inf = col_inf, + col_sup = col_sup, + fileout = paste0(outfile, ".png"), + bar_label_digits = 3, + bar_extra_margin = rep(0.9, 4), + extra_margin = rep(1, 4), + bar_label_scale = 1.5, + axes_label_scale = 1.3, + width = 11,#default i + height = 11) + ) + } else { + # Define function and parameters depending on projection + if (projection == 'cylindrical_equidistant') { + fun <- PlotEquiMap + output_configuration <- output_conf$PlotEquiMap$skill_metric + base_args <- list(var = NULL, dots = NULL, + lon = longitude, lat = latitude, + dot_symbol = 20, dot_size = 1, + title_scale = 0.6, + filled.continents = F, brks = brks, cols = cols, + col_inf = col_inf, col_sup = col_sup, + units = units, font.main = 2, + bar_label_digits = 3, bar_label_scale = 1.5, + axes_label_scale = 1, width = 8, height = 5) + base_args[names(output_configuration)] <- output_configuration + } else { + fun <- PlotRobinson + common_projections <- c("robinson", "stereographic", "lambert_europe") + if (projection %in% common_projections) { + target_proj <- get_proj_code(projection) + } else { + target_proj <- projection + } + ## TODO: handle output_conf + base_args <- list(data = NULL, mask = NULL, + lon = longitude, lat = latitude, + lon_dim = 'longitude', lat_dim = 'latitude', + target_proj = target_proj, legend = 's2dv', + style = 'point', brks = brks, cols = cols, + col_inf = col_inf, col_sup = col_sup) + } + # Loop over forecast times + for (i in 1:dim(skill)[['time']]) { + # Get forecast time label + forecast_time <- match(months[i], month.name) - init_month + 1 + + if (forecast_time < 1) { + forecast_time <- forecast_time + 12 + } + forecast_time <- sprintf("%02d", forecast_time) + # Define plot title + 
toptitle <- paste(system_name, "/", + str_to_title(var_long_name), + "\n", display_name, "/", months[i], "/", + hcst_period) + # Modify base arguments + base_args[[1]] <- skill[i, , ] + if (!is.null(skill_significance)) { + base_args[[2]] <- skill_significance[[i]][[1]] + significance_caption <- "alpha = 0.05" + } else { + significance_caption <- NULL + } + if (identical(fun, PlotRobinson)) { + ## TODO: Customize alpha and other technical details depending on the metric + base_args[['caption']] <- + paste0("Nominal start date: ", + "1st of ", str_to_title(month_label), "\n", + "Forecast month: ", forecast_time, "\n", + "Reference: ", recipe$Analysis$Datasets$Reference, "\n", + significance_caption) + } + fileout <- paste0(outfile, "_ft", forecast_time, ".png") + # Plot + + do.call(fun, + args = c(base_args, + list(toptitle = toptitle, + fileout = fileout))) + } + } + } + } + } + info(recipe$Run$logger, + "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") +} diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 0cd945be4b2d9cf578df057e2f2dc63c8ff241a2..f52bd3034f04c8c814be199e29b63a881be7c919 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -27,6 +27,42 @@ region: #units inches Multipanel: Robinson: skill_metrics: {width: 8, height: 5} + + Iberia: #latmin: 34, latmax: 46, lonmin: -10, lonmax: 5 + PlotEquiMap: + skill_metrics: + width: 8 + height: 7.5 + intylat: 2 + intxlon: 2 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(2,1,0.5,1) + dot_size: 1.7 + dot_symbol: 4 + font.main: 1 + forecast_ensemble_mean: + width: 8 + height: 7.5 + intylat: 2 + intxlon: 2 + axes_label_scale: 0.8 + bar_label_scale: 1.2 + bar_extra_margin: !expr c(2,1,0.5,1) + dot_symbol: 4 + dot_size: 1.7 + font.main: 1 + most_likely_terciles: + width: 8 + height: 7.5 + intylat: 2 + intxlon: 2 + dot_size: 2 + plot_margin: !expr c(0, 4.1, 4.1, 2.1) + Multipanel: + Robinson: + skill_metrics: {width: 8, height: 5} + NA-EU: #Norht Atlantic European region Mediterranean: Global: diff --git a/modules/Visualization/test_solar_ind.R b/modules/Visualization/test_solar_ind.R new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oper_heat_i4c_plotforecastpdf.R b/oper_heat_i4c_plotforecastpdf.R new file mode 100644 index 0000000000000000000000000000000000000000..e9b543594e4f086acc4e0d83ab18f4fa44773f88 --- /dev/null +++ b/oper_heat_i4c_plotforecastpdf.R @@ -0,0 +1,169 @@ +rm(list=ls()) +library(ncdf4) +library(abind) +library(fields) +library(CSTools) +library(RColorBrewer) +library(s2dv) +library(ggplot2) +library(startR) +library(multiApply) + +# Load 1 start date + + +#args = commandArgs(trailingOnly = TRUE) + +sdates <- format(Sys.Date() , "%Y%m%d") + +# Nuria's code to load the dates and name the weeks + +#sdates <- "20240411" +week <- format(seq(as.Date("20240104", "%Y%m%d"), as.Date("20241226", "%Y%m%d"), 7), "%Y%m%d") +weeknum <- sprintf("%02d", which(week == sdates)) + + +path <- "/esarchive/exp/VITIGEOSS/subseasonal/" +region <- 'cat' + +# There are some issues picking the location, numbers must be "exact" +Barcelona <- c(lat = 41.39, lon = 2.1) +Granollers <- c(lat = 41.61, lon = 2.29) +Viladecans <- c(lat = 41.31, lon = 2.02) +locations <- list(Barcelona, Granollers) + +vars <- c('t2','t02min','t02max') #just for testing + +for (loc in locations) { + + if (loc == Barcelona) { + loc_name <- "Barcelona" + } else if (loc == 
Granollers) { + loc_name <- "Granollers" + } + + for (var in vars){ + + # FORECAST + path_fcst <- paste0(path, var, "/$sdates$/", var, "_", region, "_$sdates$.nc") + fcst <- Start(dat = path_fcst, + var = var, + sdates = sdates, + latitude = values(loc[[1]]), + longitude = values(loc[[2]]), + ensemble = 'all', + time = 'all', + retrieve = T) + + as.vector(attributes(fcst)$Variables$common$latitude) + as.vector(attributes(fcst)$Variables$common$longitude) + + # PROBABILITIES + path_probs <- paste0(path, var, "/$sdates$/", var, "_", region, "_prob_$sdates$.nc") + + probs <- Start(dat = path_probs, + var = c('prob_bn', 'prob_n', 'prob_an'), + sdates = sdates, + latitude = values(loc[[1]]), + longitude = values(loc[[2]]), + time = 'all', + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdates'), + retrieve = T) + + as.vector(attributes(probs)$Variables$common$latitude) + as.vector(attributes(probs)$Variables$common$longitude) + attributes(probs)$Variables$common$time + dim(attributes(probs)$Variables$common$time) + + # TERCILE LIMITS + + path_lims <- paste0(path, var, "/$sdates$/", var, "_", region, + "_percentiles_week$weeknum$.nc") + dim(week) <- c(weeks = length(week), sdates = 1) + lims <- Start(dat = path_lims, + var = c('p33', 'p66'), + sdates = sdates, + weeknum = 'all', + weeknum_depends = 'sdates', + latitude = values(loc[[1]]), + longitude = values(loc[[2]]), + time = 'all', + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdates'), + retrieve = T) + as.vector(attributes(lims)$Variables$common$latitude) + as.vector(attributes(lims)$Variables$common$longitude) + attributes(lims)$Variables$common$time + dim(attributes(lims)$Variables$common$time) + + # SKILL + + path_skill <- paste0(path, var, "/$sdates$/", var, "_", region, + "_skill_week$weeknum$.nc") + + rpss <- Start(dat = path_skill, + var = c('rpss'), + sdates = sdates, + weeknum = 'all', + weeknum_depends = 'sdates', + latitude = values(loc[[1]]), + longitude = values(loc[[2]]), + time = 'all', + return_vars = list(latitude = NULL, longitude = NULL, time = 'sdates'), + retrieve = T) + as.vector(attributes(rpss)$Variables$common$latitude) + as.vector(attributes(rpss)$Variables$common$longitude) + attributes(rpss)$Variables$common$time + dim(attributes(rpss)$Variables$common$time) + + mondays <- format(seq(as.Date(sdates, "%Y%m%d") + 4, as.Date(sdates, "%Y%m%d") + 30, 7), "%d-%m-%Y") + sundays <- format(seq(as.Date(sdates, "%Y%m%d") + 10, as.Date(sdates, "%Y%m%d") + 40, 7), "%d-%m-%Y") + subtit <- sapply(1:4, function(x) { paste0("Semana del\n", mondays[x], "\nal ", + sundays[x], "\nRPSS = ", round(rpss[1,1,1,1,1,1,x]/100,digits=2))}) + + # Plot forecast PDF + + fcst <- data.frame(fcst1 = fcst[1,1,1,1,1,,1], fcst2 = fcst[1,1,1,1,1,,2], fcst3 = fcst[1,1,1,1,1,,3], fcst4 = fcst[1,1,1,1,1,,4]) + p33 <- c(lims[1,1,1,1,1,1,1],lims[1,1,1,1,1,1,2],lims[1,1,1,1,1,1,3],lims[1,1,1,1,1,1,4]) + p66 <- c(lims[1,2,1,1,1,1,1],lims[1,2,1,1,1,1,2],lims[1,2,1,1,1,1,3],lims[1,2,1,1,1,1,4]) + terlims <- array(c(p33,p66), dim = c(4,2)) + + # Improve this part to automatise week labels in the subplot titles: + + PlotForecastPDF(fcst,tercile.limits = terlims, var.name = "Temperatura (\u00B0C)",fcst.names = + subtit, title = paste0("Prediccion de temperatura en ",loc_name,"\nEmitida en ", + format(as.Date(sdates, "%Y%m%d"), "%d-%m-%Y")), color.set="s2s4e", + add.ensmemb = "no", plotfile = paste0("I4C_forecastpdf_",var,"_",loc_name,".png")) + + } + } + +print("Hi Palo") + 
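+
+# A minimal sketch (not run) of a sturdier way to select the location name,
+# assuming the same coordinates as above. Using a *named* list avoids the
+# fragile `loc == Barcelona` comparison, which checks lat and lon
+# element-wise and can fail when two locations share a coordinate:
+# locations <- list(Barcelona  = c(lat = 41.39, lon = 2.10),
+#                   Granollers = c(lat = 41.61, lon = 2.29))
+# for (loc_name in names(locations)) {
+#   loc <- locations[[loc_name]]
+#   # ... same Start() calls and PlotForecastPDF() as above ...
+# }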
+########################################################################################################################################### + +# After the operational, add observations for evaluation + +# Add observations: + +#path_obs <- "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_202308.nc" +#variable <- "tas" +#regions <- c(lat = 41.39, lon = 2.1) +#obs <- Start(dat = path_obs, var = variable, time = 'all', lat = values(regions[1]), lon = values(regions[2]), return_vars = list(lat = NULL, lon = NULL), retrieve = T) +#obs <- as.s2dv_cube(obs) + +#as.vector(attributes(obs)$Variables$common$lat) +#as.vector(attributes(obs)$Variables$common$lon) + +# Extract Barcelona and average from day 21 to day 27 +#obs <- obs[1,1,21:27,1,1] +#obs <- obs -273.15 +#obs <- mean(obs) + + +#plot <-PlotForecastPDF(fcst,tercile.limits = c(terciles[2],terciles[3]), extreme.limits = c(terciles[1], terciles[4]), +# var.name = "Temperatura (C)",fcst.names = c(paste0("ftime: days 26-32\n sdate: 27/07/23\nRPSS: ",rpss4), +# paste0("ftime: days 19-25\n sdate: 03/08/23\nRPSS: ",rpss3),paste0("ftime: days 12-18\n sdate: 10/08/23\nRPSS: ",rpss2), +# paste0("ftime: days 5-11\n sdate: 17/08/23\nRPSS: ",rpss1)) , obs = c(obs,obs,obs,obs), +# title = "Prediccion subestacional de la ola de calor de 2023 - Barcelona",color.set="s2s4e") + +#ggsave("test1.png", plot, width = 7, height = 5) diff --git a/recipe_bigpredidata.yml b/recipe_bigpredidata.yml new file mode 100644 index 0000000000000000000000000000000000000000..2e8cd4bacd2462d9b7fd98be97a5c9f044718e65 --- /dev/null +++ b/recipe_bigpredidata.yml @@ -0,0 +1,111 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas , freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + # - '0101' + # - '0201' + # - '0301' + # - '0401' + - '0501' + # - '0601' + # - '0701' + # - '0801' + # - '0901' + # - '1001' + # - '1101' + # - '1201' + fcst_year: 2023 # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2020' # single option + ftime_min: 1 # single option + ftime_max: 2 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: '/esarchive/recon/ecmwf/era5land/monthly_mean/tas_f1h/tas_199506.nc' + Workflow: + Anomalies: + compute: no + cross_validation: no + save: 'none' + Calibration: + method: evmos ## TODO: list, split? + save: 'none' + Skill: + metric: rpss # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles + multi_panel: no + dots: both + Indicators: + index: no # ? 
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Europe: {lon.min: -20, lon.max: 40, lat.min: 20, lat.max: 80}
+        # Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        # Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+        Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44}
+      start_months: 'all'
+      metric: rpss
+      metric_aggregation: 'score'
+      inf_to_na: TRUE
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 14
+  remove_NAs: no # bool, don't split
+  Output_format: S2S4E # string, don't split
+
+################################################################################
+## Run CONFIGURATION
+################################################################################
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  output_dir: /esarchive/scratch/ptrascas/R/sunset
+  code_dir: /esarchive/scratch/ptrascas/R/sunset
+  autosubmit: yes
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/ptrascas/R/sunset/script_scorecards.R # replace with the path to your script
+    expid: a6sb # replace with your EXPID
+    hpc_user: bsc32413 # replace with your hpc username
+    wallclock: 03:00 # hh:mm
+    processors_per_job: 14
+    platform: nord3v2
+    custom_directives: ['#SBATCH --exclusive']
+    email_notifications: no # enable/disable email notifications. Change it if you want to.
+    email_address: paloma.trascasa@bsc.es # replace with your email address
+    notify_completed: no # notify me by email when a job finishes
+    notify_failed: no # notify me by email when a job fails
+
diff --git a/recipe_bigpredidata_fqa.yml b/recipe_bigpredidata_fqa.yml
new file mode 100644
index 0000000000000000000000000000000000000000..53d6de711d24f5c476bff69f482acd162a3606e8
--- /dev/null
+++ b/recipe_bigpredidata_fqa.yml
@@ -0,0 +1,85 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    - {name: tas, freq: monthly_mean, units: C}
+    #- {name: prlr, freq: monthly_mean, units: mm, flux: no}
+  Datasets:
+    System:
+      - {name: ECMWF-SEAS5.1}
+    Multimodel: no # Mandatory, bool: Either yes/true or no/false
+    Reference:
+      - {name: ERA5} # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate:
+      - '0101'
+      - '0201'
+      - '0301'
+      - '0401'
+      - '0501'
+      - '0601'
+      - '0701'
+      - '0801'
+      - '0901'
+      - '1001'
+      - '1101'
+      - '1201'
+    fcst_year: '2023' # Optional, int: Forecast year 'YYYY'
+    hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 6 # Mandatory, int: Last leadtime time step in months
+  Region:
+    - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+  Regrid:
+    method: bilinear # Mandatory, str: Interpolation method. See docu.
+    type: "to_reference"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+ Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss bss10 bss90 + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics + multi_panel: no + dots: both + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: S2S4E + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/ptrascas/R/sunset_outputs # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/bigpredidata_fqa.R # replace with the path to your script + expid: a768 # replace with your EXPID + hpc_user: bsc032413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. + email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails diff --git a/recipe_ecvs_seasonal_scorecards.yml b/recipe_ecvs_seasonal_scorecards.yml new file mode 100644 index 0000000000000000000000000000000000000000..fdda9173a7b3bc06fddaa1cde2d4afc349f6dea4 --- /dev/null +++ b/recipe_ecvs_seasonal_scorecards.yml @@ -0,0 +1,104 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: prlr, freq: monthly_mean, units: mm} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + - '0101' + # - '0201' + # - '0301' + # - '0401' + # - '0501' + # - '0601' + # - '0701' + # - '0801' + # - '0901' + # - '1001' + # - '1101' + # - '1201' + #fcst_year: '2020' # list, don't split, handled internally + hcst_start: '2013' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: bilinear ## TODO: allow multiple methods? + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Calibration: + method: raw ## TODO: list, split? 
+ save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] # list, don't split + save: 'none' + Visualization: + plots: skill_metrics + Indicators: + index: no # ? + Scorecards: + execute: yes # yes/no + regions: + Iberia : {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + start_months: NULL + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 7 + remove_NAs: no # bool, don't split + Output_format: Scorecards # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/ptrascas/R/sunset + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/execute_scorecards_data_loading.R # replace with the path to your script + expid: a6a3 # replace with your EXPID + hpc_user: bsc32878 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. + email_address: nadia.milders@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails + + diff --git a/recipe_example_scorecards.yml b/recipe_example_scorecards.yml new file mode 100644 index 0000000000000000000000000000000000000000..b6710b639d3c9f5850b31121bd691da820fb9015 --- /dev/null +++ b/recipe_example_scorecards.yml @@ -0,0 +1,100 @@ +Description: + Author: An-Chi Ho + Info: Compute Skills and Plot Scorecards with Autosubmit + +Analysis: + Horizon: seasonal + Variables: + - {name: prlr, freq: monthly_mean, units: mm/day} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} + Time: + sdate: # list, split + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + fcst_year: + hcst_start: '1993' # single option + hcst_end: '2016' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? 
Add region name if length(Region) > 1
+    - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies:
+      compute: yes
+      cross_validation: no
+      save: 'none'
+    Calibration:
+      method: mse_min
+      save: 'none'
+    Skill:
+      metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split
+      cross_validation: yes
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'none'
+    Scorecards:
+      execute: yes # yes/no
+      regions:
+        Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44}
+        EU: {lon.min: -20, lon.max: 40, lat.min: 20, lat.max: 80}
+      start_months: 'all'
+      metric: mean_bias enscorr rpss crpss enssprerr
+      metric_aggregation: 'score'
+      inf_to_na: TRUE # Optional, bool: set inf values in data to NA, default is FALSE
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+      legend_width: 690
+      legend_white_space: 4.9
+
+  ncores: 8
+  remove_NAs: no # bool, don't split
+  Output_format: Scorecards # string, don't split
+
+################################################################################
+## Run CONFIGURATION
+################################################################################
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: esarchive
+  output_dir: /esarchive/scratch/ptrascas/R/sunset_outputs/
+  code_dir: /esarchive/scratch/ptrascas/R/sunset/
+  autosubmit: yes
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/ptrascas/R/sunset/script_example_scorecards.R # replace with the path to your script
+    expid: a6yh # replace with your EXPID
+    hpc_user: bsc32413 # replace with your hpc username
+    wallclock: 03:00 # hh:mm
+    processors_per_job: 8
+    platform: nord3v2
+    custom_directives: ['#SBATCH --exclusive']
+    email_notifications: no # enable/disable email notifications. Change it if you want to.
+    email_address: paloma.trascasa@bsc.es # replace with your email address
+    notify_completed: no # notify me by email when a job finishes
+    notify_failed: no # notify me by email when a job fails
+
diff --git a/recipe_forecast_quality_assessment.yml b/recipe_forecast_quality_assessment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2c34fb390ec3c6f5d4dff8965e2780191c0451cc
--- /dev/null
+++ b/recipe_forecast_quality_assessment.yml
@@ -0,0 +1,122 @@
+################################################################################
+## RECIPE DESCRIPTION
+################################################################################
+
+Description:
+  Author: V. Agudetse
+  Info: Test for recipe splitting
+
+################################################################################
+## ANALYSIS CONFIGURATION
+################################################################################
+
+Analysis:
+  Horizon: Seasonal
+  Variables: # ECVs and Indicators?
+#    - {name: tas rsds prlr, freq: monthly_mean}
+    - {name: rsds, freq: monthly_mean}
+  Datasets:
+    System: # multiple systems for single model, split if Multimodel = F
+      - {name: ECMWF-SEAS5}
+    Multimodel: False # single option
+    Reference:
+      - {name: ERA5} # multiple references for single model?
+ Time: + sdate: # list, split + #- '0101' + #- '0201' + #- '0301' + #- '0401' + #- '0501' + - '0601' + #- '0701' + #- '0801' + #- '0901' + #- '1001' + #- '1101' + #- '1201' + fcst_year: '2021' # list, don't split, handled internally + hcst_start: '2015' # single option + hcst_end: '2018' # single option + ftime_min: 1 # single option + ftime_max: 3 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} +# Regrid: +# method: bilinear +# type: to_system + Regrid: + method: + type: none + Workflow: + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. + bc_method: bias # If type intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type intlr. Options: 'basic', 'large_scale', '4nn' + log_reg_method: # If type logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # 'all'/'none'/'exp_only' + Anomalies: + compute: yes + cross_validation: no + save: 'all' + Calibration: + method: mse_min ## TODO: list, split? + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + multi_panel: no + dots: both + Indicators: + index: no # ? + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: NULL + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 7 + remove_NAs: no # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/ptrascas/R/sunset + code_dir: /esarchive/scratch/ptrascas/R/sunset + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/test_solar_ind.R # replace with the path to your script + expid: a6a3 # replace with your EXPID + hpc_user: bsc32878 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails + diff --git a/recipe_heatwave.yml b/recipe_heatwave.yml new file mode 100644 index 0000000000000000000000000000000000000000..7f2dfec33df996e5bba39332378efa50ed688c1d --- /dev/null +++ b/recipe_heatwave.yml @@ -0,0 +1,87 @@ +Description: + Author: nmilders + Info: scorecards data + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tasmax + freq: monthly_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory ECMWF-SEAS5, CMCC-SPS3.5, DWD-GCFS2.1 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: ## MMDD + # - '0101' + # - '0201' + # - '0301' + # - '0401' + # - '0501' + - '0601' + # - '0701' + # - '0801' + # - '0901' + # - '1001' + # - '1101' + # - '1201' + + fcst_year: 2023 # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2020' #2016 # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: bilinear # conservative for prlr, bilinear for tas, psl, sfcWind + type: to_system + Workflow: + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: 'all' + Anomalies: + compute: no + cross_validation: no + save: 'none' + Skill: + metric: mean_bias rpss bss10 bss90 # str: Skill metric or list of skill metrics. See docu. + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
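+      # Each fraction is parsed in R with eval(parse(text = ...)) by the
+      # Probabilities module, so "1/3" and "2/3" become the numeric tercile
+      # thresholds and "1/10"/"9/10" the extreme thresholds.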
+ save: 'all' + Indicators: + index: no + Visualization: + projection: 'cylindrical_equidistant' + plots: skill_metrics, most_likely_terciles + multi_panel: no + mask_terciles: no + dots_terciles: yes +# Scorecards: +# execute: no # yes/no +# regions: +# Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} +# Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} +# Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} +# Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} +# start_months: 'all' +# metric: mean_bias enscorr rpss crpss EnsSprErr bss10 bss90 +# metric_aggregation: 'score' +# inf_to_na: TRUE +# table_label: +# fileout_label: +# col1_width: +# col2_width: +# calculate_diff: FALSE + ncores: 7 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards #S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/ptrascas/R/sunset + code_dir: /esarchive/scratch/ptrascas/R/sunset diff --git a/recipe_scorecards_atomic.yml b/recipe_scorecards_atomic.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8adb6c12f6c388c05798734f53c023426995b02 --- /dev/null +++ b/recipe_scorecards_atomic.yml @@ -0,0 +1,70 @@ +Description: + Author: nmilders + Info: scorecards data + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas # Mandatory, str: tas prlr psl sfcWind + freq: monthly_mean # Mandatory, str: either monthly_mean or daily_mean + Datasets: + System: + name: ECMWF-SEAS5 # Mandatory ECMWF-SEAS5, CMCC-SPS3.5, DWD-GCFS2.1 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' ## MMDD + fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 # Mandatory, int: minimum latitude + latmax: 90 # Mandatory, int: maximum latitude + lonmin: 0 # Mandatory, int: minimum longitude + lonmax: 359.9 # Mandatory, int: maximum longitude + Regrid: + method: bilinear # conservative for prlr, bilinear for tas, psl, sfcWind + type: to_system + Workflow: + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: 'none' + Anomalies: + compute: yes + cross_validation: yes + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # str: Skill metric or list of skill metrics. See docu. + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: 'none' + Indicators: + index: no + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: 'all' + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + inf_to_na: TRUE + table_label: + fileout_label: + col1_width: + col2_width: + calculate_diff: FALSE + ncores: 7 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: Scorecards #S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nmilders/scorecards_data/new/to_system/cross_validation/all_cross_val/ + code_dir: /esarchive/scratch/nmilders/gitlab/git_clones/s2s-suite/ diff --git a/recipe_subseasonal.yml b/recipe_subseasonal.yml new file mode 100644 index 0000000000000000000000000000000000000000..65e6a291018774d5a096bf3d71d9409694fe3382 --- /dev/null +++ b/recipe_subseasonal.yml @@ -0,0 +1,107 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + #- '0101' + #- '0201' + #- '0301' + #- '0401' + #- '0501' + - '0601' + #- '0701' + #- '0801' + #- '0901' + #- '1001' + #- '1101' + #- '1201' + fcst_year: '2023' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2020' # single option + ftime_min: 1 # single option + ftime_max: 4 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: no + save: 'none' + Calibration: + method: evmos ## TODO: list, split? + save: 'all' + Skill: + metric: rpss # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # list, don't split + save: 'all' + Visualization: + plots: most_likely_terciles + multi_panel: no + dots: both + Indicators: + index: no # ? 
+ Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: NULL + metric: rpss + metric_aggregation: 'score' + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 7 + remove_NAs: no # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/ptrascas/R/sunset + code_dir: /esarchive/scratch/ptrascas/R/sunset + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/test_heatwave.R # replace with the path to your script + expid: a6a3 # replace with your EXPID + hpc_user: bsc32878 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails + diff --git a/recipe_tas_full_calib.yml b/recipe_tas_full_calib.yml new file mode 100644 index 0000000000000000000000000000000000000000..4903828537a85d6f3800ce707a15cab0c59f511a --- /dev/null +++ b/recipe_tas_full_calib.yml @@ -0,0 +1,91 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: K + Datasets: + System: + name: ECMWF-SEAS5.1 #Meteo-France-System8 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 0 + latmax: 10 + lonmin: 0 + lonmax: 15.5 + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: cerise + output_dir: /esarchive/scratch/ptrascas/sunset_outputs # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. + email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails diff --git a/recipe_test.yml b/recipe_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..de829abb33b4ce5a3cb6549939ccf9c3e2132d8a --- /dev/null +++ b/recipe_test.yml @@ -0,0 +1,107 @@ +################################################################################ +## RECIPE DESCRIPTION +################################################################################ + +Description: + Author: V. Agudetse + Info: Test for recipe splitting + +################################################################################ +## ANALYSIS CONFIGURATION +################################################################################ + +Analysis: + Horizon: Seasonal + Variables: # ECVs and Indicators? + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5} + Multimodel: False # single option + Reference: + - {name: ERA5} # multiple references for single model? + Time: + sdate: # list, split + #- '0101' + #- '0201' + #- '0301' + #- '0401' + #- '0501' + - '0601' + #- '0701' + #- '0801' + #- '0901' + #- '1001' + #- '1101' + #- '1201' + fcst_year: '2023' # list, don't split, handled internally + hcst_start: '1993' # single option + hcst_end: '2020' # single option + ftime_min: 1 # single option + ftime_max: 4 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: no + cross_validation: no + save: 'none' + Calibration: + method: raw ## TODO: list, split? 
+ save: 'all' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr bss10 bss90 # list, don't split + cross_validation: yes + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # list, don't split + save: 'all' + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + multi_panel: no + dots: both + Indicators: + index: no # ? + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: NULL + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 7 + remove_NAs: no # bool, don't split + Output_format: S2S4E # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/ptrascas/R/sunset + code_dir: /esarchive/scratch/ptrascas/R/sunset + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/test_heatwave.R # replace with the path to your script + expid: a6a3 # replace with your EXPID + hpc_user: bsc32878 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 8 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails + diff --git a/script_example_scorecards.R b/script_example_scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..33e6d075ab56bfe09a1e236bb4b930243f1e93d5 --- /dev/null +++ b/script_example_scorecards.R @@ -0,0 +1,34 @@ +############################################################################### +## Author: An-Chi Ho +## Description: Computes some skill metrics and plots scorecards with Autosubmit +## Instructions: Follow the steps described in: +## use_cases/ex1_2_autosubmit_scorecards/ex1_2-handson.md +## This script should be called by bash script launch_SUNSET.sh. +############################################################################### + +# Load modules +source("modules/Loading/Loading.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source('tools/libs.R') +source('modules/Scorecards/Scorecards.R') +source("modules/Units/Units.R") + + +# Read recipe +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +recipe <- read_atomic_recipe(recipe_file) + +# Load data +data <- Loading(recipe) + +# Units +data <- Units(recipe, data) +# Compute tos anomalies +data <- Anomalies(recipe, data) +# Compute skill metrics +skill_metrics <- Skill(recipe, data) + + diff --git a/script_scorecards.R b/script_scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..cb97dfd812dbb95b0290b505b809e56d55ecafd2 --- /dev/null +++ b/script_scorecards.R @@ -0,0 +1,55 @@ +# This is an example for forecast verification and evaluation. 
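+# It reads the recipe, loads the hindcast and reference data, converts the
+# units, computes the skill metrics and probability categories, builds the
+# scorecards, and plots the results.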
+ +Sys.setenv(LANG = "en") +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Scorecards/Scorecards.R") +source("modules/Visualization/Visualization.R") +source("modules/Downscaling/Downscaling.R") + +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_ecvs_seasonal_scorecards.yml" +recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) + +# Units +data <- Units(recipe, data) + +# Calibrate data +#data <- Calibration(recipe,data) + +# Calculate anomalies +#data <- Anomalies (recipe, data) + +# Calculate skill metrics +skill_metrics <- Skill(recipe, data) + +scorecards <- Scorecards(recipe) + +# Calculate probabilities +probabilities <- Probabilities(recipe, data) + +# Visualize data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) + +## Add BSC logo +#logo <- "tools/BSC_logo_95.jpg" +#system <- list.files(paste0(recipe$Run$output_dir, "/plots")) +#variable <- strsplit(recipe$Analysis$Variable$name, ", | |,")[[1]] +#files <- lapply(variable, function(x) { +# f <- list.files(paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x)) +# full_path <- paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x,"/", f)})[[1]] +#dim(files) <- c(file = length(files)) +#Apply(list(files), target_dims = NULL, function(x) { +# system(paste("composite -gravity southeast -geometry +10+10", +# logo, x, x))}, ncores = recipe$Analysis$ncores) + + diff --git a/seasonal_plotforecastpdf_downscaled.R b/seasonal_plotforecastpdf_downscaled.R new file mode 100644 index 0000000000000000000000000000000000000000..1a377024dbd9da1d2a29348781463827e4f0bb8b --- /dev/null +++ b/seasonal_plotforecastpdf_downscaled.R @@ -0,0 +1,76 @@ +Sys.setenv(LANG = "en") +Sys.setlocale("LC_TIME","en_GB") +library(startR) +library(s2dverification) +library(abind) +library(CSTools) +library(zeallot) +library(s2dv) +library(easyVerification) +library(ClimProjDiags) +library(ggplot2) + +data0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tm8-lt0.RDS") + +# Select the point of Barcelona +bcn0 <- data0$hcst$data[,49,63,1,1,1,1,1,] + +# Select August 2003 +aug03bcn0 <- bcn0[5,] -273.15 + +# Estimate terciles from hindcast as well as the extremes +terciles <-quantile(bcn0[-5,], probs = c(.1,.33,.66,.9), na.rm = TRUE) +terciles <- terciles -273.15 + + +# Same for leadtimes 1, 2 and 3 +data1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tm8-lt1.RDS") +bcn1 <- data1$hcst$data[,49,63,1,1,1,1,1,] +aug03bcn1 <- bcn1[5,] - 273.15 + +data2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tm8-lt2.RDS") +bcn2 <- data2$hcst$data[,49,63,1,1,1,1,1,] +aug03bcn2 <- bcn2[5,] -273.15 + +data3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tm8-lt3.RDS") +bcn3 <- data3$hcst$data[,49,63,1,1,1,1,1,] +aug03bcn3 <- bcn3[5,] -273.15 + +# Add RPSS +skill0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/skill/cerra/tas-intlr-4nn-tm8-lt0.RDS") +skill0 <- skill0$rpss[1,49,63] +skill0 <- round(skill0,digits=2) + +skill1 <- 
readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/skill/cerra/tas-intlr-4nn-tm8-lt1.RDS")
+skill1 <- skill1$rpss[1,49,63]
+skill1 <- round(skill1, digits = 2)
+
+skill2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/skill/cerra/tas-intlr-4nn-tm8-lt2.RDS")
+skill2 <- skill2$rpss[1,49,63]
+skill2 <- round(skill2, digits = 2)
+
+skill3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/seasonal/catalonia/intlr/skill/cerra/tas-intlr-4nn-tm8-lt3.RDS")
+skill3 <- skill3$rpss[1,49,63]
+skill3 <- round(skill3, digits = 2)
+
+
+# Plot forecast PDF
+
+# To mark the observation on the plots, I take the average of all members
+# (only obs0 is used below; the same value is drawn on every panel)
+obs0 <- mean(aug03bcn0)
+obs1 <- mean(aug03bcn1)
+obs2 <- mean(aug03bcn2)
+obs3 <- mean(aug03bcn3)
+
+fcst <- data.frame(fcst1 = aug03bcn3, fcst2 = aug03bcn2, fcst3 = aug03bcn1, fcst4 = aug03bcn0)
+
+plot <- PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                        var.name = "Temperatura diaria (C)",
+                        fcst.names = c(paste0("August\nissued: April\nRPSS: ", skill3),
+                                       paste0("August\nissued: May\nRPSS: ", skill2),
+                                       paste0("August\nissued: June\nRPSS: ", skill1),
+                                       paste0("August\nissued: July\nRPSS: ", skill0)),
+                        obs = c(obs0, obs0, obs0, obs0),
+                        title = "Prediccion de la ola de calor de 2003 - Barcelona")
+ggsave("seasonal_BCN_heatwave_tas_rpss.png", plot, width = 7, height = 5)
+# If I add extreme.limits it says "Error in integrate(approxfun(x, ymax), lower = min(x), upper = max(x)) :
+#   maximum number of subdivisions reached"
+
+PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                var.name = "Temperatura media diaria (C)",
+                fcst.names = c(paste0("ftime: days 26-32\nsdate: 14/07/03\nRPSS: ", skill3),
+                               paste0("ftime: days 19-25\nsdate: 21/07/03\nRPSS: ", skill2),
+                               paste0("ftime: days 12-18\nsdate: 28/07/03\nRPSS: ", skill1),
+                               paste0("ftime: days 5-11\nsdate: 04/08/03\nRPSS: ", skill0)),
+                obs = c(obs0, obs0, obs0, obs0),
+                title = "Prediccion de la ola de calor de 2003 - Barcelona")
diff --git a/skill-plot.py b/skill-plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee5e88828b46e597bc8616ae4dda9b56e8b6e0ad
--- /dev/null
+++ b/skill-plot.py
@@ -0,0 +1,185 @@
+import numpy as np
+import datetime
+import xarray as xr
+import plotly.express as px
+import cartopy.crs as ccrs
+import matplotlib.pyplot as plt
+from mpl_toolkits.basemap import Basemap
+import pandas as pd
+import os
+import sys, getopt
+
+def plotglyph(m, data, time, ptype, c):
+    # Build the lon/lat mesh and blank out the points where rpss is masked
+    lons = data.longitude.to_pandas().values
+    lats = data.latitude.to_pandas().values
+    lons, lats = np.meshgrid(lons, lats)
+
+    ext = data.rpss.isel(time=time).to_masked_array().mask.transpose()
+    lons = np.where(ext, False, lons)
+    lats = np.where(ext, False, lats)
+
+    alp = 0.8
+
+    x, y = m(lons, lats)
+    if ptype == "upextreme":
+        m.plot(x, y, linestyle='none', marker="^", markersize=4, alpha=alp,
+               markeredgecolor=c, markeredgewidth=1, c="None")
+    elif ptype == "lowextreme":
+        m.plot(x, y, linestyle='none', marker="v", markersize=4, alpha=alp,
+               markeredgecolor=c, markeredgewidth=1, c="None")
+    elif ptype == "hightercile":
+        m.plot(x, y, linestyle='none', marker="o", markersize=1.5, alpha=alp,
+               c="None", markeredgecolor=c, markeredgewidth=1.5)
+    else:
+        m.plot(x, y, linestyle='none', marker="o", markersize=0.7, alpha=alp,
+               c="None", markeredgecolor=c, markeredgewidth=0.7)
+
+
+def generate_map(probs_file, skill_file, date, leadtime, var, system, outdir):
+
+    sdate = datetime.datetime.strptime(date, "%Y%m%d")
+    delta_days =
datetime.timedelta(days=4) + idelta_weeks = datetime.timedelta(weeks=leadtime) + fdelta_weeks = datetime.timedelta(weeks=leadtime+1) + + ileadtime = sdate + delta_days + idelta_weeks + fleadtime = sdate + delta_days + fdelta_weeks - datetime.timedelta(days=1) + + ileadtime = ileadtime.strftime("%b %d" ) + fleadtime = fleadtime.strftime("%b %d" ) + sdate = sdate.strftime("%d %b %Y" ) + + filename = '{}/{}-{}-{}-lead{}.png'.format(outdir,var, system, date, leadtime+1) + + title = 'Sub-seasonal forecast of temperature' + subtitle = 'Issued on {}, valid for {} to {}'.format(sdate, ileadtime,fleadtime) + + ######################### + # SETTINGS ######################## + ######################## + + domain = [-27, 51, 68, 23] # lats from bigger to smaller + extreme_threshold = 40 # % + bigcircle_threshold = 50 # % + skill_threshold = 0 # % + + # colors + blue = '#2BA4B4' + red ='#FF764D' + grey = '#ACACAC' + land = '#DEDEDE' + ocean = '#F7F7F7' + + ######################### + # DATA FILTERING ######################## + ######################### + + probs = xr.open_dataset(probs_file, decode_times=False) + skill = xr.open_dataset(skill_file, decode_times=False) + + data = xr.merge([probs,skill]) + del probs,skill + + data = data.transpose('longitude', 'latitude', 'time', 'time_step') + data = data.assign_coords(longitude=(((data.longitude + 180) % 360) - 180)) + data = data.sortby('longitude') + + data = data.sel(dict(latitude=slice(domain[2],domain[3]))) + data = data.sel(dict(longitude=slice(domain[0],domain[1]))) + + t = data.where(data.rpss >= skill_threshold, drop=False) + + n = t.where((t.prob_n > t.prob_an) &(t.prob_n > t.prob_bn), drop=False) + sn = n.where(n.prob_n < bigcircle_threshold, drop=False) + bnn = n.where(n.prob_n >= bigcircle_threshold, drop=False) + + b = t.where((t.prob_bn > t.prob_n) &(t.prob_bn > t.prob_an), drop=False) + sb = b.where(b.prob_bn < bigcircle_threshold, drop=False) + bbb = b.where(b.prob_bn >= bigcircle_threshold, drop=False) + + a = t.where((t.prob_an > t.prob_n) &(t.prob_an > t.prob_bn), drop=False) + sa = a.where(a.prob_an < bigcircle_threshold, drop=False) + baa = a.where(a.prob_an >= bigcircle_threshold, drop=False) + + p90_extremes = data.where(data.bsp90 > skill_threshold, drop=False) + p90_extremes = p90_extremes.where(p90_extremes.prob_ap90 > extreme_threshold, drop=False) + + p10_extremes = data.where(data.bsp10 > skill_threshold, drop=False) + p10_extremes = p10_extremes.where(p10_extremes.prob_bp10 > extreme_threshold, drop=False) + + ######################### + # PLOT ######################## + ######################### + + m = Basemap(llcrnrlon=domain[0],llcrnrlat=domain[3],urcrnrlon=domain[1], + urcrnrlat=domain[2],projection='mill', resolution='h') + m.drawmapboundary(linewidth=0) + m.fillcontinents(color=land, alpha=1) + m.drawcoastlines(linewidth=0,color=land) + + m.drawlsmask(land_color=land, + ocean_color=ocean) + + plotglyph(m, sa, leadtime, "tercile", red) + plotglyph(m, sn, leadtime, "tercile", grey) + plotglyph(m, sb, leadtime, "tercile", blue) + + plotglyph(m, baa, leadtime, "hightercile", red) + plotglyph(m, bnn, leadtime, "hightercile", grey) + plotglyph(m, bbb, leadtime, "hightercile", blue) + + plotglyph(m, p90_extremes, leadtime, "upextreme", red) + plotglyph(m, p10_extremes, leadtime, "lowextreme", blue) + + plt.text(x=4.7, y=7.2*10**6, s=title, fontsize=10, weight='bold') + plt.text(x=4.7, y=6.9*10**6, s=subtitle, fontsize=8, alpha=0.75) + + m.drawcountries(color=ocean,linewidth=1) + plt.savefig(filename,dpi=200) + plt.close() 
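+
+# Example invocation (hypothetical file names, mirroring how main() builds them):
+# generate_map('tas-prob_20200120_04.nc', 'tas-skill_week04.nc',
+#              '20200120', leadtime=0, var='tas', system='s2s', outdir='.')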
+
+def main(argv):
+
+    usage = 'skill-plot.py -v <variable> -d <sdate> -s <system> -o <outdir>'
+    try:
+        # note: the "outdir=" long option was missing from the original spec
+        opts, args = getopt.getopt(argv, "hv:d:s:o:",
+                                   ["help", "variable=", "sdate=", "system=", "outdir="])
+    except getopt.GetoptError:
+        print(usage)
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt == '-h':
+            print(usage)
+            sys.exit()
+        elif opt in ("-s", "--system"):
+            system = str(arg)
+        elif opt in ("-d", "--sdate"):
+            date = arg
+        elif opt in ("-v", "--variable"):
+            var = arg
+        elif opt in ("-o", "--outdir"):
+            outdir = arg
+
+    week = datetime.datetime.strptime(date, "%Y%m%d").strftime("%V")
+
+    if system == "s2s":
+        src_dir = '/esarchive/oper/S2S4E-data/weekly_statistics/S2S/'
+        probs_file = src_dir + '/' + var + '/' + date + '/' + var + \
+                     '-prob_' + date + '_' + week + '.nc'
+        skill_file = src_dir + '/' + var + '/' + date + '/' + var + \
+                     '-skill_week' + week + '.nc'
+    else:
+        src_dir = '/esarchive/oper/S2S4E-data/weekly_statistics/'
+        probs_file = src_dir + '/' + var + '/' + var + \
+                     '-prob_' + date + '_' + week + '.nc'
+        skill_file = src_dir + '/' + var + '/' + var + \
+                     '-skill_week' + week + '.nc'
+
+    for leadtime in range(4):
+        generate_map(probs_file, skill_file, date, leadtime, var, system, outdir)
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
+
diff --git a/subseasonal_1_data_loading.R b/subseasonal_1_data_loading.R
new file mode 100644
index 0000000000000000000000000000000000000000..42a0b1b75138b0208f75d74fd17baeab33c4303f
--- /dev/null
+++ b/subseasonal_1_data_loading.R
@@ -0,0 +1,302 @@
+Sys.setenv(LANG = "en")
+Sys.setlocale("LC_TIME", "en_GB")
+library(startR)
+library(s2dverification)
+library(abind)
+library(CSTools)
+library(zeallot)
+library(s2dv)
+library(easyVerification)
+library(ClimProjDiags)
+
+# First of all, we define the initialisation date and the variable
+variable <- "tas"
+fcst.sdate <- 20030806
+
+
+#-------------------------------------------------------------------
+# Settings
+#-------------------------------------------------------------------
+
+# data directories
+fcst.dir <- "/esarchive/exp/ncep/cfs-v2/"
+hcst.dir <- "/esarchive/exp/ncep/cfs-v2/"
+obs.dir <- "/esarchive/recon/ecmwf/era5/"
+
+## Time aggregation in esarchive
+fcst.freq <- "weekly_mean"
+
+# Even though we will work with weekly aggregations, the /esarchive paths
+# specify the frequency of the original data before aggregation. Here we
+# define these frequencies for the variables available in esarchive.
+
+switch(tolower(variable),
+       "tas"     = {freq.hcst <- 'f24h';   freq.obs <- 'f1h'},
+       "tasmin"  = {freq.hcst <- 'f24h';   freq.obs <- 'f24h'},
+       "tasmax"  = {freq.hcst <- 'f24h';   freq.obs <- 'f24h'},
+       "psl"     = {freq.hcst <- 'f24h';   freq.obs <- 'f1h'},
+       "prlr"    = {freq.hcst <- 'f6h';    freq.obs <- 'f1h'; accum <- TRUE},
+       "rsds"    = {freq.hcst <- 's0-24h'; freq.obs <- 'f1h'; accum <- TRUE},
+       "sfcwind" = {freq.hcst <- 'f24h';   freq.obs <- 'f1h'}
+)
+
+# Grid of the observations. We take observations that are already regridded
+# and archived in esarchive. You can select the non-regridded "-r1440x721cds".
+obs.grid <- "-r384x190cds"
+# only for psl
+#obs.grid <- "-384x190"
+
+
+# Define the path for each of the files that will be retrieved
+# for the observations, hindcast and forecast.
+# "var" and "file_date" will be specified when doing the Start() call
+
+obs.path <- paste0(
+  obs.dir, fcst.freq, "/$var$_",
+  freq.obs, obs.grid, "/$file_date$.nc")
+
+hcst.path <- paste0(
+  hcst.dir, fcst.freq, "/s2s", "/$var$_",
+  freq.hcst, "/$file_date$",
+  ".nc")
+
+fcst.path <- paste0(
+  fcst.dir, fcst.freq, "/s2s", "/$var$_",
+  freq.hcst, "/$file_date$",
+  ".nc")
+
+
+# region limits
+lons.min <- 5
+lons.max <- 10
+lats.min <- 5
+lats.max <- 10
+
+# number of members
+hcst.nmember <- 12
+fcst.nmember <- 48
+
+# lead times to retrieve
+ltmin <- 1
+ltmax <- 4
+
+# Define the reference period (the usual one is 1999 to 2020)
+hcst_start_year <- 2015
+hcst_end_year <- 2020
+# The next two parameters are used to define the arrays containing the dates
+hcst.ltmin <- as.numeric(substr(as.character(fcst.sdate), 1, 4)) - hcst_end_year
+hcst.ltmax <- as.numeric(substr(as.character(fcst.sdate), 1, 4)) - hcst_start_year
+
+
+# Select the number of start weeks used to calculate the skill. It does not
+# affect the calibration; it is only used to enlarge the sample when
+# computing the skill.
+n.skill.weeks <- 9
+
+# We now define the function that builds the array of dates to be retrieved
+# for the hindcast. It also retrieves a window containing the neighbouring
+# start days (dimension "sday") to enlarge the sample when calibrating;
+# later we keep only the forecast initialisation date.
+
+get.file_dates <- function(startdate, var){
+
+  # This function generates the sdate paths needed to retrieve the hcst
+  # files. It outputs a multi-dimensional array with dims
+  # (sweek, sday, syear) containing the character strings needed to
+  # access each file, e.g. "20201001/tas_20191001.nc".
+
+  sdates <- numeric(0)
+  startdate <- as.Date(toString(startdate), "%Y%m%d")
+  while (length(sdates) < n.skill.weeks){
+    if (format(startdate, "%a") == "Thu" || format(startdate, "%a") == "Mon") {
+      sdates <- c(sdates, format(startdate, "%Y%m%d"))
+    }
+    startdate <- startdate - 1
+  }
+
+  file_dates <- array(numeric(), c(0, 3, (hcst.ltmax - hcst.ltmin) + 1))
+  file_dates.fcst <- array(numeric(), c(0, 3, (hcst.ltmax - hcst.ltmin) + 1))
+  for (sdate in sdates) {
+    ## TODO: take into account year of the fcst to select hcst years
+    fcst.day <- substr(sdate, 7, 8)
+    fcst.month <- substr(sdate, 5, 6)
+    fcst.year <- substr(sdate, 1, 4)
+
+    startdate <- as.Date(toString(sdate), "%Y%m%d")
+
+    # make sure we get a [Mon Thu Mon] or [Thu Mon Thu] combination
+    if (format(startdate, "%a") == "Thu"){
+      hcst.sdays <- c(startdate - 3, startdate, startdate + 4, recursive = T)
+    } else {
+      hcst.sdays <- c(startdate - 4, startdate, startdate + 3, recursive = T)
+    }
+
+    hcst.sdays <- format(as.Date(hcst.sdays, "%Y%m%d"), "%Y%m%d")
+
+    sdays.file_dates <- array(numeric(), c(0, (hcst.ltmax - hcst.ltmin) + 1))
+    sdays.file_dates.fcst <- array(hcst.sdays, c(3, (hcst.ltmax - hcst.ltmin) + 1))
+
+    for (sday.sdate in hcst.sdays) {
+      sday.year <- substr(sday.sdate, 1, 4)
+      sday.mday <- substr(sday.sdate, 5, 8)
+      sday.years <-
+        paste((strtoi(sday.year) - hcst.ltmax):(strtoi(sday.year) - hcst.ltmin), sep = "")
+      sday.dates <- apply(expand.grid(sday.years, sday.mday), 1, paste, collapse = "")
+      sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1)
+    }
+
+    names(dim(sdays.file_dates)) <- c('sday', 'syear')
+
+    file_dates <- abind(file_dates, sdays.file_dates, along = 1)
+    file_dates.fcst <- abind(file_dates.fcst, sdays.file_dates.fcst, along = 1)
+
+  }
+
+  dir_dates <- paste0(file_dates.fcst, "/", var, "_", file_dates)
+  dim(dir_dates) <- dim(file_dates)
+  names(dim(dir_dates)) <- c('sweek', 'sday', 'syear')
+
+  dir_dates <- substr(dir_dates, 10, 24)
+  return(dir_dates)
+
+}
+
+if (n.skill.weeks %% 2 != 0) {
+  fcst_sweek_ind <- (n.skill.weeks + 1) / 2
+} else {
+  fcst_sweek_ind <- (n.skill.weeks) / 2
+}
+
+if (fcst_sweek_ind != 1) {
+  if (fcst_sweek_ind %% 2 == 0) {
+    fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate),
+                             format = "%Y%m%d") + 4 + 7 * ((fcst_sweek_ind / 2) - 1), "%Y%m%d"))
+    file_dates <- get.file_dates(fcst.sdate_to_start, variable)
+  } else {
+    fcst.sdate_to_start <- as.character(format(as.Date(as.character(fcst.sdate),
+                             format = "%Y%m%d") + 7 * ((fcst_sweek_ind - 1) / 2), "%Y%m%d"))
+    file_dates <- get.file_dates(fcst.sdate_to_start, variable)
+  }
+} else {
+  file_dates <- get.file_dates(fcst.sdate, variable)
+}
+
+# Select the central day of the window (corresponding to the forecast initialisation date)
+#file_dates <- Subset(file_dates, along = c("sday"), indices = c(2), drop = FALSE)
+
+file_dates <- Subset(file_dates, along = c("sweek", "sday"),
+                     indices = list(n.skill.weeks:1, 2), drop = FALSE)
+
+# Format the file name containing the date for the forecast
+file_dates.fcst <- paste0(variable, '_', fcst.sdate)
+dim(file_dates.fcst) <- dim(Subset(file_dates, c('sweek', 'sday', 'syear'),
+                                   list(1, 1, 1)))
+
+
+# We now check whether the first year of the hindcast is available
+hcst.dir1 <- paste0(
+  hcst.dir, fcst.freq,
+  "/s2s/", variable, "_", freq.hcst,
+  "/",
+  paste0(file_dates[1, 1, ]), ".nc") # syears
+
+first.exists <- FALSE
+n <- 1
+while (!first.exists){
+  if (!file.exists(hcst.dir1[n])){
+    n <- n + 1
+  } else {
+    first.exists <- TRUE
+    if (n > 1){
+      hcst.ltmax <- hcst.ltmax + (1 - n)
+      warning(paste0("NEW HCST INIT YEAR: ",
+                     as.numeric(substr(fcst.sdate, 1, 4)) - hcst.ltmax))
+    }
+  }
+}
+
+
+# Proceed with the loading of the data
+
+# fcst
+fcst <- Start(dat = fcst.path,
+              var = variable,
+              file_date = file_dates.fcst,
+              time = indices(ltmin:ltmax),
+              latitude = values(list(lats.min, lats.max)),
+              latitude_reorder = Sort(decreasing = TRUE),
+              longitude = values(list(lons.min, lons.max)),
+              longitude_reorder = CircularSort(-180, 180),
+              synonims = list(latitude = c('lat', 'latitude'),
+                              longitude = c('lon', 'longitude'),
+                              ensemble = c('member', 'ensemble')),
+              ensemble = indices(1:fcst.nmember),
+              return_vars = list(lat = 'dat',
+                                 lon = 'dat',
+                                 time = 'file_date'),
+              split_multiselected_dims = TRUE,
+              retrieve = T)
+
+# hcst
+hcst <- Start(dat = hcst.path,
+              var = variable,
+              file_date = file_dates,
+              time = indices(ltmin:ltmax),
+              latitude = values(list(lats.min, lats.max)),
+              latitude_reorder = Sort(decreasing = TRUE),
+              longitude = values(list(lons.min, lons.max)),
+              longitude_reorder = CircularSort(-180, 180),
+              synonims = list(latitude = c('lat', 'latitude'),
+                              longitude = c('lon', 'longitude'),
+                              ensemble = c('member', 'ensemble')),
+              ensemble = indices(1:hcst.nmember),
+              return_vars = list(latitude = 'dat',
+                                 longitude = 'dat',
+                                 time = c('file_date')),
+              split_multiselected_dims = TRUE,
+              num_procs = 1,
+              retrieve = T)
+
+
+# We now extract the dates from the hcst to retrieve the
+# observations corresponding to the lead times of the hindcast
+
+dates <- attr(hcst, 'Variables')$common$time
+dates_file <- sapply(dates, format, '%Y%m%d')
+dates_file <- format(as.Date(dates_file, '%Y%m%d') - 3, "%Y%m%d")
+dates_file <- paste0(variable, "_", dates_file)
+dim(dates_file) <- dim(Subset(hcst,
+                              along = c('dat', 'var',
+                                        'latitude', 'longitude', 'ensemble'),
+                              list(1, 1, 1, 1, 1), drop = 'selected'))
+
+# Notice that dates_file is not the same as file_dates.
+ # file_dates is the array containing the strings of the initialisations for the hindcast, + # while dates_file contains the hindcast times + + #let's obtain the observations + obs <- Start(dat = obs.path, + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(decreasing = TRUE), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = CircularSort(-180, 180), + synonims = list(latitude=c('lat','latitude'), + longitude=c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + +obs <- as.s2dv_cube(obs) +hcst <- as.s2dv_cube(hcst) +#data$hcst$data <-multiApply::Apply(data$hcst$data,target_dims=c("sweek","syear"),fun=as.numeric,output_dims="syear")$output1 +#data$obs$data <-multiApply::Apply(data$obs$data,target_dims=c("sweek","syear"),fun=as.numeric,output_dims="syear")$output1 diff --git a/subseasonal_1_data_loading_downscaled.R b/subseasonal_1_data_loading_downscaled.R new file mode 100644 index 0000000000000000000000000000000000000000..3ff4209a2d68df64855cc234118fe365fd0bd145 --- /dev/null +++ b/subseasonal_1_data_loading_downscaled.R @@ -0,0 +1,106 @@ +Sys.setenv(LANG = "en") +Sys.setlocale("LC_TIME","en_GB") +library(startR) +library(s2dverification) +library(abind) +library(CSTools) +library(zeallot) +library(s2dv) +library(easyVerification) +library(ClimProjDiags) +library(ggplot2) + +data0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt0.RDS") + +# Select the point of Barcelona +bcn0 <- data0$hcst$data[,49,63,1,1,1,1,] + +# Select August 2003 +aug03bcn0 <- bcn0[5,] -273.15 + +# Estimate terciles from hindcast as well as the extremes +terciles <-quantile(bcn0[-5,], probs = c(.1,.33,.66,.9), na.rm = TRUE) +terciles <- terciles -273.15 + + +# Same for leadtimes 1, 2 and 3 +data1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt1.RDS") +bcn1 <- data1$hcst$data[,49,63,1,1,1,1,] +aug03bcn1 <- bcn1[5,] - 273.15 + +data2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt2.RDS") +bcn2 <- data2$hcst$data[,49,63,1,1,1,1,] +aug03bcn2 <- bcn2[5,] -273.15 + +data3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt3.RDS") +bcn3 <- data3$hcst$data[,49,63,1,1,1,1,] +aug03bcn3 <- bcn3[5,] -273.15 + +# Add RPSS +skill0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt0.RDS") +skill0 <- skill0$bss90[1,49,63] +skill0 <- round(skill0,digits=2) + +skill1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt1.RDS") +skill1 <- skill1$bss90[1,49,63] +skill1 <- round(skill1,digits=2) + +skill2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt2.RDS") +skill2 <- skill2$bss90[1,49,63] +skill2 <- round(skill2,digits=2) + + +skill3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt3.RDS") +skill3 <- skill3$bss90[1,49,63] +skill3 <- round(skill3,digits=2) + +# Plot forecast pdf + +# To plot the observations, I take the average of all members +obs0 <- 
readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw33-lt0.RDS")
+obs0 <- obs0$obs$data[5,1,1,1,49,1,63]
+
+fcst <- data.frame(fcst1 = aug03bcn3, fcst2 = aug03bcn2, fcst3 = aug03bcn1, fcst4 = aug03bcn0)
+
+plot <- PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                        var.name = "Temperatura maxima diaria (C)",
+                        fcst.names = c(paste0("ftime: days 26-32\nsdate: 14/07/03\nBSS90: ", skill3),
+                                       paste0("ftime: days 19-25\nsdate: 21/07/03\nBSS90: ", skill2),
+                                       paste0("ftime: days 12-18\nsdate: 28/07/03\nBSS90: ", skill1),
+                                       paste0("ftime: days 5-11\nsdate: 04/08/03\nBSS90: ", skill0)),
+                        obs = c(obs0, obs0, obs0, obs0),
+                        title = "Prediccion de la ola de calor de 2003 - Barcelona")
+ggsave("subseasonal_BCN_heatwave_tasmax_bss90.png", plot, width = 7, height = 5)
+# If I add extreme.limits it says "Error in integrate(approxfun(x, ymax), lower = min(x), upper = max(x)) :
+#   maximum number of subdivisions reached"
+
+PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                var.name = "Temperatura media diaria (C)",
+                fcst.names = c(paste0("ftime: days 26-32\nsdate: 14/07/03\nRPSS: ", skill3),
+                               paste0("ftime: days 19-25\nsdate: 21/07/03\nRPSS: ", skill2),
+                               paste0("ftime: days 12-18\nsdate: 28/07/03\nRPSS: ", skill1),
+                               paste0("ftime: days 5-11\nsdate: 04/08/03\nRPSS: ", skill0)),
+                obs = c(obs0, obs0, obs0, obs0),
+                title = "Prediccion de la ola de calor de 2003 - Barcelona")
+
+
+# Area aggregation:
+
+data0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tw32-lt0.RDS")
+cat0 <- data0$hcst$data[,,,1,1,1,1,]
+cat0 <- MeanDims(cat0[,,,] - 273.15, c("latitude", "longitude"), na.rm = TRUE)
+aug03cat0 <- cat0[5,]
+
+data1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tw32-lt1.RDS")
+cat1 <- data1$hcst$data[,,,1,1,1,1,]
+cat1 <- MeanDims(cat1[,,,] - 273.15, c("latitude", "longitude"), na.rm = TRUE)
+aug03cat1 <- cat1[5,]
+
+data2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tw32-lt2.RDS")
+cat2 <- data2$hcst$data[,,,1,1,1,1,]
+cat2 <- MeanDims(cat2[,,,] - 273.15, c("latitude", "longitude"), na.rm = TRUE)
+aug03cat2 <- cat2[5,]
+
+data3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tw32-lt3.RDS")
+cat3 <- data3$hcst$data[,,,1,1,1,1,]
+cat3 <- MeanDims(cat3[,,,] - 273.15, c("latitude", "longitude"), na.rm = TRUE)
+aug03cat3 <- cat3[5,]
+
+# Estimate terciles from the hindcast, as well as the extremes
+# (the data are already in Celsius here, so no offset is needed)
+terciles <- quantile(cat0[-5,], probs = c(.1, .33, .66, .9), na.rm = TRUE)
+
+obs0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tas-intlr-4nn-tw33-lt0.RDS")
+obs0 <- MeanDims(obs0$obs$data[5,1,1,1,,1,] - 273.15, c("latitude", "longitude"), na.rm = TRUE)
+
+fcst <- data.frame(fcst1 = aug03cat3, fcst2 = aug03cat2, fcst3 = aug03cat1, fcst4 = aug03cat0)
+
+plot <- PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                        var.name = "Temperatura diaria (C)",
+                        fcst.names = c("ftime: days 26-32\nsdate: 14/07/03",
+                                       "ftime: days 19-25\nsdate: 21/07/03",
+                                       "ftime: days 12-18\nsdate: 28/07/03",
+                                       "ftime: days 5-11\nsdate: 04/08/03"),
+                        obs = c(obs0, obs0, obs0, obs0),
+                        title = "Prediccion de temperatura diaria, Agosto de 2003 - Cataluña")
+ggsave("subseasonal_CAT_heatwave_tas.png", plot, width = 7, height = 5)
diff --git a/subseasonal_2_mostlikelyterciles.R b/subseasonal_2_mostlikelyterciles.R
new file mode 100644
index 0000000000000000000000000000000000000000..271ebd049ed3e493e79966da35f7a5ab48683dd7
--- /dev/null
+++ b/subseasonal_2_mostlikelyterciles.R
@@ -0,0 +1,74 @@
+library(CSTools)
+library(s2dv)
+library(zeallot)
+library(easyVerification)
+library(ClimProjDiags)
+
+# After loading the data, we follow an adapted version of a script to plot
+# the most likely terciles for subseasonal data.
+
+# Define latitude and longitude:
+Lat <- hcst$coords$lat
+Lon <- hcst$coords$lon
+
+# Specify hindcast years
+ini <- hcst_start_year
+fin <- hcst_end_year
+numyears <- fin - ini + 1
+
+source("https://earth.bsc.es/gitlab/external/cstools/-/raw/d6914a40c11d09168b9b4a191e9a0362b56a5f0c/R/CST_MergeDims.R")
+
+cal <- CST_Calibration(hcst, obs, cal.method = 'evmos', eval.method = 'leave-one-out',
+                       multi.model = FALSE, na.rm = TRUE,
+                       sdate_dim = 'syear', memb_dim = 'ensemble')
+
+# Calculate and plot anomalies
+anom <- CST_Anomaly(cal, obs, memb_dim = 'ensemble', dim_anom = 'syear',
+                    dat_dim = c('dat', 'm'), ftime_dim = 'time')
+PlotEquiMap(anom$obs$data[1,1,1,1,5,1,,], Lon, Lat, coast_width = 1.5,
+            filled.continents = FALSE, brks = 21, bar_limits = c(-10, 10),
+            toptitle = paste(variable, fcst.sdate), width = 10, height = 10,
+            units = "Pa", country.borders = TRUE,
+            bar_extra_margin = c(1, 0, 1, 0), fileout = "obs_psl_060803.png",
+            triangle_ends = c(TRUE, TRUE), intylat = 10, intxlon = 10)
+
+# Probabilities of each tercile are computed by evaluating which tercile is
+# forecast by each ensemble member for the latest forecast.
+PB <- ProbBins(cal$data[1,1,5,1,,,,,], fcyr = numyears, time_dim = "syear",
+               thr = c(1/3, 2/3), compPeriod = "Without fcyr")
+
+# Make the probability map by averaging the binary bins over the members:
+prob_map <- MeanDims(PB, c('ensemble'))
+
+# Plot the most likely tercile maps; change the 4th dimension according to
+# the lead time you want to plot (lt0 = 1, etc.).
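+
+# Quick sanity check (a sketch, assuming ProbBins names its tercile dimension
+# 'bin' and that dimension names survive MeanDims): the three tercile
+# probabilities at each point should sum to ~1 after averaging over members.
+# range(apply(prob_map, which(names(dim(prob_map)) != 'bin'), sum), na.rm = TRUE)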
+
+#PlotMostLikelyQuantileMap(probs = prob_map[,1,1,,], lon = Lon, lat = Lat,
+#                          coast_width = 1.5, legend_scale = 0.5,
+#                          toptitle = paste0('Most likely tercile - ', variable,
+#                                            ' - ECMWF, System5 - lt0 - 23 August 2023'),
+#                          width = 10, height = 8)
+
+# Add RPSS to the plot
+# Insert a member dimension of length 1 into the observations
+obs <- CST_InsertDim(obs, posdim = 9, lendim = 1, name = "ensemble", values = 1)
+
+test_hcst <- CST_MergeDims(hcst, c('syear', 'sweek'))
+test_obs <- CST_MergeDims(obs, c('syear', 'sweek'))
+#RPSS <- CST_MultiMetric(test_hcst, test_obs, metric = 'rpss', multimodel = FALSE, time_dim = 'time', memb_dim = 'member', sdate_dim = 'syear')
+rpss <- s2dv::RPSS(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'ensemble', cross.val = TRUE, na.rm = TRUE)
+bias <- s2dv::Bias(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'ensemble', na.rm = TRUE)
+cor <- s2dv::Corr(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'ensemble', memb = FALSE, sign = TRUE)
+crpss <- s2dv::CRPSS(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'ensemble')
+
+skill_metrics <- list(rpss = rpss$rpss, rpss_significance = rpss$sign,
+                      mean_bias = bias,
+                      corr = cor$corr, corr_significance = cor$sign,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign)
+
+# Edit according to the number of latitudes and longitudes
+skill_metrics <- lapply(skill_metrics, function(x) {
+  dim(x) <- c(var = 1, time = 4, latitude = 6, longitude = 5)
+  return(x)})
+#PlotEquiMap(rpss$rpss[1,1,1,1,,], lat = Lat, lon = Lon, brks = seq(-1, 1, by = 0.1), filled.continents = FALSE)
+
+# Mask the RPSS to show it in grey on top of the terciles
+mask_rpss <- ifelse((rpss$rpss <= 0) | is.na(rpss$rpss), 1, 0)
+
+# Plot the most likely terciles, with shaded areas where there is no skill
+PlotMostLikelyQuantileMap(probs = prob_map[,1,1,,], toptitle = 'T2m 21-27 Aug, issued 17 Aug',
+                          lon = Lon, lat = Lat, coast_width = 1.5, legend_scale = 0.5,
+                          mask = mask_rpss[1,1,1,1,,], width = 10, height = 8)
diff --git a/subseasonal_2_mostlikelyterciles_sunset.R b/subseasonal_2_mostlikelyterciles_sunset.R
new file mode 100644
index 0000000000000000000000000000000000000000..e0e32cb89ed11e67746f97078fb643b12f1433e6
--- /dev/null
+++ b/subseasonal_2_mostlikelyterciles_sunset.R
@@ -0,0 +1,72 @@
+library(CSTools)
+library(s2dv)
+library(zeallot)
+library(easyVerification)
+library(ClimProjDiags)
+
+# After loading the data, we follow an adapted version of a script to plot
+# the most likely terciles for subseasonal data.
+
+# Define latitude and longitude:
+Lat <- hcst$coords$lat
+Lon <- hcst$coords$lon
+
+# Specify hindcast years
+ini <- hcst_start_year
+fin <- hcst_end_year
+numyears <- fin - ini + 1
+
+cal <- CST_Calibration(hcst, obs, cal.method = 'evmos', eval.method = 'leave-one-out',
+                       multi.model = FALSE, na.rm = TRUE,
+                       sdate_dim = 'syear', memb_dim = 'member')
+
+# Probabilities of each tercile are computed by evaluating which tercile is
+# forecast by each ensemble member for the latest forecast.
+PB <- ProbBins(cal$data[1,1,5,1,,,,,], fcyr = numyears, time_dim = "syear",
+               thr = c(1/3, 2/3), compPeriod = "Without fcyr")
+
+# Make the probability map:
+prob_map <- MeanDims(PB, c('member'))
+
+# Plot the most likely tercile maps; change the 4th dimension according to
+# the lead time you want to plot (lt0 = 1, etc.).
+
+#PlotMostLikelyQuantileMap(probs = prob_map[,1,1,,], lon = Lon, lat = Lat,
+#                          coast_width = 1.5, legend_scale = 0.5,
+#                          toptitle = paste0('Most likely tercile - ', variable,
+#                                            ' - ECMWF, System5 - lt0 - 23 August 2023'),
+#                          width = 10, height = 8)
+
+# Add RPSS to the plot
+# Insert a member dimension of length 1 into the observations
+obs <- CST_InsertDim(obs, posdim = 9, lendim = 1, name = "member", values = 1)
+
+test_hcst <- CST_MergeDims(cal, c('syear', 'sweek'))
+test_obs <- CST_MergeDims(obs, c('syear', 'sweek'))
+#RPSS <- CST_MultiMetric(test_hcst, test_obs, metric = 'rpss', multimodel = FALSE, time_dim = 'time', memb_dim = 'member', sdate_dim = 'syear')
+RPSS <- s2dv::RPSS(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'member', cross.val = TRUE, na.rm = TRUE)
+
+#PlotEquiMap(RPSS$rpss[1,1,1,1,,], lat = Lat, lon = Lon, brks = seq(-1, 1, by = 0.1), filled.continents = FALSE)
+
+# Try the SUNSET visualization module:
+Sys.setenv(LANG = "en")
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Anomalies/Anomalies.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Scorecards/Scorecards.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Downscaling/Downscaling.R")
+
+recipe_file <- "recipe_subseasonal.yml"
+recipe <- prepare_outputs(recipe_file)
+
+# The Visualization module from SUNSET still needs to be adapted for this input
+Visualization(recipe, test_hcst, skill_metrics = RPSS, prob_map[,1,1,,], significance = T)
+
+# Mask the RPSS to show it in grey on top of the terciles
+mask_rpss <- ifelse((RPSS$rpss <= 0) | is.na(RPSS$rpss), 1, 0)
+
+# Plot the most likely terciles, with shaded areas where there is no skill
+PlotMostLikelyQuantileMap(probs = prob_map[,1,1,,], toptitle = 'T2m 21-27 Aug, issued 17 Aug',
+                          lon = Lon, lat = Lat, coast_width = 1.5, legend_scale = 0.5,
+                          mask = mask_rpss[1,1,1,1,,], width = 10, height = 8)
diff --git a/subseasonal_3_pdf.R b/subseasonal_3_pdf.R
new file mode 100644
index 0000000000000000000000000000000000000000..c16c9306f7e27d05f1d4a77b5980e948bfeca315
--- /dev/null
+++ b/subseasonal_3_pdf.R
@@ -0,0 +1,47 @@
+library(CSTools)
+library(s2dv)
+library(zeallot)
+library(easyVerification)
+library(ClimProjDiags)
+
+# After loading the data, we follow an adapted version of a script to plot
+# forecast PDFs for subseasonal data.
+
+# Define latitude and longitude:
+Lat <- hcst$coords$lat
+Lon <- hcst$coords$lon
+
+# Specify hindcast years
+ini <- hcst_start_year
+fin <- hcst_end_year
+numyears <- fin - ini + 1
+
+# Calibrate data
+cal <- CST_Calibration(hcst, obs, cal.method = 'evmos', eval.method = 'leave-one-out',
+                       multi.model = FALSE, na.rm = TRUE,
+                       sdate_dim = 'syear', memb_dim = 'member')
+
+# Merge the time dimensions
+test_hcst <- CST_MergeDims(cal, c('syear', 'sweek'))
+test_obs <- CST_MergeDims(obs, c('syear', 'sweek'))
+
+# Estimate terciles and extremes from the observations,
+# converted to Celsius to match the plotted data
+terciles <- quantile(test_obs$data, probs = c(.1, .33, .66, .9), na.rm = TRUE)
+terciles <- terciles - 273.15
+
+# Make the plot
+fcst <- data.frame(test_hcst$data[1,1,1,5,1,1,1,] - 273.15)
+
+PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                extreme.limits = c(terciles[1], terciles[4]),
+                var.name = "Temperature (C)",
+                title = paste("Test Forecast ini ", fcst.sdate, " valid for ", fcst.sdate))
+
+test_hcst <- CST_MergeDims(cal, c('syear', 'sweek'))
+test_obs <- CST_MergeDims(obs, c('syear', 'sweek'))
+#RPSS <- CST_MultiMetric(test_hcst, test_obs, metric = 'rpss', multimodel = FALSE, time_dim = 'time', memb_dim = 'member', sdate_dim = 'syear')
+RPSS <- s2dv::RPSS(test_hcst$data, test_obs$data, time_dim = 'syear', memb_dim = 'member', cross.val = TRUE, na.rm = TRUE)
+
diff --git a/subseasonal_BCN_heatwave_tas.pdf b/subseasonal_BCN_heatwave_tas.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..16b732e7e0e3b133184beccc2bb3cae038f667f4
Binary files /dev/null and b/subseasonal_BCN_heatwave_tas.pdf differ
diff --git a/subseasonal_anomalies_downscaled.R b/subseasonal_anomalies_downscaled.R
new file mode 100644
index 0000000000000000000000000000000000000000..03b969bfa1c3a0a543159600576588dcf39c842d
--- /dev/null
+++ b/subseasonal_anomalies_downscaled.R
@@ -0,0 +1,28 @@
+Sys.setenv(LANG = "en")
+Sys.setlocale("LC_TIME", "en_GB")
+library(startR)
+library(s2dverification)
+library(abind)
+library(CSTools)
+library(zeallot)
+library(s2dv)
+library(easyVerification)
+library(ClimProjDiags)
+library(ggplot2)
+
+data0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw33-lt0.RDS")
+
+# Compute the observed anomalies
+anom <- CST_Anomaly(data0$obs, data0$obs, dim_anom = 'syear', dat_dim = "dat")
+
+# Select August 2003 (anomalies are already relative to the climatology,
+# so no unit offset is needed)
+aug03 <- anom$obs$data[5,1,1,1,,1,]
+
+Lat <- data0$hcst$coords$latitude
+Lon <- data0$hcst$coords$longitude
+
+PlotEquiMap(aug03, Lon, Lat, coast_width = 1.5, filled.continents = FALSE,
+            brks = 21, bar_limits = c(-10, 10),
+            toptitle = "tasmax anomaly 13/08/2003 - Catalonia",
+            width = 10, height = 10, units = "C", country.borders = TRUE,
+            bar_extra_margin = c(1, 0, 1, 0), fileout = "tas_cat_130803.png",
+            triangle_ends = c(TRUE, TRUE), intylat = 10, intxlon = 10)
+
+#ggsave("subseasonal_BCN_heatwave_tasmax_bss90.png", plot, width = 7, height = 5)
+
diff --git a/subseasonal_plotforecastpdf_downscaled.R b/subseasonal_plotforecastpdf_downscaled.R
new file mode 100644
index 0000000000000000000000000000000000000000..301dfb119978b84655404a68fa6731094be6962b
--- /dev/null
+++ b/subseasonal_plotforecastpdf_downscaled.R
@@ -0,0 +1,78 @@
+Sys.setenv(LANG = "en")
+Sys.setlocale("LC_TIME", "en_GB")
+library(startR)
+library(s2dverification)
+library(abind)
+library(CSTools)
+library(zeallot)
+library(s2dv)
+library(easyVerification)
+library(ClimProjDiags)
+library(ggplot2)
+
+data0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt0.RDS")
+
+# Select the point of Barcelona
+bcn0 <- data0$hcst$data[,49,63,1,1,1,1,]
+
+# Select August 2003
+aug03bcn0 <- bcn0[5,] - 273.15
+
+# Estimate terciles from the hindcast, as well as the extremes
+terciles <- quantile(bcn0[-5,], probs = c(.1, .33, .66, .9), na.rm = TRUE)
+terciles <- terciles - 273.15
+
+
+# Same for lead times 1, 2 and 3
+data1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt1.RDS")
+bcn1 <- data1$hcst$data[,49,63,1,1,1,1,]
+aug03bcn1 <- bcn1[5,] - 273.15
+
+data2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt2.RDS")
+bcn2 <- data2$hcst$data[,49,63,1,1,1,1,]
+aug03bcn2 <- bcn2[5,] - 273.15
+
+data3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/data/cerra/tasmax-intlr-4nn-tw32-lt3.RDS")
+bcn3 <- data3$hcst$data[,49,63,1,1,1,1,]
+aug03bcn3 <- bcn3[5,] - 273.15
+
+# Add RPSS
+skill0 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt0.RDS")
+skill0 <- skill0$rpss[1,49,63]
+skill0 <- round(skill0, digits = 2)
+
+skill1 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt1.RDS")
+skill1 <- skill1$rpss[1,49,63]
+skill1 <- round(skill1, digits = 2)
+
+skill2 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt2.RDS")
+skill2 <- skill2$rpss[1,49,63]
+skill2 <- round(skill2, digits = 2)
+
+
+skill3 <- readRDS("/esarchive/scratch/eduzenli/downscaling/impetus/subseasonal/catalonia/intlr/skill/cerra/tasmax-intlr-4nn-tw32-lt3.RDS")
+skill3 <- skill3$rpss[1,49,63]
+skill3 <- round(skill3, digits = 2)
+
+
+# Plot forecast PDF
+
+# To mark the observation on the plots, I take the average of all members
+# (only obs0 is used below)
+obs0 <- mean(aug03bcn0)
+obs1 <- mean(aug03bcn1)
+obs2 <- mean(aug03bcn2)
+obs3 <- mean(aug03bcn3)
+
+fcst <- data.frame(fcst1 = aug03bcn3, fcst2 = aug03bcn2, fcst3 = aug03bcn1, fcst4 = aug03bcn0)
+
+plot <- PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                        var.name = "Temperatura maxima diaria (C)",
+                        fcst.names = c(paste0("ftime: days 26-32\nsdate: 14/07/03\nRPSS: ", skill3),
+                                       paste0("ftime: days 19-25\nsdate: 21/07/03\nRPSS: ", skill2),
+                                       paste0("ftime: days 12-18\nsdate: 28/07/03\nRPSS: ", skill1),
+                                       paste0("ftime: days 5-11\nsdate: 04/08/03\nRPSS: ", skill0)),
+                        title = "Prediccion de la ola de calor de 2003 - Barcelona")
+ggsave("test1.png", plot, width = 7, height = 5)
+# If I add extreme.limits it says "Error in integrate(approxfun(x, ymax), lower = min(x), upper = max(x)) :
+#   maximum number of subdivisions reached"
+
+PlotForecastPDF(fcst, tercile.limits = c(terciles[2], terciles[3]),
+                var.name = "Temperatura media diaria (C)",
+                fcst.names = c(paste0("ftime: days 26-32\nsdate: 14/07/03\nRPSS: ", skill3),
+                               paste0("ftime: days 19-25\nsdate: 21/07/03\nRPSS: ", skill2),
+                               paste0("ftime: days 12-18\nsdate: 28/07/03\nRPSS: ", skill1),
+                               paste0("ftime: days 5-11\nsdate: 04/08/03\nRPSS: ", skill0)),
+                obs = c(obs0, obs0, obs0, obs0),
+                title = "Prediccion de la ola de calor de 2003 - Barcelona")
diff --git a/test_fqa.R b/test_fqa.R
new file mode 100644
index 0000000000000000000000000000000000000000..e9b5d13e34064d665e696bd4d5a743bb038e74e1
--- /dev/null
+++ b/test_fqa.R
@@ -0,0 +1,95 @@
+# This is an example script for forecast verification and evaluation.
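+# Pipeline sketch (as assembled below): Loading -> Units -> Downscaling ->
+# Anomalies -> Calibration -> Skill -> Probabilities -> Visualization.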
+
+Sys.setenv(LANG = "en")
+source("modules/Loading/Loading.R")
+source("modules/Calibration/Calibration.R")
+source("modules/Anomalies/Anomalies.R")
+source("modules/Skill/Skill.R")
+source("modules/Saving/Saving.R")
+source("modules/Units/Units.R")
+source("modules/Scorecards/Scorecards.R")
+source("modules/Visualization/Visualization.R")
+source("modules/Downscaling/Downscaling.R")
+
+#args = commandArgs(trailingOnly = TRUE)
+#recipe_file <- args[1]
+recipe_file <- "recipe_forecast_quality_assessment.yml"
+recipe <- prepare_outputs(recipe_file)
+# Load datasets
+data <- Loading(recipe)
+
+# Units
+data <- Units(recipe, data)
+
+# Downscale the data
+data <- Downscaling(recipe, data)
+
+# Calculate anomalies
+data <- Anomalies(recipe, data)
+
+# Calibrate data
+data <- Calibration(recipe, data)
+
+# Calculate skill metrics
+skill_metrics <- Skill(recipe, data)
+
+# Calculate probabilities
+probabilities <- Probabilities(recipe, data)
+
+# Visualize data
+Visualization(recipe, data, skill_metrics, probabilities, significance = T)
+
+## Add BSC logo
+logo <- "tools/BSC_logo_95.jpg"
+system <- list.files(paste0(recipe$Run$output_dir, "/plots"))
+variable <- strsplit(recipe$Analysis$Variable$name, ", | |,")[[1]]
+files <- lapply(variable, function(x) {
+  f <- list.files(paste0(recipe$Run$output_dir, "/plots/",
+                         system, "/", x))
+  full_path <- paste0(recipe$Run$output_dir, "/plots/",
+                      system, "/", x, "/", f)})[[1]]
+dim(files) <- c(file = length(files))
+Apply(list(files), target_dims = NULL, function(x) {
+  system(paste("composite -gravity southeast -geometry +10+10",
+               logo, x, x))}, ncores = recipe$Analysis$ncores)
+
+
+###############################################################################
+
+# Compute anomalies
+#data <- Anomalies(recipe, data)
+
+#dim(data$obs$data)
+#class(data$obs)
+#names(data$obs)
+#names(data$obs$attr)
+# [dat, var, sday, sweek, syear, time, latitude, longitude, ensemble]
+
+# Test with just 1 dimension
+
+#data$hcst$[1,1,1,1,1]
+#data$obs[1]
+
+# Only from the workstation (clone the repository if using it in Nord3)
+# Do I want to change units?
+#library(CSIndicators)
+#source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/master/R/TotalTimeExceedingThreshold.R")
+#ind_obs <- CST_TotalTimeExceedingThreshold(data$obs, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6))
+#ind_hcst <- CST_TotalTimeExceedingThreshold(data$hcst, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6))
+
+#ind <- list(hcst = ind_hcst, obs = ind_obs, fcst = NULL)
+#ind$hcst <- CST_InsertDim(ind$hcst, posdim = 5, lendim = 1, name = "time", values = 1)
+#ind$obs <- CST_InsertDim(ind$obs, posdim = 5, lendim = 1, name = "time", values = 1)
+
+#ind$hcst$attrs$Dates <- ind$hcst$attrs$Dates[1,1,]
+#ind$obs$attrs$Dates <- ind$obs$attrs$Dates[1,1,]
+#dim(ind$hcst$attrs$Dates) <- c(syear = length(ind$hcst$attrs$Dates), time = 1)
+#dim(ind$obs$attrs$Dates) <- c(syear = length(ind$obs$attrs$Dates), time = 1)
+
+# Compute skill metrics
+#skill_metrics <- Skill(recipe, ind)
+
+# Get the scorecard plots:
+#Visualization(recipe, data, skill_metrics, probabilities, significance = T)
diff --git a/test_heatwave.R b/test_heatwave.R
new file mode 100644
index 0000000000000000000000000000000000000000..23f19dc013e3562329ea557221eef97e118ead9a
--- /dev/null
+++ b/test_heatwave.R
@@ -0,0 +1,94 @@
+# This is an example script for forecast verification and evaluation.
+ +Sys.setenv(LANG = "en") +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Scorecards/Scorecards.R") +source("modules/Visualization/Visualization.R") +source("modules/Downscaling/Downscaling.R") + +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_test.yml" +recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) + +# Units +data <- Units(recipe, data) + +# Calibrate data +#data <- Calibration(recipe,data) + +# Calculate anomalies +#data <- Anomalies (recipe, data) + +# Calculate skill metrics +skill_metrics <- Skill(recipe, data) + +#scorecards <- Scorecards(recipe) + +# Calculate probabilities +probabilities <- Probabilities(recipe, data) + +# Visualize data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) + +## Add BSC logo +#logo <- "tools/BSC_logo_95.jpg" +#system <- list.files(paste0(recipe$Run$output_dir, "/plots")) +#variable <- strsplit(recipe$Analysis$Variable$name, ", | |,")[[1]] +#files <- lapply(variable, function(x) { +# f <- list.files(paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x)) +# full_path <- paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x,"/", f)})[[1]] +#dim(files) <- c(file = length(files)) +#Apply(list(files), target_dims = NULL, function(x) { +# system(paste("composite -gravity southeast -geometry +10+10", +# logo, x, x))}, ncores = recipe$Analysis$ncores) + + +###############################################################################################################3 + +# Compute anomalies +#data <- Anomalies(recipe, data) + +#dim(data$obs$data) +#class(data$obs) +#names(data$obs) +#names(data$obs$attr) +# [dat, var, sady, sweek, syear, time, latitude, longitude, ensemble] + +# Test with just 1 dimension + +#data$hcst$[1,1,1,1,1] +#data$obs[1] + +# Only from workstation (clone repository if using it in Nord3) +# Do I want to change units? +#library(CSIndicators) +#source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/master/R/TotalTimeExceedingThreshold.R") +#ind_obs <- CST_TotalTimeExceedingThreshold(data$obs, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6)) +#ind_hcst <- CST_TotalTimeExceedingThreshold(data$hcst, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6)) + +#ind <- list(hcst = ind_hcst,obs = ind_obs, fcst = NULL) +#ind$hcst <- CST_InsertDim(ind$hcst, posdim = 5, lendim = 1, name = "time", values = 1) +#ind$obs <- CST_InsertDim(ind$obs, posdim = 5, lendim = 1, name = "time", values = 1) + + +#ind$hcst$attrs$Dates <- ind$hcst$attrs$Dates[1,1,] +#ind$obs$attrs$Dates <- ind$obs$attrs$Dates[1,1,] +#dim(ind$hcst$attrs$Dates) <- c(syear = length(ind$hcst$attrs$Dates), time = 1) +#dim(ind$obs$attrs$Dates) <- c(syear = length(ind$obs$attrs$Dates), time = 1) + + +# Compute skill metrics +#skill_metrics <- Skill(recipe, ind) + +# get the scorecard plots: +#Visualization(recipe, data, skill_metrics, probabilities, significance = T) diff --git a/test_solar_ind.R b/test_solar_ind.R new file mode 100644 index 0000000000000000000000000000000000000000..8aa3344e7228bf9e47737d16b01d000a64a8cffd --- /dev/null +++ b/test_solar_ind.R @@ -0,0 +1,94 @@ +# This is an example for forecast verification and evaluation. 
+ +Sys.setenv(LANG = "en") +source("modules/Loading/Loading.R") +source("modules/Calibration/Calibration.R") +source("modules/Anomalies/Anomalies.R") +source("modules/Skill/Skill.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Scorecards/Scorecards.R") +source("modules/Visualization/Visualization.R") +source("modules/Downscaling/Downscaling.R") + +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_scorecards_atomic.yml" +recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) + +# Units +data <- Units(recipe, data) + +# Calibrate data +#data <- Calibration(recipe,data) + +# Calculate anomalies +#data <- Anomalies (recipe, data) + +# Calculate skill metrics +skill_metrics <- Skill(recipe, data) + +scorecards <- Scorecards(recipe) + +# Calculate probabilities +probabilities <- Probabilities(recipe, data) + +# Visualize data +Visualization(recipe, data, skill_metrics, probabilities, significance = T) + +## Add BSC logo +#logo <- "tools/BSC_logo_95.jpg" +#system <- list.files(paste0(recipe$Run$output_dir, "/plots")) +#variable <- strsplit(recipe$Analysis$Variable$name, ", | |,")[[1]] +#files <- lapply(variable, function(x) { +# f <- list.files(paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x)) +# full_path <- paste0(recipe$Run$output_dir, "/plots/", +# system, "/", x,"/", f)})[[1]] +#dim(files) <- c(file = length(files)) +#Apply(list(files), target_dims = NULL, function(x) { +# system(paste("composite -gravity southeast -geometry +10+10", +# logo, x, x))}, ncores = recipe$Analysis$ncores) + + +###############################################################################################################3 + +# Compute anomalies +#data <- Anomalies(recipe, data) + +#dim(data$obs$data) +#class(data$obs) +#names(data$obs) +#names(data$obs$attr) +# [dat, var, sady, sweek, syear, time, latitude, longitude, ensemble] + +# Test with just 1 dimension + +#data$hcst$[1,1,1,1,1] +#data$obs[1] + +# Only from workstation (clone repository if using it in Nord3) +# Do I want to change units? 
+#library(CSIndicators)
+#source("https://earth.bsc.es/gitlab/es/csindicators/-/raw/master/R/TotalTimeExceedingThreshold.R")
+#ind_obs <- CST_TotalTimeExceedingThreshold(data$obs, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6))
+#ind_hcst <- CST_TotalTimeExceedingThreshold(data$hcst, threshold = 2, time_dim = "time", start = list(1,1), end = list(30,6))
+
+#ind <- list(hcst = ind_hcst, obs = ind_obs, fcst = NULL)
+#ind$hcst <- CST_InsertDim(ind$hcst, posdim = 5, lendim = 1, name = "time", values = 1)
+#ind$obs <- CST_InsertDim(ind$obs, posdim = 5, lendim = 1, name = "time", values = 1)
+
+#ind$hcst$attrs$Dates <- ind$hcst$attrs$Dates[1,1,]
+#ind$obs$attrs$Dates <- ind$obs$attrs$Dates[1,1,]
+#dim(ind$hcst$attrs$Dates) <- c(syear = length(ind$hcst$attrs$Dates), time = 1)
+#dim(ind$obs$attrs$Dates) <- c(syear = length(ind$obs$attrs$Dates), time = 1)
+
+# Compute skill metrics
+#skill_metrics <- Skill(recipe, ind)
+
+# Get the scorecard plots:
+#Visualization(recipe, data, skill_metrics, probabilities, significance = T)
diff --git a/vitigeoss_plots.py b/vitigeoss_plots.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b956b9fb4639c6c9a1328ae0d669da142eb7e62
--- /dev/null
+++ b/vitigeoss_plots.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import datetime
+import os
+import sys
+
+import yaml
+from dateutil.relativedelta import relativedelta  # explicit imports, previously relied on the wildcard imports below
+
+from DST_plot import *
+
+from bar_plot import *
+
+
+def main(system_type, date, ecvs):
+
+    if system_type == "seasonal":
+        system = "Seas5-knn"
+    else:
+        system = "cfsv2-knn"
+
+    regions = ['cat']
+    origdir = '/esarchive/oper/VITIGEOSS/'
+    # -------------------------------------
+    # DST PLOTS
+    # -------------------------------------
+    out = '/esarchive/scratch/ptrascas/R/sunset/'
+    outdir = '{}/plots/{}/{}/'.format(out, system_type, date)
+    os.makedirs(outdir, exist_ok=True)
+
+    for region in regions:
+
+        #archive = f"/esarchive/oper/S2S-downscale/statistics/analogs/vitigeoos-{region}/"
+        srcdir = "/esarchive/oper/VITIGEOSS/output/"
+
+        for var in ecvs:
+            if system_type == "seasonal":
+                leadtimes = range(3)
+                archive = srcdir + f"system51c3s/monthly_statistics/vitigeoss-{region}/"
+                probs_file = f"{archive}/{var}/{date}/{var}-prob_{date}_{date[4:6]}.nc"
+                skill_file = f"{archive}/{var}/{date}/{var}-skill_month{date[4:6]}.nc"
+            else:
+                leadtimes = range(4)
+                archive = srcdir + f"cfsv2/weekly_statistics/vitigeoss-{region}/"
+                week = datetime.datetime.strptime(date, "%Y%m%d").strftime("%V")
+                probs_file = f"{archive}/{var}/{date}/{var}-prob_{date}_{week}.nc"
+                skill_file = f"{archive}/{var}/{date}/{var}-skill_week{week}.nc"
+
+            for leadtime in leadtimes:
+
+                sdate = datetime.datetime.strptime(date, "%Y%m%d")
+
+                if system_type == "seasonal":
+                    delta = datetime.timedelta(weeks=4 * leadtime)
+                    lead = (sdate + relativedelta(months=+leadtime + 1)).strftime("%b %Y")
+                    title = 'Seasonal forecast of temperature'
+                    subtitle = 'Valid for {}, issued on {}'.format(lead, sdate.strftime("%d %b %Y"))
+
+                else:
+                    delta_days = datetime.timedelta(days=4)
+                    idelta_weeks = datetime.timedelta(weeks=leadtime)
+                    fdelta_weeks = datetime.timedelta(weeks=leadtime + 1)
+                    ileadtime = sdate + delta_days + idelta_weeks
+                    fleadtime = sdate + delta_days + fdelta_weeks - datetime.timedelta(days=1)
+                    title = 'Subseasonal forecast of temperature'
+                    subtitle = 'Valid from {} to {}, issued on {}'.format(ileadtime.strftime("%b %d"),
+                                                                          fleadtime.strftime("%b %d"),
+                                                                          sdate.strftime("%d %b %Y"))
+
+                filename = '{}/DST-{}-{}-{}-{}-lead{}.png'.format(outdir, region, var, system, date, leadtime + 1)
+
+                with open("conf.yml") as f:
+                    conf = yaml.load(f, Loader=yaml.FullLoader)
+                if var == "acprec":
+                    colors = conf['conf']['colors'][var]
+                else:
+                    colors = conf['conf']['colors']["default"]
+                thrs = conf['conf']['thrs']
+                locations = conf['locations'][region]
+
+                data = get_data(probs_file, skill_file, region)
+
+                generate_plot(data, region, locations, leadtime, thrs, colors, subtitle, filename)
+
+    # -------------------------------------
+    # BAR PLOTS
+    # -------------------------------------
+
+    outdir = '{}/plots/{}/{}/'.format(origdir, system_type, date)
+
+    # makedirs also creates any missing parent directories, unlike os.mkdir
+    os.makedirs(outdir, exist_ok=True)
+    for var in ecvs:
+        for region in regions:
+            if system_type == "seasonal":
+                archive = f"/esarchive/oper/VITIGEOSS/output/system51c3s/monthly_statistics/vitigeoss-{region}/"
+            else:
+                archive = f"/esarchive/oper/VITIGEOSS/output/cfsv2/weekly_statistics/vitigeoss-{region}/"
+
+            with open("conf.yml") as f:
+                conf = yaml.load(f, Loader=yaml.FullLoader)
+            locations = conf['locations'][region]
+
+            for location, coords in locations.items():
+
+                data = get_bardata(var, date, archive, location, coords, region, system_type)
+                terciles = data[['p33', 'p66']]
+                extremes = data[['p10', 'p90']]
+                skills = data[['rpss', 'bsp90', 'bsp10']]
+                data = df_to_pd(data)
+
+                if var == "acprec":
+                    colors = load_colors("conf.yml", var)
+                else:
+                    colors = load_colors("conf.yml", "default")
+
+                var_attrs = load_var_attrs("conf.yml", var)
+                if system_type == "seasonal":
+                    bar_plot(var, date, location, data, colors, var_attrs, skills, terciles, extremes, outdir)
+                else:
+                    bar_plot_subseas(var, date, location, data, colors, var_attrs, skills, terciles, extremes, outdir)
+
+
+if __name__ == "__main__":
+
+    sys_type = sys.argv[1]
+    #run_conf = "conf/vitigeoos-seasonal-run.yml"
+    run_sdate = sys.argv[2]
+    target_var = sys.argv[3]
+    #run_sdate = "20210701"
+
+    main(sys_type, run_sdate, [target_var])
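+
+# Usage sketch (assumed CLI, matching the sys.argv parsing above):
+#   python vitigeoss_plots.py seasonal 20210701 tas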