diff --git a/.gitignore b/.gitignore index 263c4e640a4ffe3bd13bf6a14f80c37553954d4d..4a496178201417420c9c4a44d7111a735349b558 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ ecsbatch.log* modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +slurm* +*.png +*.RDS diff --git a/GetProbs.R b/GetProbs.R new file mode 100644 index 0000000000000000000000000000000000000000..6549e232477161ea9adec25d6432328b49c3acca --- /dev/null +++ b/GetProbs.R @@ -0,0 +1,346 @@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast, or NULL if there is no member +#' dimension (e.g., for observations, or for forecast with only one ensemble +#' member). The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. +#'@param indices_for_quantiles A vector of the indices to be taken along +#' 'time_dim' for computing the absolute thresholds between the probabilistic +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. +#'@param weights A named numerical array of the weights for 'data' with +#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value +#' is NULL. 
The ensemble should have at least 70 members or span at least 10 +#' time steps and have more than 45 members if consistency between the weighted +#' and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation mode. The default value +#' is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. +#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- 1:dim(data)[time_dim] + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (any(!dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + any(!names(dim(weights)) %in% namesdim_weights)) { + stop(paste0("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".")) + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop(paste0("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, len = dim(data)[1], + pos = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim(quantiles)[1] + 1) + } 
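+ # For time steps with complete data: without weights, easyVerification::convert2prob() + # flags the category of each ensemble member and colMeans() turns the flags into the + # fraction of members per category; with weights, the weighted empirical CDF is + # evaluated at the thresholds and differenced to obtain the category probabilities.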
else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling below the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling above the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + + + diff --git a/RPSS.R b/RPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..f50fdc5f7dbd5784dea40d46ae3d296b8e3930dd --- /dev/null +++ b/RPSS.R @@ -0,0 +1,642 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinity and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill.\cr +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as +#'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained +#'based on a Random Walk test at the specified confidence level (DelSole and +#'Tippett, 2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#'each dataset as inputs. If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted pairwise, which means that only the time steps at which all the +#'datasets have values count as non-NA values. 
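+#'The RPS of 'exp' and the RPS of the reference forecast are averaged along the +#'time dimension before computing their ratio.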
+#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have a dataset dimension. If there is a corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param nmemb A numeric value indicating the number of ensemble members that +#' were used to compute the probabilities. It is used for the Fair correction +#' when 'exp' is provided as probabilities. The default value is NULL. +#'@param nmemb_ref Same as 'nmemb' but for the reference forecast. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. +#'@param weights_ref Same as 'weights_exp' but for the reference forecast. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation. The default value is +#' FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 
1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +#' The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPSS will be calculated. The default value is FALSE. +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details. The default is 'two.sided.approx', which is +#' the default of \code{RandomWalkTest()}. +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions +#' of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#' experiments (i.e., dat_dim in exp), and nobs is the number of observations +#' (i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as $rpss. +#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'set.seed(1) +#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'set.seed(2) +#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50)) +#'set.seed(3) +#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'weights <- sapply(1:dim(exp)['sdate'], function(i) { +#' n <- abs(rnorm(10)) +#' n/sum(n) +#' }) +#'dim(weights) <- c(member = 10, sdate = 50) +#'# Use data as input +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided') +#' +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, +#' cat_dim = 'bin') +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = FALSE, + sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have 
dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & !cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- seq_len(dim(obs)[time_dim]) + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric 
vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## weights_exp + if (!is.null(weights_exp) & is.null(cat_dim)) { + if (!is.array(weights_exp) | !is.numeric(weights_exp)) + stop("Parameter 'weights_exp' must be a named numeric array.") + + if (is.null(dat_dim)) { + if (length(dim(weights_exp)) != 2 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_exp' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } + if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_exp)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights_exp' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'exp'.") + } + weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights_exp)) != 3 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_exp' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } + if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_exp)[time_dim] != dim(exp)[time_dim] | + dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) { + stop("Parameter 'weights_exp' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") + } + weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) + } + } else if (!is.null(weights_exp) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' is probability already, so parameter ", + "'weights_exp' is not used. 
It is set to NULL.")) + weights_exp <- NULL + } + ## weights_ref + if (!is.null(weights_ref) & is.null(cat_dim)) { + if (!is.array(weights_ref) | !is.numeric(weights_ref)) + stop("Parameter 'weights_ref' must be a named numeric array.") + + if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { + if (length(dim(weights_ref)) != 2 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_ref' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_ref)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights_ref)) != 3 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_ref' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] | + dim(weights_ref)[time_dim] != dim(ref)[time_dim] | + dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) + } + } else if (!is.null(weights_ref) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'ref' already contains probabilities, so parameter ", + "'weights_ref' is not used. It is set to NULL.")) + weights_ref <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop("Parameter 'na.rm' must be TRUE, FALSE or a numeric between 0 and 1.") + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## sig_method.type + #NOTE: These are the types of RandomWalkTest() + if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") + } + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. 
Returning the significance at the 0.05 significance level.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs, + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + + return(output) + +} + +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + ## Insert dat_dim + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) + } + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp 
= nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb, + weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb_ref, + weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } + } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) + } else { + probs_clim_cumsum <- array(NA, c(1, 2)) + } + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(obs)[1] #number of years + } + } else { + R <- nmemb_ref + } + 
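+ # Fair correction for the climatological reference: adding + # -sum(p_cum * (1 - p_cum)) / (R - 1) over categories removes the expected + # sampling effect of an ensemble of size R on the squared cumulative errors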
adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref[, i, j] <- rps_ref[, i, j] + adjustment + } + } + } + } + + if (is.null(dat_dim)) { + dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] + } + +#---------------------------------------------- + # Calculate RPSS + + if (!is.null(dat_dim)) { + # rps_exp and rps_ref: [sdate, nexp, nobs] + rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) + rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) + rpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (!all(is.na(rps_exp_mean))) { + for (i in 1:nexp) { + for (j in 1:nobs) { + rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] + ind_nonNA <- !is.na(rps_exp[, i, j]) + if (!any(ind_nonNA)) { + sign[i, j] <- NA + } else { + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], + skill_B = rps_ref[ind_nonNA, i, j], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + } + } + + # Turn NaN into NA + if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA + + } else { # dat_dim is NULL + + ind_nonNA <- !is.na(rps_exp) + if (!any(ind_nonNA)) { + rpss <- NA + sign <- NA + } else { + # rps_exp and rps_ref: [sdate] + rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], + skill_B = rps_ref[ind_nonNA], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + + return(list(rpss = rpss, sign = sign)) +} + + diff --git a/RandomWalkTest.R b/RandomWalkTest.R new file mode 100644 index 0000000000000000000000000000000000000000..36a85d283f089c84deea2cd3c4b94819495e352d --- /dev/null +++ b/RandomWalkTest.R @@ -0,0 +1,185 @@ +#'Random Walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). +#' +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. 
+#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
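+#' For example, with N = 100 time steps, the difference must be greater than +#' 2*sqrt(100) = 20 or smaller than -20 to be statistically significant.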
+#' +#'@references +#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' scores_A <- abs(fcst_A - reference) +#' scores_B <- abs(fcst_B - reference) +#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) +#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', + test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, + sign = FALSE, ncores = NULL) { + + # Check inputs + ## skill_A and skill_B + if (is.null(skill_A) | is.null(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if (!is.numeric(skill_A) | !is.numeric(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' must be numerical arrays.") + } + if (!identical(dim(skill_A), dim(skill_B))) { + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) { + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## test.type + if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { + stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") + } + if (test.type == 'two.sided.approx') { + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + if (pval) { + .warning("p-value cannot be returned with the DelSole and Tippett (2016) ", + "approximation. 
Returning the significance at the 0.05 significance level.") + } + sign <- TRUE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', + alpha = 0.05, pval = TRUE, sign = FALSE) { + #skill_A and skill_B: [sdate] + + N.eff <- length(skill_A) + + A_better <- sum(skill_B > skill_A) + B_better <- sum(skill_B < skill_A) + + output <- NULL + output$score <- A_better - B_better + + if (test.type == 'two.sided.approx') { + output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + + } else { + + if (!is.na(output$score)) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + alternative = test.type)$p.value + + } else { + p.val <- NA + } + + if (pval) { + output$p.val <- p.val + } + if (sign) { + output$sign <- !is.na(p.val) & p.val <= alpha + } + + } + + return(output) +} + diff --git a/SprErr.R b/SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..e37261118e666d8643ac42a1e1cf3b92b7903c69 --- /dev/null +++ b/SprErr.R @@ -0,0 +1,228 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance are provided by a two-sided Fisher's test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute the p-value +#' of the test Ho: SD/RMSE = 1. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value +#' is FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param na.rm A logical value indicating whether to remove NA values. The default +#' value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. 
+#' If dat_dim is NULL, nexp and nobs are omitted. \cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p.val}{ +#' The p-value of the two-sided Fisher's test with Ho: SD/RMSE = 1. Only present +#' if \code{pval = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'rsdrms <- SprErr(sampleData$mod, sampleData$obs, dat_dim = 'dataset') +#'# Reorder the data in order to plot it with PlotVsLTime +#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +#'rsdrms_plot[, , 2, ] <- rsdrms$ratio +#'rsdrms_plot[, , 4, ] <- rsdrms$p.val +#'\dontrun{ +#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", +#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), +#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE) +#'} +#' +#'@import multiApply +#'@export +SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, sign = FALSE, + alpha = 0.05, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ", + "dimensions memb_dim and time_dim.")) + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension. 
", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + na.rm = na.rm, + fun = .SprErr, + time_dim = time_dim, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE, time_dim = time_dim) { + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = 
na.rm), + names(dim(exp))[3])) + enodif <- Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, time_dim = time_dim) + if (pval) { + F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + ratio[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} + diff --git a/autosubmit/conf_cerise/autosubmit.yml b/autosubmit/conf_cerise/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..b94f93268985cee89352e28fc1e037fab27687d0 --- /dev/null +++ b/autosubmit/conf_cerise/autosubmit.yml @@ -0,0 +1,22 @@ +config: + EXPID: + AUTOSUBMIT_VERSION: 4.0.73 + MAXWAITINGJOBS: 99 + # Default maximum number of jobs to be running at the same time at any platform + # Default: 6 + TOTALJOBS: 20 + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +mail: + NOTIFICATIONS: + TO: +communications: + # Communications library used to connect with platforms: paramiko or saga. + # Default: paramiko + API: paramiko +storage: + # Defines the way of storing the progress of the experiment. The available options are: + # A PICKLE file (pkl) or an SQLite database (db). Default: pkl + TYPE: pkl + # Defines if the remote logs will be copied to the local platform. Default: True. + COPY_REMOTE_LOGS: True diff --git a/autosubmit/conf_cerise/expdef.yml b/autosubmit/conf_cerise/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae93c79a970372b88662bdfacbce06428eff1128 --- /dev/null +++ b/autosubmit/conf_cerise/expdef.yml @@ -0,0 +1,44 @@ +DEFAULT: + EXPID: + HPCARCH: ATHOS-hpc +experiment: + DATELIST: + MEMBERS: fc0 + CHUNKSIZEUNIT: month + CHUNKSIZE: 1 + NUMCHUNKS: + CHUNKINI: 1 + CALENDAR: standard +project: + PROJECT_TYPE: local + # Destination folder name for project. type: STRING, default: leave empty, + PROJECT_DESTINATION: auto-s2s +# If PROJECT_TYPE is not git, no need to change +git: + # Repository URL STRING: 'https://github.com/torvalds/linux.git' + PROJECT_ORIGIN: https://earth.bsc.es/gitlab/es/auto-s2s.git + # Select branch or tag, STRING, default: 'master', help: {'master' (default), 'develop', 'v3.1b', ...} + PROJECT_BRANCH: master + # type: STRING, default: leave empty, help: if model branch is a TAG leave empty + PROJECT_COMMIT: '' +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +# If PROJECT_TYPE is not local, no need to change +local: + # type: STRING, help: /foo/bar/ecearth + PROJECT_PATH: /esarchive/scratch/vagudets/repos/auto-s2s/ +# If PROJECT_TYPE is none, no need to change +project_files: + # Where is PROJECT CONFIGURATION file location relative to project root path + FILE_PROJECT_CONF: '' + # Where is JOBS CONFIGURATION file location relative to project root path + FILE_JOBS_CONF: '' + # Default job scripts type in the project. type: STRING, default: bash, supported: 'bash', 'python' or 'r' + JOB_SCRIPTS_TYPE: '' +rerun: + # Is a rerun or not? [Default: Do set FALSE]. 
BOOLEAN: TRUE, FALSE + RERUN: FALSE + # If RERUN: TRUE then supply the list of chunks to rerun + # LIST: [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] + CHUNKLIST: '' diff --git a/autosubmit/conf_cerise/jobs.yml b/autosubmit/conf_cerise/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..4d4127b64294f7b43263496762b0169b473b0e96 --- /dev/null +++ b/autosubmit/conf_cerise/jobs.yml @@ -0,0 +1,16 @@ +JOBS: + verification: + FILE: autosubmit/auto-verification-CERISE.sh + RUNNING: chunk + WALLCLOCK: + NOTIFY_ON: + PLATFORM: ATHOS-hpc + PROCESSORS: + scorecards: + FILE: autosubmit/auto-scorecards.sh + WALLCLOCK: 00:30 + PLATFORM: ATHOS-ecs + NOTIFY_ON: + PROCESSORS: 1 + DEPENDENCIES: verification + diff --git a/autosubmit/conf_cerise/platforms.yml b/autosubmit/conf_cerise/platforms.yml new file mode 100644 index 0000000000000000000000000000000000000000..74e6bda16902e3986f23ce13da4164332ad928e7 --- /dev/null +++ b/autosubmit/conf_cerise/platforms.yml @@ -0,0 +1,24 @@ +## TODO: Change platform +Platforms: + ATHOS-hpc: + TYPE: slurm + HOST: hpc-batch + USER: cyce + PROJECT: copext ## TO BE CHANGED + #EC_QUEUE: hpc + SCRATCH_DIR: /ec/res4/scratch/cyce + ADD_PROJECT_TO_HOST: false + CUSTOM_DIRECTIVES: ["#SBATCH --hint=nomultithread"] + QUEUE: np + ATHOS-ecs: + TYPE: slurm + HOST: ecs-batch + USER: cyce + PROJECT: copext ## TO BE CHANGED + #EC_QUEUE: hpc + SCRATCH_DIR: /ec/res4/scratch/cyce + ADD_PROJECT_TO_HOST: false + CUSTOM_DIRECTIVES: ["#SBATCH --hint=nomultithread"] + QUEUE: ef + + diff --git a/autosubmit/conf_cerise/proj.yml b/autosubmit/conf_cerise/proj.yml new file mode 100644 index 0000000000000000000000000000000000000000..679cf63b1ced38fd833d28ea9acfa145a1e9bc4f --- /dev/null +++ b/autosubmit/conf_cerise/proj.yml @@ -0,0 +1,4 @@ +common: + MODULES: "MODULES" + OUTDIR: + SCRIPT: diff --git a/autosubmit/conf_mars/autosubmit.yml b/autosubmit/conf_mars/autosubmit.yml index 030081165150f1ac62dd897e11437ff893195f74..8302d9bb4b5f6e29e5f5da6e48f73241e1fc7d6f 100644 --- a/autosubmit/conf_mars/autosubmit.yml +++ b/autosubmit/conf_mars/autosubmit.yml @@ -1,7 +1,7 @@ config: EXPID: AUTOSUBMIT_VERSION: 4.0.73 - MAXWAITINGJOBS: 16 + MAXWAITINGJOBS: 100 # Default maximum number of jobs to be running at the same time at any platform # Default: 6 TOTALJOBS: 16 diff --git a/conf/archive.yml b/conf/archive.yml index 61f62be230b4ff05021a60a17e38e5e4d446cff9..a8569b14610133a83dc8fadf6600996317875ab2 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -1,3 +1,79 @@ +cerise: + src: "/ec/res4/hpcperm/cyce/data/esarchive/" + System: + ECMWF-i2o2: + name: "ECMWF-i2o2" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ecmwf/systemi2o2/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", "tos":"monthly_mean/tos_f6h/", + "tasmin":"monthly_mean/tasmin_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} + nmember: + hcst: NULL + sdates: + - ["0201", 25, 4] # sdate, nmember, ftimes + - ["0501", 51, 4] + - ["0801", 25, 4] + - ["1101", 51, 4] + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_systemi2o2.txt" + lsm: "static/lsm/lsm_20211101.grb" + CMCC-SPS3.5: + name: "CMCC-SPS3.5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/cmcc/system35/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f6h/", + 
"sfcWind":"monthly_mean/sfcWind_f6h/", "tos":"monthly_mean/tos_f6h/", + "tasmin":"monthly_mean/tasmin_f6h/", "tasmax":"monthly_mean/tasmax_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} + nmember: + hcst: 40 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + lsm: "static/lsm/lsm_19930101.grb" + Meteo-France-System8: + name: "Meteo-France System 8" + institution: "Meteo-France" + src: "exp/meteo_france/system8/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h/", + "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} + nmember: + hcst: 25 #50 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system8.txt" + lsm: "static/lsm/lsm_19930101.grb" + UKMO-System602: + name: "UKMO-S602" + institution: "UK MetOffice" + src: "exp/ukmo/system602/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "tos":"monthly_mean/tos_f6h/", + "prlr":"monthly_mean/prlr_f6h/", "sfcWind": "monthly_mean/sfcWind_f6h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin": "monthly_mean/tasmin_f6h/", + "psl":"monthly_mean/psl_f6h/", "tdps":"monthly_mean/tdps_f6h/"} + nmember: + hcst: 28 #42 + time_stamp_lag: "+1" + calendar: "proleptic_gregorian" + reference_grid: "conf/grid_description/griddes_system602.txt" + lsm: "static/lsm/lsm_19930101.grb" + Reference: + ERA5: + name: "ERA5" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "recon/ecmwf/era5/" + monthly_mean: {"tas":"monthly_mean/tas_f1h/", "prlr":"monthly_mean/prlr_f1h/", + "psl":"monthly_mean/psl_f1h/", "sfcWind":"monthly_mean/sfcWind_f1h/", + "tos":"monthly_mean/tos_f1h/", + "tasmax":"monthly_mean/tasmax_f24h/", + "tasmin":"monthly_mean/tasmin_f24h/", "tdps":"monthly_mean/tdps_f1h/"} + calendar: "standard" + reference_grid: "conf/grid_description/griddes_GRIB_system5_m1.txt" esarchive: src: "/esarchive/" System: diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 25872a0ef510b95ebf0f8dbe911bf5a544d3d5ff..0cf19ca212ebbc64772fa043e9b4d1a45e866118 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -12,3 +12,11 @@ mars: conf_format: yaml experiment_dir: /esarchive/autosubmit/ ## TO BE CHANGED userID: bsc32 ## TO BE CHANGED +cerise: + platform: local ## TO BE CHANGED + module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 ## TO BE CHANGED + auto_version: 4.0.0 + conf_format: yaml + experiment_dir: /home/cyce/autosubmit/ ## TO BE CHANGED + userID: cyce ## TO BE CHANGED + diff --git a/conf/grid_description/griddes_system602.txt b/conf/grid_description/griddes_system602.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec1fce240f20e130d2acc3978a3519c3565b3996 --- /dev/null +++ b/conf/grid_description/griddes_system602.txt @@ -0,0 +1,19 @@ +# +# gridID 1 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + + diff --git a/conf/grid_description/griddes_system8.txt b/conf/grid_description/griddes_system8.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec1fce240f20e130d2acc3978a3519c3565b3996 --- /dev/null +++ b/conf/grid_description/griddes_system8.txt @@ -0,0 
+1,19 @@
+#
+# gridID 1
+#
+gridtype = lonlat
+gridsize = 64800
+xsize = 360
+ysize = 180
+xname = lon
+xlongname = "longitude"
+xunits = "degrees_east"
+yname = lat
+ylongname = "latitude"
+yunits = "degrees_north"
+xfirst = 0.5
+xinc = 1
+yfirst = 89.5
+yinc = -1
+
+
diff --git a/conf/grid_description/griddes_systemi2o2.txt b/conf/grid_description/griddes_systemi2o2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a024e8d5361d8d33ec1f8c2997282ac6a661cb3f
--- /dev/null
+++ b/conf/grid_description/griddes_systemi2o2.txt
@@ -0,0 +1,18 @@
+#
+# gridID 1
+#
+gridtype = lonlat
+gridsize = 64800
+xsize = 360
+ysize = 180
+xname = lon
+xlongname = "longitude"
+xunits = "degrees_east"
+yname = lat
+ylongname = "latitude"
+yunits = "degrees_north"
+xfirst = 0.5
+xinc = 1
+yfirst = 89.5
+yinc = -1
+
diff --git a/conf/output_dictionaries/scorecards.yml b/conf/output_dictionaries/scorecards.yml
index fa92042caf218489eecf32febfd9e9e10bb6717d..8eecb20d83bea820602cd9c32c8998bede5d9285 100644
--- a/conf/output_dictionaries/scorecards.yml
+++ b/conf/output_dictionaries/scorecards.yml
@@ -8,6 +8,9 @@ System:
   Meteo-France-System7:
     short_name: "meteofrances7"
     display_name: "Meteo-France System 7"
+  Meteo-France-System8:
+    short_name: "meteofrances8"
+    display_name: "Meteo-France System 8"
   DWD-GCFS2.1:
     short_name: "dwds21"
     display_name: "DWD System 21"
@@ -23,6 +26,9 @@ System:
   UK-MetOffice-Glosea600:
     short_name: "ukmos600"
     display_name: "UK Met Office System 600"
+  UKMO-System602:
+    short_name: "ukmo602"
+    display_name: "UK Met Office System 602"
   NCEP-CFSv2:
     short_name: "nceps2"
     display_name: "NCEP System 2"
diff --git a/conf/slurm_templates/run_parallel_workflow.sh b/conf/slurm_templates/run_parallel_workflow.sh
index e9ef6964e74919c809931a206d25bc731384a86b..4a473ec7568dd29329259b28183c5eeeb5233405 100644
--- a/conf/slurm_templates/run_parallel_workflow.sh
+++ b/conf/slurm_templates/run_parallel_workflow.sh
@@ -8,9 +8,9 @@ set -vx
 script=$1
 atomic_recipe=$2
-source MODULES
-# module load conda/22.11.1-2
-# conda activate condaCerise
-# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
+#source MODULES
+module load conda/22.11.1-2
+conda activate condaCerise
+export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
 Rscript ${script} ${atomic_recipe}
diff --git a/conf/slurm_templates/run_scorecards.sh b/conf/slurm_templates/run_scorecards.sh
index 9abcac172bb0a25c078d62ea9d7ddd136811b201..c00c4b79d3655556af28a55be5d91de78d578642 100644
--- a/conf/slurm_templates/run_scorecards.sh
+++ b/conf/slurm_templates/run_scorecards.sh
@@ -14,8 +14,10 @@ set -vx
 recipe=$1
 outdir=$2
-source MODULES
-
+#source MODULES
+module load conda/22.11.1-2
+conda activate condaCerise
+export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
 # Execute scorecards
 Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir}
diff --git a/conf/variable-dictionary.yml b/conf/variable-dictionary.yml
index 78b19b1a3fbd08002cd59e18da6786cd7dc3f33b..dcb51c9cebf56bc9e0a221d0061702ac036fa858 100644
--- a/conf/variable-dictionary.yml
+++ b/conf/variable-dictionary.yml
@@ -21,23 +21,23 @@ vars:
     accum: no
   tas:
     units: "K"
-    long_name: "Near-Surface Air Temperature"
+    long_name: "2-metre air temperature"
     standard_name: "air_temperature"
     accum: no
   tos:
-    units: "degC"
-    long_name: "Sea Surface Temperature"
+    units: "K"
+    long_name: "Sea surface temperature"
     standard_name: "sea_surface_temperature"
     accum: no
#    outname: "t2"
   tasmax:
     units: "K"
-    long_name: "Maximum Near-Surface Air Temperature"
+    long_name: "Maximum 2-metre air temperature"
     standard_name: "air_temperature"
     accum: no
   tasmin:
     units: "K"
-    long_name: "Minimum Near-Surface Air Temperature"
+    long_name: "Minimum 2-metre air temperature"
     standard_name: "air_temperature"
     accum: no
   ts:
@@ -47,12 +47,12 @@ vars:
     accum: no
   tdps:
     units: "K"
-    long_name: "2 metre dewpoint temperature"
+    long_name: "2-metre dewpoint temperature"
     standard_name:
     accum: no
   sfcWind:
     units: "m s-1"
-    long_name: "Near-Surface Wind Speed"
+    long_name: "Near surface wind speed"
     standard_name: "wind_speed"
     accum: no
   sfcWindmax:
@@ -111,7 +111,7 @@ vars:
     accum: yes
   psl:
     units: "Pa"
-    long_name: "Sea Level Pressure"
+    long_name: "Sea level pressure"
     standard_name: "air_pressure_at_mean_sea_level"
     accum: no
   clt:
diff --git a/crossval.R b/crossval.R
new file mode 100644
index 0000000000000000000000000000000000000000..d6d7b9e969f8d265dd03844b87dac7e23bea5c82
--- /dev/null
+++ b/crossval.R
@@ -0,0 +1,85 @@
+# Full cross-validation workflow
+## This code should be valid for individual months and temporal averages
+
+## data dimensions
+sdate_dim <- dim(data$hcst$data)['syear']
+nmemb <- dim(data$hcst$data)['ensemble']
+nftime <- dim(data$hcst$data)['time']
+nlats <- dim(data$hcst$data)['latitude']
+nlons <- dim(data$hcst$data)['longitude']
+
+source("make.eval.train.dexes.R")
+cross <- make.eval.train.dexes('k-fold', sdate_dim, NULL, k = k)
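+# 'cross' is a list with one element per fold; each element is expected to
+# hold cross[[t]]$train.dexes (the years used for fitting) and
+# cross[[t]]$eval.dexes (the held-out years), as used in the loop below.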
long_name: "Maximum 2-metre air temperature" standard_name: "air_temperature" accum: no tasmin: units: "K" - long_name: "Minimum Near-Surface Air Temperature" + long_name: "Minimum 2-metre air temperature" standard_name: "air_temperature" accum: no ts: @@ -47,12 +47,12 @@ vars: accum: no tdps: units: "K" - long_name: "2 metre dewpoint temperature" + long_name: "2-metre dewpoint temperature" standard_name: accum: no sfcWind: units: "m s-1" - long_name: "Near-Surface Wind Speed" + long_name: "Near surface wind speed" standard_name: "wind_speed" accum: no sfcWindmax: @@ -111,7 +111,7 @@ vars: accum: yes psl: units: "Pa" - long_name: "Sea Level Pressure" + long_name: "Sea level pressure" standard_name: "air_pressure_at_mean_sea_level" accum: no clt: diff --git a/crossval.R b/crossval.R new file mode 100644 index 0000000000000000000000000000000000000000..d6d7b9e969f8d265dd03844b87dac7e23bea5c82 --- /dev/null +++ b/crossval.R @@ -0,0 +1,85 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages + +## data dimensions +sdate_dim <- dim(data$hcst$data)['syear'] +nmemb <- dim(data$hcst$data)['ensemble'] +nftime <- dim(data$hcst$data)['time'] +nlats <- dim(data$hcst$data)['latitude'] +nlons <- dim(data$hcst$data)['longitude'] + +source("make.eval.train.dexes.R") +cross <- make.eval.train.dexes('k-fold', sdate_dim, NULL, k = k) + +outdim <- length(cross) # leave-one-out should be equal to sdate_dim +# What we need to return? +ano_hcst_ev_res <- array(NA, + c(nftime, nlats, nlons, nmemb, syear = outdim)) +ano_obs_ev_res <- array(NA, + c(nftime, nlats, nlons, ensemble = 1, syear = outdim)) +ano_obs_tr_res <- array(NA, +c(sample = length(cross[[1]]$train.dexes), nftime, nlats, nlons, ensemble = 1, syear = outdim)) +lims_ano_hcst_tr_res <- array(NA, + c(probs = 2, nftime, nlats, nlons, syear = outdim)) +lims_ano_obs_tr_res <- array(NA, + c(probs = 2, nftime, nlats, nlons, syear = outdim)) + +for (t in 1:outdim) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = recipe$Analysis$ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = recipe$Analysis$ncores) + rm("clim_obs_tr", "clim_hcst_tr", "obs_tr", "hcst_tr", "obs_ev", "hcst_ev") + + #Category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x) { + quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', + ncores = recipe$Analysis$ncores)$output1 + gc() + ano_hcst_ev_res[,,,,t] <- 
+  # Category limits
+  lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'),
+                            fun = function(x) {
+                              quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)},
+                            output_dims = 'probs',
+                            ncores = recipe$Analysis$ncores)$output1
+  lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'), #, 'ensemble'),
+                           fun = function(x) {
+                             quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)},
+                           output_dims = 'probs',
+                           ncores = recipe$Analysis$ncores)$output1
+  gc()
+  ano_hcst_ev_res[,,,,t] <- ano_hcst_ev
+  ano_obs_ev_res[,,,,t] <- ano_obs_ev
+  ano_obs_tr_res[,,,,,t] <- ano_obs_tr
+  lims_ano_hcst_tr_res[,,,,t] <- lims_ano_hcst_tr
+  lims_ano_obs_tr_res[,,,,t] <- lims_ano_obs_tr
+}
+# Build the result list once, after the loop (the arrays are already filled):
+res <- list(ano_hcst_ev = ano_hcst_ev_res,
+            ano_obs_ev = ano_obs_ev_res,
+            ano_obs_tr = ano_obs_tr_res, # reference forecast for the CRPSS
+            lims_ano_hcst_tr = lims_ano_hcst_tr_res,
+            lims_ano_obs_tr = lims_ano_obs_tr_res)
+info(recipe$Run$logger,
+     paste0("Cross-validation loop ended, returning elements: ",
+            paste(names(res), collapse = " ")))
+
+
+
diff --git a/datadownloading/adaptor.mars.internal-1709842918.8984334-4366-3-bb479a3d-4ff6-48f9-9b6d-654aff127d5b.grib b/datadownloading/adaptor.mars.internal-1709842918.8984334-4366-3-bb479a3d-4ff6-48f9-9b6d-654aff127d5b.grib
new file mode 100644
index 0000000000000000000000000000000000000000..01166c8898827ba0ae4bd6c31818cb033d778154
Binary files /dev/null and b/datadownloading/adaptor.mars.internal-1709842918.8984334-4366-3-bb479a3d-4ff6-48f9-9b6d-654aff127d5b.grib differ
diff --git a/datadownloading/dl-all-sfc.log b/datadownloading/dl-all-sfc.log
new file mode 100644
index 0000000000000000000000000000000000000000..e878982be2ca12523f81175db94ed1686ff1a961
--- /dev/null
+++ b/datadownloading/dl-all-sfc.log
@@ -0,0 +1,6 @@
+2024-03-19 09:32:47,606 INFO Welcome to the CDS
+2024-03-19 09:32:47,607 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels-monthly-means
+2024-03-19 09:32:47,754 INFO Request is queued
+2024-03-19 13:37:21,821 INFO Request is completed
+2024-03-19 13:37:21,895 INFO Downloading https://download-0013-clone.copernicus-climate.eu/cache-compute-0013/cache/data9/adaptor.mars.internal-1710854906.5186088-1783-8-40415c3c-1797-4a4f-94a7-c532ffcd6940.grib to download.grib (3.1G)
+ 0%| | 0.00/3.06G [00:00 script.py
+import cdsapi
+import os
+import yaml
+
+c = cdsapi.Client()
+
+years = [str(year) for year in range($year, $year_end + 1)]
+#months = [str(month).zfill(2) for month in range(int($month_start), int($month_end) + 1)]
+#months = [str(month) if month >= 10 else '0' + str(month) for month in range(2, 9 + 1)]
+
+#months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)]
+#python << EOF
+if not os.path.isfile("${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb"):
+    c.retrieve(
+        'reanalysis-era5-pressure-levels-monthly-means',
+        {
+            'format': 'grib',
+            'product_type': 'monthly_averaged_reanalysis',
+            'variable': '$code',
+            'pressure_level': '$lev',
+            'year': years,
+            'months': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
+            'time': '00:00',
+        },
+        '${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb')
+EOF
+python script.py
+      cdo splityearmon ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${lev}${suffix}/${var}${lev}_
+      rm ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb
+    done #lev
+  done #var
+  else #freq 1h
+  for var in ta ; do #ta ua va; do
+    case $var in
+      "ta") code="temperature";;
+      "ua") code="u_component_of_wind";;
+      "va") code="v_component_of_wind";;
+    esac
+    for lev in 850; do
+
+      mkdir -p ${var}${lev}${suffix}
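+      # The check below estimates the number of hourly fields in one request
+      # (years x months x 31 days x 24 hours); per the message, a single CDS
+      # request may not exceed 120.000 fields (about 160 months).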
+      if [[ $(( ( $year_end - $year_start + 1) * ( 10#$month_end - 10#$month_start + 1) *31*24 )) -ge 120000 ]]; then
+        echo "maximum fields per request is 120.000 (160 months). Reduce the start and end year/month"
+        exit
+      else
+year=$year_start
+python << EOF
+
+import cdsapi
+import os
+import yaml
+
+c = cdsapi.Client()
+years = [str(year) for year in range($year, $year_end + 1)]
+months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)]
+
+if not os.path.isfile("${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb"):
+    c.retrieve(
+        'reanalysis-era5-pressure-levels',
+        {
+            'product_type': 'reanalysis',
+            'format': 'grib',
+            'variable': '$code',
+            'pressure_level': '$lev',
+            'time': [
+                '00:00', '01:00', '02:00',
+                '03:00', '04:00', '05:00',
+                '06:00', '07:00', '08:00',
+                '09:00', '10:00', '11:00',
+                '12:00', '13:00', '14:00',
+                '15:00', '16:00', '17:00',
+                '18:00', '19:00', '20:00',
+                '21:00', '22:00', '23:00',
+            ],
+            'day': [
+                '01', '02', '03',
+                '04', '05', '06',
+                '07', '08', '09',
+                '10', '11', '12',
+                '13', '14', '15',
+                '16', '17', '18',
+                '19', '20', '21',
+                '22', '23', '24',
+                '25', '26', '27',
+                '28', '29', '30',
+                '31',
+            ],
+            'year': years,
+            'month': months,
+        },
+        '${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb')
+
+EOF
+      cdo splityearmon ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${lev}${suffix}/${var}${lev}_
+      rm ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb
+      fi #number of fields
+    done #lev
+  done #var
+  fi #freq
diff --git a/datadownloading/dl-era5-allyears-3D.bash b/datadownloading/dl-era5-allyears-3D.bash
new file mode 100755
index 0000000000000000000000000000000000000000..8e1c10102d95b02a1243a9b54965243850ff4082
--- /dev/null
+++ b/datadownloading/dl-era5-allyears-3D.bash
@@ -0,0 +1,153 @@
+#!/bin/bash
+
+
+
+#module load ecmwfapi
+module load conda/22.11.1-2
+conda activate cdsapi
+#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
+
+set -v
+
+
+#workdir=$1
+#model=$2
+# freq=$3
+year_start=$1
+year_end=$2
+#month_start=$3
+#month_end=$4
+
+freq="mon"
+workdir=$SCRATCH/cerise/data/
+
+  if [[ $freq == "mon" ]]; then
+    freq_dir="monthly_mean"
+    suffix="_f1h"
+  else
+    freq_dir="1hourly"
+    suffix=""
+  fi
+
+  mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir
+  cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir
+
+  if [[ $freq == "mon" ]]; then
+# tasmax and tasmin don't exist at monthly frequency
+  for var in ta ; do #ta ua va; do
+    case $var in
+      "ta") code="temperature";;
+      "ua") code="u_component_of_wind";;
+      "va") code="v_component_of_wind";;
+    esac
+    for lev in 850 ; do
+      mkdir -p ${var}${lev}${suffix}
+year=$year_start
+while [[ $year -le $year_end ]]; do
+  if [[ $( ls ${var}${lev}${suffix}/${var}${lev}_${year}* | wc -l ) == $((10#$month_end-10#$month_start+1)) ]]; then
+    year=$((year+1))
+  else
+    break
+  fi
+done
+#python << EOF
+cat << EOF > script.py
+import cdsapi
+import os
+import yaml
+
+c = cdsapi.Client()
+
+years = [str(year) for year in range($year, $year_end + 1)]
+#months = [str(month).zfill(2) for month in range(int($month_start), int($month_end) + 1)]
+#months = [str(month) if month >= 10 else '0' + str(month) for month in range(2, 9 + 1)]
+
+#months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)]
+#python << EOF
+if not os.path.isfile("${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb"):
+    c.retrieve(
+        'reanalysis-era5-pressure-levels-monthly-means',
+        {
+            'format': 'grib',
+            'product_type': 'monthly_averaged_reanalysis',
+            'variable': '$code',
+            'pressure_level': '$lev',
+            'year': years,
+            'months': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
+            'time': '00:00',
+        },
+        '${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb')
+EOF
+python script.py
+      cdo 
splityearmon ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${lev}${suffix}/${var}${lev}_ + rm ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb + done #lev + done #var + else #freq 1h + for var in ta ; do #ta ua va; do + case $var in + "ta") code="temperature";; + "ua") code="u_component_of_wind";; + "va") code="v_component_of_wind";; + esac + for lev in 850; do + + mkdir -p ${var}${lev}${suffix} + if [[ $(( ( $year_end - $year_start + 1) * ( 10#$month_end - 10#$month_start + 1) *31*24 )) -ge 120000 ]]; then + echo "maximum fields per request is 120.000 (160 months). Reduce the start and end year/month" + exit + else +year=$year_start +python << EOF + +import cdsapi +import os +import yaml + +c = cdsapi.Client() +years = [str(year) for year in range($year, $year_end + 1)] +months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)] + +if not os.path.isfile("${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb"): + c.retrieve( + 'reanalysis-era5-pressure-levels', + { + 'product_type': 'reanalysis', + 'format': 'grib', + 'variable': $code, + 'pressure_level': '$lev', + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'day': [ + '01', '02', '03', + '04', '05', '06', + '07', '08', '09', + '10', '11', '12', + '13', '14', '15', + '16', '17', '18', + '19', '20', '21', + '22', '23', '24', + '25', '26', '27', + '28', '29', '30', + '31', + ], + 'year': years, + 'month': months', + }, + '${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb') + +EOF + cdo splityearmon ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${lev}${suffix}/${var}${lev}_ + rm ${var}${lev}${suffix}/${var}${lev}_${year_start}${month_start}-${year_end}${month_end}.grb + fi #number of fields + done #lev + done #var + fi #freq diff --git a/datadownloading/dl-era5-allyears-manual.bash b/datadownloading/dl-era5-allyears-manual.bash new file mode 100755 index 0000000000000000000000000000000000000000..7f1ff32741b2243f485029cc53caae4d266df640 --- /dev/null +++ b/datadownloading/dl-era5-allyears-manual.bash @@ -0,0 +1,160 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate cdsapi +#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +year_start=$1 +year_end=$2 +#month_start=$3 +#month_end=$4 + +freq="mon" +workdir=$SCRATCH/cerise/data/ + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f1h" + else + freq_dir="1hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir + cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir + + if [[ $freq == "mon" ]]; then +#tasmax and tasmin doesn't exist + for var in sfcWind tos tdps prlr psl tas; do #sfcWind tas tos psl prlr tdps; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} +year=$year_start +while [[ $year -le $year_end ]]; do + if [[ $( ls 
${var}${suffix}/${var}_${year}* | wc -l ) == $((10#$month_end-10#$month_start+1)) ]]; then + year=$((year+1)) + else + break + fi +done +#cat << EOF > script.py +python << EOF +import cdsapi +import os +import yaml + +#c = cdsapi.Client() + +with open('/home/cyce/.cdsapirc', 'r') as f: + credentials = yaml.safe_load(f) + +c = cdsapi.Client(url=credentials['url'], key=credentials['key']) + +years = [str(year) for year in range($year, $year_end + 1)] +#months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)] +#months = [str(month).zfill(2) for month in range(int($month_start), int($month_end) + 1)] + +if not os.path.isfile("${var}${suffix}/${var}_${year}${month_start}-${year_end}${month_end}.grb"): + c.retrieve( + 'reanalysis-era5-single-levels-monthly-means', + { + 'format': 'grib', + 'product_type': 'monthly_averaged_reanalysis', + 'variable': '$code', + 'year': years, + 'months': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], + 'time': '00:00', + }, + '${var}${suffix}/${var}_${year}${month_start}-${year_end}${month_end}.grb') +EOF + cdo splityearmon ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${suffix}/${var}_ + rm ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb + done #var + else #freq 1h + for var in tasmin tasmax; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "uas") code="10m_u_component_of_wind";; + "vas") code="10m_v_component_of_wind";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tasmax") code="'maximum_2m_temperature_since_previous_post_processing'";; + "tasmin") code="'minimum_2m_temperature_since_previous_post_processing'";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + if [[ $(( ( $year_end - $year_start + 1) * ( 10#$month_end - 10#$month_start + 1) *31*24 )) -ge 120000 ]]; then + echo "maximum fields per request is 120.000 (160 months). 
Reduce the start and end year/month" + exit + else +year=$year_start +python << EOF + +import cdsapi +import os +import yaml + +c = cdsapi.Client() +years = [str(year) for year in range($year, $year_end + 1)] +months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)] + +if not os.path.isfile("${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb"): + c.retrieve( + 'reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'format': 'grib', + 'variable': $code, + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'day': [ + '01', '02', '03', + '04', '05', '06', + '07', '08', '09', + '10', '11', '12', + '13', '14', '15', + '16', '17', '18', + '19', '20', '21', + '22', '23', '24', + '25', '26', '27', + '28', '29', '30', + '31', + ], + 'year': years, + 'month': months', + }, + '${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb') + +EOF + cdo splityearmon ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${suffix}/${var}_ + rm ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb + fi #number of fields + done #var + fi #freq diff --git a/datadownloading/dl-era5-allyears.bash b/datadownloading/dl-era5-allyears.bash new file mode 100755 index 0000000000000000000000000000000000000000..5db9bd249461276057aec5d7c53e76bfb80825f9 --- /dev/null +++ b/datadownloading/dl-era5-allyears.bash @@ -0,0 +1,160 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate cdsapi +#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +year_start=$1 +year_end=$2 +#month_start=$3 +#month_end=$4 + +freq="mon" +workdir=$SCRATCH/cerise/data/tmp + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f1h" + else + freq_dir="1hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir + cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir + + if [[ $freq == "mon" ]]; then +#tasmax and tasmin doesn't exist + for var in sfcWind tos tdps prlr psl tas; do #sfcWind tas tos psl prlr tdps; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} +year=$year_start +while [[ $year -le $year_end ]]; do + if [[ $( ls ${var}${suffix}/${var}_${year}* | wc -l ) == $((10#$month_end-10#$month_start+1)) ]]; then + year=$((year+1)) + else + break + fi +done +#cat << EOF > script.py +python << EOF +import cdsapi +import os +import yaml + +#c = cdsapi.Client() + +with open('/home/cyce/.cdsapirc', 'r') as f: + credentials = yaml.safe_load(f) + +c = cdsapi.Client(url=credentials['url'], key=credentials['key']) + +years = [str(year) for year in range($year, $year_end + 1)] +#months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)] +#months = [str(month).zfill(2) for month in range(int($month_start), int($month_end) + 1)] + +if not os.path.isfile("${var}${suffix}/${var}_${year}${month_start}-${year_end}${month_end}.grb"): + c.retrieve( + 
'reanalysis-era5-single-levels-monthly-means', + { + 'format': 'grib', + 'product_type': 'monthly_averaged_reanalysis', + 'variable': '$code', + 'year': years, + 'months': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], + 'time': '00:00', + }, + '${var}${suffix}/${var}_${year}${month_start}-${year_end}${month_end}.grb') +EOF + cdo splityearmon ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${suffix}/${var}_ + rm ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb + done #var + else #freq 1h + for var in tasmin tasmax; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "uas") code="10m_u_component_of_wind";; + "vas") code="10m_v_component_of_wind";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tasmax") code="'maximum_2m_temperature_since_previous_post_processing'";; + "tasmin") code="'minimum_2m_temperature_since_previous_post_processing'";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + if [[ $(( ( $year_end - $year_start + 1) * ( 10#$month_end - 10#$month_start + 1) *31*24 )) -ge 120000 ]]; then + echo "maximum fields per request is 120.000 (160 months). Reduce the start and end year/month" + exit + else +year=$year_start +python << EOF + +import cdsapi +import os +import yaml + +c = cdsapi.Client() +years = [str(year) for year in range($year, $year_end + 1)] +months = [str(month).zfill(2) for month in range(int("$month_start".strip("0")), int("$month_end".strip("0")) + 1)] + +if not os.path.isfile("${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb"): + c.retrieve( + 'reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'format': 'grib', + 'variable': $code, + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'day': [ + '01', '02', '03', + '04', '05', '06', + '07', '08', '09', + '10', '11', '12', + '13', '14', '15', + '16', '17', '18', + '19', '20', '21', + '22', '23', '24', + '25', '26', '27', + '28', '29', '30', + '31', + ], + 'year': years, + 'month': months', + }, + '${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb') + +EOF + cdo splityearmon ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb ${var}${suffix}/${var}_ + rm ${var}${suffix}/${var}_${year_start}${month_start}-${year_end}${month_end}.grb + fi #number of fields + done #var + fi #freq diff --git a/datadownloading/dl-era5.bash b/datadownloading/dl-era5.bash new file mode 100755 index 0000000000000000000000000000000000000000..66181c10558018c7cfb29a04e15635ff4ccf6808 --- /dev/null +++ b/datadownloading/dl-era5.bash @@ -0,0 +1,130 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate cdsapi +#export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="1hourly" +workdir=$SCRATCH/cerise/data/ + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f1h" + else + freq_dir="1hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/recon/ecmwf/era5/$freq_dir + cd $workdir/esarchive/recon/ecmwf/era5/$freq_dir + + for year in $( seq 1993 2017); do + for mon in $( seq -w 1 12); do + + if [[ 
$freq == "mon" ]]; then +#tasmax and tasmin doesn't exist + for var in sfcWind tos tdps prlr psl tas; do #sfcWind tas tos psl prlr tdps; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}.grb ]] ; then +python << EOF +import cdsapi +import yaml +c = cdsapi.Client() + +c.retrieve( + 'reanalysis-era5-single-levels-monthly-means', + { + 'format': 'grib', + 'product_type': 'monthly_averaged_reanalysis', + 'variable': '$code', + 'year': '$year', + 'month': '$mon', + 'time': '00:00', + }, + '${var}${suffix}/${var}_${year}${mon}.grb') +EOF + fi #file exists + done #var + else #freq 1h + for var in tasmin tasmax; do + case $var in + "sfcWind") code="10m_wind_speed";; + "tas") code="2m_temperature";; + "uas") code="10m_u_component_of_wind";; + "vas") code="10m_v_component_of_wind";; + "tos") code="sea_surface_temperature";; + "psl") code="mean_sea_level_pressure";; + "prlr") code="total_precipitation";; + "tasmax") code="'maximum_2m_temperature_since_previous_post_processing'";; + "tasmin") code="'minimum_2m_temperature_since_previous_post_processing'";; + "tdps") code="2m_dewpoint_temperature";; + esac + + mkdir -p ${var}${suffix} + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +python << EOF + +import cdsapi +import yaml + +c = cdsapi.Client() + +c.retrieve( + 'reanalysis-era5-single-levels', + { + 'product_type': 'reanalysis', + 'format': 'grib', + 'variable': $code, + 'time': [ + '00:00', '01:00', '02:00', + '03:00', '04:00', '05:00', + '06:00', '07:00', '08:00', + '09:00', '10:00', '11:00', + '12:00', '13:00', '14:00', + '15:00', '16:00', '17:00', + '18:00', '19:00', '20:00', + '21:00', '22:00', '23:00', + ], + 'day': [ + '01', '02', '03', + '04', '05', '06', + '07', '08', '09', + '10', '11', '12', + '13', '14', '15', + '16', '17', '18', + '19', '20', '21', + '22', '23', '24', + '25', '26', '27', + '28', '29', '30', + '31', + ], + 'year': '$year', + 'month': '$mon', + }, + '${var}${suffix}/${var}_${year}${mon}.grb') + +EOF + fi #file exists + done #var + fi #freq + done #year + done #mon diff --git a/datadownloading/dl-gpcp_v3.2.sh b/datadownloading/dl-gpcp_v3.2.sh new file mode 100644 index 0000000000000000000000000000000000000000..26a907cec87be364347ae248ea31d71bb2aa15a4 --- /dev/null +++ b/datadownloading/dl-gpcp_v3.2.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +######################################### +# This script downloads M2TMNXFLX data +######################################### +# Input parameters +year=$1 + +# Paths +path_out='/ec/res4/hpcperm/data/esarchive/obs/nasa/gpcp_v3.2/monthly' +mkdir -p ${path_out} +cd ${path_out} +for year in $(seq 1993 2023); do + for mon in $(seq -w 1 12);do + url="https://measures.gesdisc.eosdis.nasa.gov/data/GPCP/GPCPMON.3.2/${year}" + file="GPCPMON_L3_${year}${mon}_V3.2.nc4" + wget --load-cookies ~/.urs_cookies --save-cookies ~/.urs_cookies --keep-session-cookies --no-check-certificate --secure-protocol=TLSv1_2 -nc "${url}/${file}" + rm wget-log* + done +done + diff --git a/datadownloading/dl-manual.py b/datadownloading/dl-manual.py new file mode 100644 index 
0000000000000000000000000000000000000000..19ec288843c74727dbb04a8fcbf43e4f8cba393a --- /dev/null +++ b/datadownloading/dl-manual.py @@ -0,0 +1,22 @@ +import cdsapi + +c = cdsapi.Client() + +c.retrieve( + 'reanalysis-era5-single-levels-monthly-means', + { + 'product_type': 'monthly_averaged_reanalysis', + 'variable': [ + '10m_wind_speed', '2m_dewpoint_temperature', '2m_temperature', + 'mean_sea_level_pressure', 'sea_surface_temperature', 'total_precipitation', + ], + 'year': [ + '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', + ], + 'month': [ + '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', + ], + 'time': '00:00', + 'format': 'grib', + }, + 'download.grib') diff --git a/datadownloading/dl-mask_seasonal.bash b/datadownloading/dl-mask_seasonal.bash new file mode 100755 index 0000000000000000000000000000000000000000..d002790d9e080514b86e7d442cdf4339cba7627f --- /dev/null +++ b/datadownloading/dl-mask_seasonal.bash @@ -0,0 +1,89 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="6hourly" +workdir=$SCRATCH/cerise/data/ + + +for model in ukmo; do #ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="51";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 1993 1994); do + for mon in $( seq -w 1 10); do + + if [[ $freq == "mon" ]]; then + + for var in land-sea; do + case $var in + "land-sea") code="172";; + esac + + mkdir -p ${var}${suffix} + + done #var + else #freq 6h + for var in land-sea; do + case $var in + "land-sea") code="172.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-09, +expver=1, +step=0, +levtype=sfc, +method=1, +number=301/302/303/304/305/306/307, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/datadownloading/dl-seasonal-expECMWF.bash b/datadownloading/dl-seasonal-expECMWF.bash new file mode 100755 index 0000000000000000000000000000000000000000..38e911c7c35725ba72c3be059381b2f1d6b3cbcc --- /dev/null +++ b/datadownloading/dl-seasonal-expECMWF.bash @@ -0,0 +1,133 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$HPCPERM/data/ + + +for model in ecmwf; do #ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="i2o2";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 1993 2022); do + for mon in 05 11; do + + if [[ $freq == "mon" ]]; then + + for var in ta850; do #sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "ta850") code="130.128";; + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=rd, +date=$year-$mon-01, +expver=$system, +fcmonth=1/2/3/4, +levtype=pl, +levelist=850, +method=1, +number=0/to/50/by/1, +origin=$origin, +param=$code, +stream=msmm, +time=00:00:00, +grid=1/1, +area=89.5/0.5/-89.5/359.5, +interpolation="--interpolation=grid-box-average", +type=fcmean, +expect=any, +target='${var}${suffix}/${var}_${year}${mon}01.grb' + +EOF + cdo shifttime,-5days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 + mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb + fi #file exists + done #var + else #freq 6h + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01 +expver=1, +step=1/to/5160/by/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +grid=1/1, +area=89.5/0.5/-89.5/359.5, +interpolation="--interpolation=grid-box-average", +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/datadownloading/dl-seasonal-expECMWF_mask.bash b/datadownloading/dl-seasonal-expECMWF_mask.bash new file mode 100755 index 0000000000000000000000000000000000000000..53261bb974a016ebc46f3a4497ba6a1b66e55a76 --- /dev/null +++ b/datadownloading/dl-seasonal-expECMWF_mask.bash @@ -0,0 +1,134 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$HPCPERM/data/ + + +for model in ecmwf; do #ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="i2o2";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 2021 2022); do + for mon in 11; do + + if [[ $freq == "mon" ]]; then + + for var in lsm; do #sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "lsm") code="172.128";; + "ta850") code="130.128";; + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=rd, +date=$year-$mon-01, +expver=$system, +levtype=surface, +level=off, +method=1, +number=0/to/50/by/1, +origin=$origin, +param=$code, +step=0, +stream=mmsf, +time=00:00:00, +grid=1/1, +area=89.5/0.5/-89.5/359.5, +interpolation="--interpolation=grid-box-average", +type=fc, +expect=any, +target='${var}${suffix}/${var}_${year}${mon}01.grb' + +EOF + cdo shifttime,-5days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 + mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb + fi #file exists + done #var + else #freq 6h + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01 +expver=1, +step=1/to/5160/by/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +grid=1/1, +area=89.5/0.5/-89.5/359.5, +interpolation="--interpolation=grid-box-average", +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/datadownloading/dl-seasonal.bash b/datadownloading/dl-seasonal.bash new file mode 100755 index 0000000000000000000000000000000000000000..cce0af4460e1f1ab324812903afde85b05410716 --- /dev/null +++ b/datadownloading/dl-seasonal.bash @@ -0,0 +1,126 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$HPCPERM/data/ + + +for model in cmcc meteo_france; do # cmcc meteo_france; do #ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="51";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 1993 2016); do + for mon in $( seq -w 1 12); do + + if [[ $freq == "mon" ]]; then + + for var in tasmax tasmin; do #sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01, +expver=1, +fcmonth=1/2/3/4/5/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=msmm, +system=$system, +time=00:00:00, +type=fcmean, +expect=any, +target='${var}${suffix}/${var}_${year}${mon}01.grb' + +EOF + cdo shifttime,-5days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 + mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb + fi #file exists + done #var + else #freq 6h + for var in tasmax; do #sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01 +expver=1, +step=1/to/5160/by/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/datadownloading/dl-seasonal_pl.bash b/datadownloading/dl-seasonal_pl.bash new file mode 100755 index 0000000000000000000000000000000000000000..fb9049e914d7d30ad8fafca8afc6240262742d35 --- /dev/null +++ b/datadownloading/dl-seasonal_pl.bash @@ -0,0 +1,128 @@ +#!/bin/bash + + + +#module load ecmwfapi +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +set -v + + +#workdir=$1 +#model=$2 +# freq=$3 +freq="mon" +workdir=$SCRATCH/cerise/data/ + + +for model in ukmo; do # cmcc meteo_france; do #ecmwf cmcc meteo_france dwd ukmo; do +#model="ecmwf" #cmcc, meteo_france, dwd, cmcc, ukmo + + case $model in + + "ecmwf") origin="ecmf" ; system="51";; + "meteo_france") origin="lfpw" ; system="8";; + "dwd") origin="edzw" ; system="21";; + "cmcc") origin="cmcc" ; system="35" ;; + "ukmo") origin="egrr" ; system="602" ;; + esac + + + if [[ $freq == "mon" ]]; then + freq_dir="monthly_mean" + suffix="_f6h" + else + freq_dir="6hourly" + suffix="" + fi + + mkdir -p $workdir/esarchive/exp/$model/system$system/$freq_dir + cd $workdir/esarchive/exp/$model/system$system/$freq_dir + + for year in $( seq 1993 1994); do + for mon in $( seq -w 2 12); do + + if [[ $freq == "mon" ]]; then + + for var in ta850; do #tasmax tas tasmin tos sfcWind psl tdps prlr; do #sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "ta850") code="130.128";; + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + + if [[ ! -f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01, +expver=1, +fcmonth=1/2/3/4/5/6, +levtype=pl, +levelist=850, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=msmm, +system=$system, +time=00:00:00, +type=fcmean, +expect=any, +target='${var}${suffix}/${var}_${year}${mon}01.grb' + +EOF + cdo shifttime,-5days ${var}${suffix}/${var}_${year}${mon}01.grb ${var}${suffix}/${var}_${year}${mon}01.grb2 + mv ${var}${suffix}/${var}_${year}${mon}01.grb2 ${var}${suffix}/${var}_${year}${mon}01.grb + fi #file exists + done #var + else #freq 6h + for var in sfcWind tas tos psl prlr tasmin tasmax tdps; do + case $var in + "sfcWind") code="207.128";; + "tas") code="167.128";; + "tos") code="34.128";; + "psl") code="151.128";; + "prlr") code="228.172";; + "tasmin") code="52.128";; + "tasmax") code="51.128";; + "tdps") code="168.128";; + esac + + mkdir -p ${var}${suffix} + if [[ ! 
-f ${var}${suffix}/${var}_${year}${mon}01.grb ]] ; then +mars << EOF +retrieve, +class=c3, +date=$year-$mon-01 +expver=1, +step=1/to/5160/by/6, +levtype=sfc, +method=1, +number=0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35/36/37/38/39/40/41/42/43/44/45/46/47/48/49/50, +origin=$origin, +param=$code, +stream=mmsf, +system=$system, +time=00:00:00, +type=fc, +expect=any, +target="${var}${suffix}/${var}_${year}${mon}01.grb" +EOF + + fi #file exists + done #var + fi #freq + done #year + done #mon +done #model diff --git a/datadownloading/monthly_maxmin.bash b/datadownloading/monthly_maxmin.bash new file mode 100644 index 0000000000000000000000000000000000000000..c2ff5e7e2e4feb76c01879d16fc9ce91cc91d277 --- /dev/null +++ b/datadownloading/monthly_maxmin.bash @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +module load conda/22.11.1-2 +conda activate condaCerise +export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib + +dir=/ec/res4/scratch/cyce/cerise/data/esarchive/recon/ecmwf/era5/1hourly/tasmax/ +cd $dir +outdir=/ec/res4/scratch/cyce/cerise/data/esarchive/recon/ecmwf/era5/monthly_mean/tasmax_f24h/ +mkdir -p $outdir +for f in *; do + echo "$f" + if [[ ! -f $outdir/$f ]]; then + cdo daymax "$f" $TMPDIR/$f + cdo monmean $TMPDIR/$f $outdir/$f + fi +done + diff --git a/datadownloading/rename.bash b/datadownloading/rename.bash new file mode 100644 index 0000000000000000000000000000000000000000..7e8a63727cf84afa39a55256224ad21ff32536e7 --- /dev/null +++ b/datadownloading/rename.bash @@ -0,0 +1,3 @@ +for f in *.grib; do + mv "$f" "${f%.grib}.grb" +done diff --git a/datadownloading/shiftime.bash b/datadownloading/shiftime.bash new file mode 100644 index 0000000000000000000000000000000000000000..46effb96669921d70b4b156d818c7e058198dc84 --- /dev/null +++ b/datadownloading/shiftime.bash @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +for f in *; do + echo "$f" + cdo shifttime,+1days "$f" "$f"2 + mv "$f"2 "$f" +done + diff --git a/full_NAO_scorecards.R b/full_NAO_scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..0c83bdb65428e6e1aa77773db50824589edb6c65 --- /dev/null +++ b/full_NAO_scorecards.R @@ -0,0 +1,248 @@ + +source("modules/Loading/Loading.R") +#source("modules/Units/Units.R") +source("modules/Saving/Saving.R") +source("modules/Visualization/Visualization.R") +source("modules/Units/Units.R") + +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_NAO_scorecards.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +# Full-cross-val workflow +sdate_dim <- dim(data$hcst$data)['syear'] +nftime <- dim(data$hcst$data)['time'] +cross <- CSTools:::.make.eval.train.dexes('leave-one-out', sdate_dim, NULL) +# Paralelized: +loops <- array(1:length(cross), c(loop = length(cross))) + + source("modules/Indices/R/tmp/NAO.R") + source("modules/Indices/R/tmp/Utils.R") + source("modules/Indices/R/tmp/EOF.R") + source("modules/Indices/R/tmp/ProjectField.R") + + # subset NAO region (because grib is global data): + sub <- SelBox(data$hcst$data, + lon = as.vector(data$hcst$coords$longitude), + lat = as.vector(data$hcst$coords$latitude), + region = c(lonmin = -80, lonmax = 40, + latmin = 20, latmax = 80), + latdim = 'latitude', londim = 'longitude') + data$hcst$data <- sub$data + data$hcst$coords$longitude <- sub$lon + data$hcst$coords$latitude <- sub$lat + sub <- SelBox(data$obs$data, + lon = 
as.vector(data$obs$coords$longitude),
+              lat = as.vector(data$obs$coords$latitude),
+              region = c(lonmin = -80, lonmax = 40,
+                         latmin = 20, latmax = 80),
+              latdim = 'latitude', londim = 'longitude')
+data$obs$data <- sub$data
+data$obs$coords$longitude <- sub$lon
+data$obs$coords$latitude <- sub$lat
+
+# UKMO January 1993 is missing:
+if (recipe$Analysis$Time$sdate == '0101') {
+  if (recipe$Analysis$Datasets$System$name == "UKMO-System602") {
+    info(recipe$Run$logger,
+         "UKMO January 1993 not available")
+    data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = 2:sdate_dim)
+    data$obs <- CST_Subset(data$obs, along = 'syear', indices = 2:sdate_dim)
+    sdate_dim <- dim(data$hcst$data)['syear']
+  }
+}
+
+
+res <- Apply(list(loops), target = NULL,
+             fun = function(t) {
+  # subset years:
+  # training
+  obs_tr <- Subset(data$obs$data, along = 'syear',
+                   indices = cross[[t]]$train.dexes)
+  hcst_tr <- Subset(data$hcst$data, along = 'syear',
+                    indices = cross[[t]]$train.dexes)
+  # eval years
+  hcst_ev <- Subset(data$hcst$data, along = 'syear',
+                    indices = cross[[t]]$eval.dexes)
+  obs_ev <- Subset(data$obs$data, along = 'syear',
+                   indices = cross[[t]]$eval.dexes)
+  # compute climatology:
+  clim_obs_tr <- MeanDims(obs_tr, 'syear')
+  clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+  # compute anomalies:
+  ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr)
+  ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr)
+  ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr)
+  ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr)
+  # compute NAO:
+  nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev,
+             ftime_avg = NULL, time_dim = 'syear',
+             memb_dim = 'ensemble',
+             space_dim = c('latitude', 'longitude'),
+             ftime_dim = 'time',
+             lat = data$obs$coords$latitude,
+             lon = data$obs$coords$longitude,
+             ncores = recipe$Analysis$ncores)
+
+  nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev,
+                    ftime_avg = NULL, time_dim = 'syear',
+                    memb_dim = 'ensemble',
+                    space_dim = c('latitude', 'longitude'),
+                    ftime_dim = 'time',
+                    lat = data$obs$coords$latitude,
+                    lon = data$obs$coords$longitude,
+                    ncores = recipe$Analysis$ncores)$exp_cor
+  # Standardisation:
+  # Need the nao_hcst (for the train.dexes) to standardise the eval.dexes?
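+  # Illustrative sketch (not executed), assuming scalar train/eval series:
+  # each evaluation value z is standardised with training-period statistics,
+  #   z_std <- (z - mean(train)) / sd(train)
+  # which is what the four Apply() calls below implement (training stats
+  # taken from nao$exp / nao$obs, applied to the evaluation indices).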
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), + function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, + function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function (z) {(z-means)/sd})}, ncores = recipe$Analysis$ncores)$output1 + #Category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = 'syear', + fun = function(x) {quantile(as.vector(x), c(1/3, 2/3), na.rm = TRUE)}, + output_dims = 'probs', ncores = recipe$Analysis$ncores)$output1 + + return(list(#ano_obs_tr = ano_obs_tr, ano_hcst_tr = ano_hcst_tr, + #ano_hcst_ev = ano_hcst_ev, ano_obs_ev = ano_obs_ev, + #clim_obs_tr = clim_obs_tr, clim_hcst_tr = clim_hcst_tr, + nao_hcst_tr = nao_hcst_tr, nao_hcst_ev = nao_hcst_ev, + nao_obs_ev = nao_obs_ev, nao_obs_tr = nao_obs_tr, + lims_nao_hcst_tr = lims_nao_hcst_tr, lims_nao_obs_tr = lims_nao_obs_tr)) + }, ncores = 1) +# RPS +source("GetProbs.R") +nao_hcst_probs_ev <- GetProbs(res$nao_hcst_ev, time_dim = 'loop', + prob_thresholds = NULL, + bin_dim_abs = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_nao_hcst_tr, + ncores = recipe$Analysis$ncores) +res$lims_nao_obs_tr <- Subset(res$lims_nao_obs_tr, indices = 1, along = 'ensemble', + drop = 'selected') +nao_obs_probs_ev <- GetProbs(res$nao_obs_ev, time_dim = 'loop', + prob_thresholds = NULL, + bin_dim_abs = 'probs', indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = res$lims_nao_obs_tr, + ncores = recipe$Analysis$ncores) +rps <- RPS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, memb_dim = NULL, + cat_dim = 'probs', cross.val = FALSE, time_dim = 'loop', + ncores = recipe$Analysis$ncores) +source("modules/Skill/R/RPS_clim.R") +rps_clim <- Apply(list(nao_obs_probs_ev), + target_dims = c('probs', 'syear'), + RPS_clim, bin_dim_abs = 'probs', cross.val = FALSE)$output1 + +# RPSS +rpss <- RPSS(exp = nao_hcst_probs_ev, obs = nao_obs_probs_ev, + time_dim = 'loop', memb_dim = NULL, + cat_dim = 'probs', + # We should use a line like this + #abs_threshold = res$lims_ano_hcst_tr, + #prob_threshold = c(1/3, 2/3), + cross.val = FALSE, + ncores = recipe$Analysis$ncores) +# CRPS +crps <- CRPS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, + time_dim = 'loop', memb_dim = 'ensemble', + ncores = recipe$Analysis$ncores) +res$nao_obs_ev <- Subset(res$nao_obs_ev, indices = 1, along = 'syear', drop = 'selected') +crps_clim <- CRPS(exp = 
res$nao_obs_tr, obs = res$nao_obs_ev, + time_dim = 'loop', memb_dim = 'syear', + ncores = recipe$Analysis$ncores) + +# CRPSS +ref <- res$nao_obs_tr +dim(ref) <- c(ensemble = as.numeric(sdate_dim) -1, + nftime, loop = as.numeric(sdate_dim)) +res$nao_hcst_ev <- Subset(res$nao_hcst_ev, indices = 1, along = 'syear', drop = 'selected') + +crpss <- CRPSS(exp = res$nao_hcst_ev, obs = res$nao_obs_ev, ref = ref, + memb_dim = 'ensemble', + time_dim = 'loop', clim.cross.val = FALSE, + ncores = recipe$Analysis$ncores) + +# Corr +enscorr <- s2dv::Corr(res$nao_hcst_ev, res$nao_obs_ev, + dat_dim = 'dat', + time_dim = 'loop', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = 0.05, + ncores = recipe$Analysis$ncores) + +# Mean Bias +#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev, +mean_bias <- Bias(data$hcst$data, data$obs$data, + time_dim = 'syear', + memb_dim = 'ensemble', + ncores = recipe$Analysis$ncores) +# Spread error ratio +obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1, + drop = "selected") +enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr', + fcst = res$ano_hcst_ev, + obs = obs_noensdim, + tdim = which(names(dim(res$ano_hcst_ev))=='loop'), + ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'), + na.rm = TRUE, + ncpus = recipe$Analysis$ncores) + +skill_metrics <- list(mean_bias = mean_bias, enscorr = enscorr$corr, + enscorr_significance = enscorr$sign, enssprerr = enssprerr, + #rps = rps, + rpss = rpss$rpss, rpss_significance = rpss$sign, #crps = crps, + crpss = crpss$crpss, crpss_significance = crpss$sign) +skill_metrics <- lapply(skill_metrics, function(x) { + InsertDim(drop(x), len = 1, pos = 1, name = 'var')}) +original <- recipe$Run$output_dir +recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") +# Compute save metrics +source("modules/Saving/Saving.R") +Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics) +recipe$Run$output_dir <- original + +source("modules/Visualization/Visualization.R") +#PlotEquiMap(mean_bias[1,1,1,1,1,,c(182:360, 1:181)], +# lon = -179:180, +# lat = data$hcst$attrs$Variable$metadata$latitude, filled.co = F, +# fileout = "/esarchive/scratch/nperez/test.png") +skill_metrics <- lapply(skill_metrics, function(x) { + Subset(x, along = 'longitude', indices = c(182:360, 1:181)) + }) +data$hcst$coords$longitude <- -179:180 +Visualization(recipe, data, skill_metrics, significance = TRUE) diff --git a/full_ecvs_scorecards.R b/full_ecvs_scorecards.R new file mode 100644 index 0000000000000000000000000000000000000000..fa6daf84a847be2a2375cc864272704797814c3d --- /dev/null +++ b/full_ecvs_scorecards.R @@ -0,0 +1,247 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_scorecards_seasonal.yml" +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + +#reg <- SelBox(data$hcst$data, lon = as.vector(data$hcst$coords$longitude), +# lat = as.vector(data$hcst$coords$latitude), +# region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30), +# latdim = 'latitude', londim = 'longitude') +# +#data$hcst$data <- reg$data +#data$hcst$coords$longitude <- reg$lon +#data$hcst$coords$latitude <- reg$lat + +#reg <- 
SelBox(data$obs$data, lon = as.vector(data$obs$coords$longitude),
+#              lat = as.vector(data$obs$coords$latitude),
+#              region = c(lonmin = 180, lonmax = 270, latmin = -30, latmax = 30),
+#              latdim = 'latitude', londim = 'longitude')
+
+#data$obs$data <- reg$data
+#data$obs$coords$longitude <- reg$lon
+#data$obs$coords$latitude <- reg$lat
+
+# UKMO January 1993 is missing:
+if (recipe$Analysis$Time$sdate == '0101') {
+  if (recipe$Analysis$Datasets$System$name == "UKMO-System602") {
+    if (1993 %in% recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end) {
+      info(recipe$Run$logger,
+           "UKMO January 1993 not available")
+      ind <- recipe$Analysis$Time$hcst_start:recipe$Analysis$Time$hcst_end
+      ind <- (1:length(ind))[-which(ind == 1993)]
+      data$hcst <- CST_Subset(data$hcst, along = 'syear', indices = ind)
+      data$obs <- CST_Subset(data$obs, along = 'syear', indices = ind)
+      sdate_dim <- dim(data$hcst$data)['syear']
+    }
+  }
+}
+
+k <- 3
+source("crossval.R")
+
+## Define FAIR option:
+fair <- TRUE
+
+## START SKILL ASSESSMENT:
+# RPS
+source("GetProbs.R")
+ano_hcst_probs_ev <- GetProbs(res$ano_hcst_ev, time_dim = 'syear',
+                              prob_thresholds = NULL,
+                              bin_dim_abs = 'probs',
+                              indices_for_quantiles = NULL,
+                              memb_dim = 'ensemble', abs_thresholds = res$lims_ano_hcst_tr,
+                              ncores = recipe$Analysis$ncores)
+ano_obs_probs_ev <- GetProbs(res$ano_obs_ev, time_dim = 'syear',
+                             prob_thresholds = NULL,
+                             bin_dim_abs = 'probs',
+                             indices_for_quantiles = NULL,
+                             memb_dim = 'ensemble',
+                             abs_thresholds = res$lims_ano_obs_tr,
+                             ncores = recipe$Analysis$ncores)
+source("modules/Skill/R/tmp/RPS.R")
+rps <- RPS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev, memb_dim = NULL,
+           cat_dim = 'probs', cross.val = FALSE, time_dim = 'syear',
+           Fair = fair, nmemb = nmemb,
+           ncores = recipe$Analysis$ncores)
+source("modules/Skill/R/RPS_clim.R")
+rps_clim <- Apply(list(ano_obs_probs_ev),
+                  target_dims = c('probs', 'syear'),
+                  RPS_clim, bin_dim_abs = 'probs', Fair = fair,
+                  cross.val = FALSE, ncores = recipe$Analysis$ncores)$output1
+# RPSS
+source("RPSS.R")
+source("RandomWalkTest.R")
+rpss <- RPSS(exp = ano_hcst_probs_ev, obs = ano_obs_probs_ev,
+             time_dim = 'syear', memb_dim = NULL,
+             cat_dim = 'probs', Fair = fair, nmemb = nmemb,
+             nmemb_ref = sdate_dim - k,
+             # We should use a line like this
+             #abs_threshold = res$lims_ano_hcst_tr,
+             #prob_threshold = c(1/3, 2/3),
+             cross.val = FALSE,
+             na.rm = FALSE,
+             ncores = recipe$Analysis$ncores)
+#PlotEquiMap(rps[1,,], lon = data$hcst$coords$longitude,
+#            lat = data$hcst$coords$latitude, filled.c = F,
+#            fileout = "test.png")
+# CRPS
+crps <- CRPS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+             time_dim = 'syear', memb_dim = 'ensemble',
+             Fair = fair,
+             ncores = recipe$Analysis$ncores)
+# I am not sure how this one should be computed:
+# It cannot be done here with CRPS_clim.R because it would include
+# information from the other (training) years.
+#source("modules/Skill/R/CRPS_clim.R")
+# But using ano_obs_tr it can be computed here;
+# the result is the same as inside the loop.
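+# Illustrative sketch (not executed), univariate case: with the training
+# observations as pseudo-members, ref <- ano_obs_tr and observed value y,
+#   crps_clim_y <- mean(abs(ref - y)) - 0.5 * mean(abs(outer(ref, ref, "-")))
+# which is the ensemble CRPS that the CRPS() call below evaluates with
+# memb_dim = 'sample'.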
+crps_clim <- CRPS(exp = res$ano_obs_tr, obs = res$ano_obs_ev,
+                  time_dim = 'syear', memb_dim = 'sample',
+                  Fair = fair,
+                  ncores = recipe$Analysis$ncores)
+
+
+# CRPSS
+ref <- res$ano_obs_tr
+dim(ref) <- c(ensemble = as.numeric(sdate_dim) - k,
+              nftime, nlats, nlons, sdate_dim)
+crpss <- CRPSS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev, ref = ref,
+               memb_dim = 'ensemble', Fair = fair,
+               time_dim = 'syear', clim.cross.val = FALSE,
+               ncores = recipe$Analysis$ncores)
+
+
+# Corr
+source("modules/Skill/R/tmp/Corr.R")
+enscorr <- Corr(res$ano_hcst_ev, res$ano_obs_ev,
+                dat_dim = NULL,
+                time_dim = 'syear',
+                method = 'pearson',
+                memb_dim = 'ensemble',
+                memb = F,
+                conf = F,
+                pval = F,
+                sign = T,
+                alpha = 0.05,
+                ncores = recipe$Analysis$ncores)
+
+# Mean Bias
+#mean_bias <- Bias(res$ano_hcst_ev, res$ano_obs_ev,
+mean_bias <- Bias(data$hcst$data, data$obs$data,
+                  time_dim = 'syear',
+                  memb_dim = 'ensemble',
+                  ncores = recipe$Analysis$ncores)
+
+mean_bias_sign <- Apply(list(data$hcst$data, data$obs$data),
+                        target_dims = list(c('syear', 'ensemble'), 'syear'),
+                        fun = function(x, y) {
+                          if (!(any(is.na(x)) || any(is.na(y)))) {
+                            res <- t.test(x = y,
+                                          y = apply(x, 1, mean, na.rm = T),
+                                          alternative = "two.sided")$p.value
+                          } else {
+                            res <- NA
+                          }
+                          return(res)},
+                        ncores = recipe$Analysis$ncores)$output1
+mean_bias_sign <- mean_bias_sign <= 0.05
+#PlotEquiMap(mean_bias[1,1,1,1,1,,], lat = data$hcst$coords$latitude,
+#            lon = data$hcst$coords$longitude,
+#            dots = mean_bias_sign[1,1,1,1,1,,,1])
+
+# Spread error ratio
+source("SprErr.R")
+enssprerr <- SprErr(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+                    memb_dim = 'ensemble', dat_dim = NULL,
+                    time_dim = 'syear', pval = TRUE,
+                    ncores = recipe$Analysis$ncores)
+enssprerr_sign <- enssprerr$p.val
+enssprerr_sign <- enssprerr_sign <= 0.05
+enssprerr <- enssprerr$ratio
+
+# RMSE
+rms <- RMS(exp = res$ano_hcst_ev, obs = res$ano_obs_ev,
+           memb_dim = 'ensemble', dat_dim = NULL,
+           time_dim = 'syear', alpha = 0.05,
+           ncores = recipe$Analysis$ncores)
+
+#obs_noensdim <- ClimProjDiags::Subset(res$ano_obs_ev, "ensemble", 1,
+#                                      drop = "selected")
+
+#enssprerr <- easyVerification::veriApply(verifun = 'EnsSprErr',
+#                                         fcst = res$ano_hcst_ev,
+#                                         obs = obs_noensdim,
+#                                         tdim = which(names(dim(res$ano_hcst_ev))=='syear'),
+#                                         ensdim = which(names(dim(res$ano_hcst_ev))=='ensemble'),
+#                                         na.rm = FALSE,
+#                                         ncpus = recipe$Analysis$ncores)
+if (any(is.na(rpss$sign))) {
+  info(recipe$Run$logger,
+       "RPSS significance contains NAs; setting them to FALSE")
+  rpss$sign[is.na(rpss$sign)] <- FALSE
+}
+skill_metrics <- list(mean_bias = mean_bias,
+                      mean_bias_significance = mean_bias_sign,
+                      enscorr = enscorr$corr,
+                      enscorr_significance = enscorr$sign,
+                      enssprerr = enssprerr,
+                      enssprerr_significance = enssprerr_sign,
+                      rps = rps, rps_clim = rps_clim, crps = crps, crps_clim = crps_clim,
+                      rpss = rpss$rpss, rpss_significance = rpss$sign,
+                      crpss = crpss$crpss, crpss_significance = crpss$sign,
+                      rms = rms$rms)
+skill_metrics <- lapply(skill_metrics, function(x) {
+  InsertDim(drop(x), len = 1, pos = 1, name = 'var')})
+original <- recipe$Run$output_dir
+recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+skill_metrics <- lapply(skill_metrics, function(x) {
+  if (is.logical(x)) {
+    dims <- dim(x)
+    res <- as.numeric(x)
+    dim(res) <- dims
+  } else {
+    res <- x
+  }
+  return(res)
+  })
+# Compute and save metrics
+source("modules/Saving/Saving.R")
+#Saving <- Saving(recipe = recipe, data = data, skill = skill_metrics)
+ save_metrics(recipe = recipe,
+              metrics = skill_metrics,
+              data_cube = data$hcst, agg 
= 'global',
+              outdir = recipe$Run$output_dir)
+
+recipe$Run$output_dir <- original
+
+source("modules/Visualization/Visualization.R")
+if (data$hcst$coords$longitude[1] != 0) {
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    Subset(x, along = 'longitude', indices = c(182:360, 1:181))
+    })
+}
+info(recipe$Run$logger,
+     paste("hcst lons:", data$hcst$coords$longitude))
+info(recipe$Run$logger,
+     paste("obs lons:", data$obs$coords$longitude))
+
+
+data$hcst$coords$longitude <- -179:180
+
+Visualization(recipe, data, skill_metrics, significance = 'both')
+
+source("tools/add_logo.R")
+add_logo(recipe, "rsz_rsz_bsc_logo.png")
+
diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh
index 6149a9639604942f58897363dd0c854f010b79a2..5b6825233596dc837790d7e21301ca3a232df850 100644
--- a/launch_SUNSET.sh
+++ b/launch_SUNSET.sh
@@ -103,10 +103,10 @@ fi
 tmpfile=$(mktemp ${TMPDIR-/tmp}/SUNSET.XXXXXX)
 
 # Create outdir and split recipes
-source MODULES
-# module load conda/22.11.1-2
-# conda activate condaCerise
-# export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
+#source MODULES
+ module load conda/22.11.1-2
+ conda activate condaCerise
+ export LD_LIBRARY_PATH=/perm/cyce/conda/envs/condaCerise/lib
 
 Rscript split.R ${recipe} $disable_unique_ID --tmpfile $tmpfile
 
diff --git a/make.eval.train.dexes.R b/make.eval.train.dexes.R
new file mode 100644
index 0000000000000000000000000000000000000000..d48606c725a586ea815a9b9fa69ae579fffd88c6
--- /dev/null
+++ b/make.eval.train.dexes.R
@@ -0,0 +1,45 @@
+make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor,
+                                  k = 1) {
+  if (!is.null(k) && k >= amt.points) {
+    stop("k needs to be smaller than amt.points")
+  }
+
+  if (eval.method == "leave-one-out") {
+    dexes.lst <- lapply(seq(1, amt.points), function(x) return(list(eval.dexes = x,
+                        train.dexes = seq(1, amt.points)[-x])))
+  } else if (eval.method == "k-fold") {
+    # k must be an odd number
+    dexes.lst <- lapply(seq(1, amt.points), function(x, kfold = k) {
+      if (x >= ((kfold-1)/2) + 1 && x + ((kfold-1)/2) <= amt.points) {
+        ind <- (x-((kfold-1)/2)):(x+((kfold-1)/2))
+      } else if (x < ((kfold-1)/2) + 1) {
+        ind <- c((amt.points - ((kfold-1)/2-x)):amt.points, 1:(x+(kfold-1)/2))
+      } else if ((x+((kfold-1)/2)) > amt.points) {
+        ind <- c((x-(kfold-1)/2):amt.points, 1:(((kfold-1)/2)-amt.points+x))
+      } else {
+        stop("Review make.eval.train.dexes function")
+      }
+      return(list(eval.dexes = x, train.dexes = seq(1, amt.points)[-ind]))
+    })
+  } else if (eval.method == "retrospective") {
+    # k can be any integer indicating when to start the evaluation
+    dexes.lst <- base::Filter(length, lapply(seq(1, amt.points),
+                  function(x, mindata = k) {
+                    if (x > mindata) {
+                      return(list(eval.dexes = x,
+                                  train.dexes = 1:(x-1)))
+                    }}))
+  } else if (eval.method == "in-sample") {
+    dexes.lst <- list(list(eval.dexes = seq(1, amt.points),
+                           train.dexes = seq(1, amt.points)))
+  } else if (eval.method == "hindcast-vs-forecast") {
+    dexes.lst <- list(list(eval.dexes = seq(1, amt.points_cor),
+                           train.dexes = seq(1, amt.points)))
+  } else {
+    stop(paste0("Unknown sampling method: ", eval.method))
+  }
+  return(dexes.lst)
+}
+
diff --git a/mask.R b/mask.R
new file mode 100644
index 0000000000000000000000000000000000000000..98b6a22e9826c42e13f4de3821c0d7c63b9db8bc
--- /dev/null
+++ b/mask.R
@@ -0,0 +1,33 @@
+source("modules/Loading/Loading.R")
+source("modules/Loading/R/GRIB/grbload.R")
+library(gribr)
+
+path <- "../data/esarchive/exp/cmcc/system35/6hourly/land-sea/"
+
+path <- 
"/ec/res4/scratch/cyce/cerise/data/esarchive/exp/ukmo/system602/6hourly/land-sea/" +files <- list.files(path) +files <- paste0(path, files) +pos <- max((which(strsplit(files[1], "")[[1]] == "."))) +result <- NULL +for (dat in files) { + file_to_load <- grib_open(dat) + gm1 <- grib_get_message(file_to_load, 1) + memb_ftime <- grib_select(file_to_load, + list(validityDate = substr(dat, pos - 8, pos - 1)))$values + result <- abind::abind(result, + array(as.numeric(memb_ftime), + dim = c(longitude = gm1$Ni, + latitude = gm1$Nj)), along = 3) + # Save memory + +} +test22 <- array(as.numeric(gm1$values), + dim = c(longitude = gm1$Ni, + latitude = gm1$Nj)) +names(dim(result)) <- c('lon', 'lat', 'time') +vari <- Apply(list(result), target_dims = 'time', fun = function(x) {var(x)})$output1 +range(vari) +lons <- unique(gm1$longitudes) +lats <- unique(gm1$latitudes) +PlotEquiMap(result[,,1], lon = lons, lat = lats, filled.c = FALSE, fileout = 'test.png') + diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..bf9cff9fdf3aac80e95d1eb3cb733dcac2121770 --- /dev/null +++ b/modules/Crossval/Crossval_anomalies.R @@ -0,0 +1,324 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
+
+  ## output objects
+  ano_hcst_ev_res <- NULL
+  ano_obs_ev_res <- NULL
+  ano_obs_tr_res <- NULL
+  # as long as probs requested in recipe:
+  lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x) {NULL})
+  hcst_probs_ev <- lapply(categories, function(x) {NULL})
+  obs_probs_ev <- lapply(categories, function(x) {NULL})
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+
+    # subset years: Subset works at BSC not at Athos
+    ## training indices
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    hcst_tr <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$train.dexes)
+    ## evaluation indices
+    hcst_ev <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$eval.dexes, drop = 'selected')
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes, drop = 'selected')
+    # compute climatology:
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+    # compute anomalies:
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                             ncores = ncores)
+    ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                             ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # compute category limits
+    lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'),
+                              fun = function(x, prob_lims) {
+                                lapply(prob_lims, function(ps) {
+                                  quantile(as.vector(x), ps, na.rm = na.rm)})},
+                              output_dims = lapply(categories, function(x) {'cat'}),
+                              prob_lims = categories,
+                              ncores = ncores)
+    lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'), #, 'ensemble'),
+                             fun = function(x, prob_lims) {
+                               lapply(prob_lims, function(ps) {
+                                 quantile(as.vector(x), ps, na.rm = na.rm)})},
+                             output_dims = lapply(categories, function(x) {'cat'}),
+                             prob_lims = categories,
+                             ncores = ncores)
+    # store results
+    ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev,
+                             along = length(dim(ano_hcst_ev)) + 1)
+    ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev,
+                            along = length(dim(ano_obs_ev)) + 1)
+    ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr,
+                            along = length(dim(ano_obs_tr)) + 1)
+    for (ps in 1:length(categories)) {
+      lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]],
+                                          along = length(dim(lims_ano_hcst_tr[[ps]])) + 1)
+      lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]],
+                                         along = length(dim(lims_ano_obs_tr[[ps]])) + 1)
+    }
+  }
+  info(recipe$Run$logger,
+       "#### Anomalies Cross-validation loop ended #####")
+  gc()
+  # Add dim names:
+  names(dim(ano_hcst_ev_res)) <- ev_dim_names
+  names(dim(ano_obs_ev_res)) <- ev_dim_names
+  names(dim(ano_obs_tr_res)) <- tr_dim_names
+  # To make crps_clim work, the reference forecast needs the same dims as obs:
+  ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded',
+                           indices = 1, drop = 'selected')
+  for (ps in 1:length(categories)) {
+    names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat',
+      orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear')
+    names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat',
+      tr_dim_names[-which(tr_dim_names %in% c('ensemble'))])
+    lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]],
+                                        along = 'unneeded', indices = 1, drop = 'selected')
+  }
+  # Forecast anomalies:
+  if (!is.null(data$fcst)) {
+    
clim_hcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% 
names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube for the post-processed fcst
+  ## in this case cross-validated anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
+
diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R
new file mode 100644
index 0000000000000000000000000000000000000000..fd76d67f58e354fe037878ff8e29e4ebdd4312c5
--- /dev/null
+++ b/modules/Crossval/Crossval_metrics.R
@@ -0,0 +1,260 @@
+
+source("modules/Saving/Saving.R")
+source("modules/Crossval/R/tmp/RPS.R")
+source("modules/Crossval/R/RPS_clim.R")
+source("modules/Crossval/R/CRPS_clim.R")
+source("modules/Crossval/R/tmp/RPSS.R")
+source("modules/Crossval/R/tmp/RandomWalkTest.R")
+source("modules/Crossval/R/tmp/Corr.R")
+source("modules/Crossval/R/tmp/Bias.R")
+source("modules/Crossval/R/tmp/SprErr.R")
+source("modules/Crossval/R/tmp/Eno.R")
+
+## data_crossval is the result from function full_crossval_anomalies or similar.
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube for the post-processed fcst
+  ## in this case cross-validated anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+## the recipe could be used to read the percentiles
+## if fair is TRUE, the nmemb used to compute the probabilities is needed
+  ## nmemb_ref is the number of years - 1 in case the climatological forecast is the reference
+Crossval_metrics <- function(recipe, data_crossval,
+                             fair = FALSE, nmemb = NULL, nmemb_ref = NULL) {
+  ncores <- recipe$Analysis$ncores
+  alpha <- recipe$Analysis$Skill$alpha
+  na.rm <- recipe$Analysis$remove_NAs
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      eval(parse(text = y))})})
+
+  # TODO: distinguish between rpss and bss
+  # if 1 percentile -> bss
+  # if more than 1 -> rpss
+  exe_rps <- unlist(lapply(categories, function(x) {
+    if (length(x) > 1) {
+      x <- x[1] * 100
+    }
+    return(x)}))
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+  ## START SKILL ASSESSMENT:
+  skill_metrics <- list()
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+  # The recipe allows requesting more than only terciles:
+  for (ps in 1:length(exe_rps)) {
+    if ('rps' %in% requested_metrics) {
+      rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]],
+                 obs = data_crossval$probs$obs_ev[[1]], memb_dim = NULL,
+                 cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear',
+                 Fair = fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data_crossval$probs$obs_ev[[1]]),
+                        target_dims = c('cat', 'syear'),
+                        RPS_clim, bin_dim_abs = 'cat', Fair = fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[1]],
+                   obs = data_crossval$probs$obs_ev[[1]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'cat', nmemb = nmemb,
+                   dat_dim = NULL,
+                   prob_thresholds = categories[[ps]],
+                   indices_for_clim = NULL,
+                   Fair = fair, weights_exp = NULL, weights_ref = NULL,
+                   cross.val = FALSE, na.rm = na.rm,
+                   sig_method.type = 'two.sided.approx', alpha = alpha,
+                   ncores = ncores)
+      skill_metrics$rpss <- rpss$rpss
+      skill_metrics$rpss_significance <- rpss$sign
+      # TO USE IT when visualization works for more rpss
+      #skill_metrics[[paste0('rpss', exe_rps[ps])]] <- rpss$rpss
+      
#skill_metrics[[paste0('rpss',
+      #                      exe_rps[ps],
+      #                      "_significance")]] <- rpss$sign
+    }
+  }
+  if ('crps' %in% requested_metrics) {
+    crps <- CRPS(exp = data_crossval$hcst$data,
+                 obs = data_crossval$obs$data,
+                 time_dim = 'syear', memb_dim = 'ensemble',
+                 Fair = fair,
+                 ncores = ncores)
+    skill_metrics$crps <- crps
+    crps_clim <- CRPS(exp = data_crossval$ref_obs_tr,
+                      obs = data_crossval$obs$data,
+                      time_dim = 'syear', memb_dim = 'ensemble',
+                      Fair = fair, ncores = ncores)
+    skill_metrics$crps_clim <- crps_clim
+  }
+  if ('crpss' %in% requested_metrics) {
+    crpss <- CRPSS(exp = data_crossval$hcst$data,
+                   obs = data_crossval$obs$data,
+                   ref = data_crossval$ref_obs_tr,
+                   memb_dim = 'ensemble', Fair = fair,
+                   time_dim = 'syear', clim.cross.val = FALSE,
+                   ncores = ncores)
+    skill_metrics$crpss <- crpss$crpss
+    skill_metrics$crpss_significance <- crpss$sign
+  }
+
+  if ('enscorr' %in% requested_metrics) {
+    enscorr <- Corr(exp = data_crossval$hcst$data,
+                    obs = data_crossval$obs$data,
+                    dat_dim = NULL,
+                    time_dim = 'syear',
+                    method = 'pearson',
+                    memb_dim = 'ensemble',
+                    memb = F,
+                    conf = F,
+                    pval = F,
+                    sign = T,
+                    alpha = alpha,
+                    ncores = ncores)
+    skill_metrics$enscorr <- enscorr
+  }
+  if ('mean_bias' %in% requested_metrics) {
+    if (!is.null(data_crossval$hcst.full_val$data)) {
+      mean_bias <- Bias(exp = data_crossval$hcst.full_val$data,
+                        obs = data_crossval$obs.full_val$data,
+                        time_dim = 'syear',
+                        memb_dim = 'ensemble',
+                        alpha = alpha,
+                        ncores = ncores)
+      skill_metrics$mean_bias <- mean_bias$bias
+      skill_metrics$mean_bias_significance <- mean_bias$sig
+    } else {
+      info(recipe$Run$logger,
+           "Full values not available")
+    }
+  }
+  if ('enssprerr' %in% requested_metrics) {
+    enssprerr <- SprErr(exp = data_crossval$hcst$data,
+                        obs = data_crossval$obs$data,
+                        memb_dim = 'ensemble', dat_dim = NULL,
+                        time_dim = 'syear', pval = TRUE,
+                        ncores = ncores)
+    skill_metrics$SprErr <- enssprerr$ratio
+    skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha
+  }
+  if ('rms' %in% requested_metrics) {
+    rms <- RMS(exp = data_crossval$hcst$data,
+               obs = data_crossval$obs$data,
+               memb_dim = 'ensemble', dat_dim = NULL,
+               time_dim = 'syear', alpha = alpha,
+               ncores = ncores)
+    skill_metrics$rms <- rms$rms
+  }
+  if ('rmss' %in% requested_metrics) {
+    rmss <- RMSSS(exp = data_crossval$hcst$data,
+                  obs = data_crossval$obs$data,
+                  ref = data_crossval$ref_obs_tr,
+                  memb_dim = 'ensemble', dat_dim = NULL,
+                  time_dim = 'syear', alpha = alpha, sign = TRUE,
+                  ncores = ncores)
+    skill_metrics$rmss <- rmss$rmss
+    skill_metrics$rmss_significance <- rmss$sign
+  }
+  if (is.null(data_crossval$hcst_EM)) {
+    data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data,
+                                      dims = 'ensemble',
+                                      drop = TRUE)
+  }
+  if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
+    std_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1
+
+    std_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1
+
+    skill_metrics[['std_hcst']] <- std_hcst
+    skill_metrics[['std_obs']] <- std_obs
+  }
+  if (any(c('var', 'variance') %in% requested_metrics)) {
+    ## Calculate variance
+    var_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1 ^ 2
+
+    var_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1 ^ 2
+
+    skill_metrics[['var_hcst']] <- var_hcst
+    skill_metrics[['var_obs']] <- var_obs
+  } ## close if on variance
+  if ('n_eff' %in% requested_metrics) {
+    ## Calculate degrees of freedom
+    n_eff <- 
s2dv::Eno(data = data_crossval$obs$data, + time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } ## close on n_eff + + if (any(c('cov', 'covariance') %in% requested_metrics)) { + covariance <- Apply(data = list(x = data_crossval$obs$data, + y = data_crossval$hcst_EM), + target_dims = 'syear', + fun = function(x, y) { + cov(as.vector(x), as.vector(y), + use = "everything", + method = "pearson")})$output1 + skill_metrics$covariance <- covariance + } + original <- recipe$Run$output_dir + recipe$Run$output_dir <- paste0(original, "/outputs/Skill/") + + skill_metrics <- lapply(skill_metrics, function(x) { + if (is.logical(x)) { + dims <- dim(x) + res <- as.numeric(x) + dim(res) <- dims + } else { + res <- x + } + return(res) + }) + # Save metrics + save_metrics(recipe = recipe, + metrics = skill_metrics, + data_cube = data_crossval$hcst, agg = 'global', + outdir = recipe$Run$output_dir) + + recipe$Run$output_dir <- original + # reduce dimension to work with Visualization module: + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) + return(skill_metrics) +} + + diff --git a/modules/Indices/R/tmp/EOF.R b/modules/Indices/R/tmp/EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..734c71c3f6e94ed805cdb3e92a22002666541a9e --- /dev/null +++ b/modules/Indices/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). 
+#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + 
"length as the longitude dimension of 'ano'.")) + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr == TRUE) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano/InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. 
+ if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. + if (neofs == 1) { + PC <- InsertDim(PC, 2, 1, name = 'new') + } + + # Computation of the % of variance associated with each mode + W <- pca$d[1:neofs] + tot.var <- sum(pca$d^2) + var.eof <- 100 * pca$d[1:neofs]^2/tot.var + + for (e in 1:neofs) { + # Set all masked grid points to NA in the EOFs + # Divide patterns by area weights so that EOF * PC gives unweigthed (original) data + EOF[e, , ] <- EOF[e, , ] * mask / wght + # PC is multiplied by the explained variance, + # so that the reconstruction is only EOF * PC + PC[, e] <- PC[, e] * W[e] + } + + if (neofs == 1) { + var.eof <- as.array(var.eof) + } + + return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) +} + diff --git a/modules/Indices/R/tmp/NAO.R b/modules/Indices/R/tmp/NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..e69afbff7c8fa10eb9960b71eb91bce8de7db11a --- /dev/null +++ b/modules/Indices/R/tmp/NAO.R @@ -0,0 +1,574 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. 
+#'@param exp_cor A named numeric array of the Nort Atlantic SLP (20-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimension 'time_dim' of length 1 (as in the case of an operational +#' forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of reference period needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. If average is not needed, set NULL. The default +#' value is 2:4, i.e., from 2nd to 4th forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE, default) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leave the +#' evaluated year out), then projecting forecast anomalies onto this EOF +#' (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE +#' and 'exp' will be used when obsproj is FALSE, and no cross-validation is +#' applied. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains some of the following items depending on the data inputs: +#'\item{exp}{ +#' A numeric array of hindcast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#'\item{obs}{ +#' A numeric array of observation NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#'} +#'\item{exp_cor}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. 
+#' DOI: 10.1007/s00382-003-0350-4
+#'
+#'@examples
+#'# Make up synthetic data
+#'set.seed(1)
+#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9))
+#'set.seed(2)
+#'obs <- array(rnorm(810), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9))
+#'lat <- seq(20, 80, length.out = 6)
+#'lon <- seq(-80, 40, length.out = 9)
+#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon)
+#'
+#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9))
+#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE)
+#'# Plot the NAO index
+#' \dontrun{
+#'nao$exp <- Reorder(nao$exp, c(2, 1))
+#'nao$obs <- Reorder(nao$obs, c(2, 1))
+#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS",
+#'               monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X")
+#' }
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate',
+                memb_dim = 'member', space_dim = c('lat', 'lon'),
+                ftime_dim = 'ftime', ftime_avg = 2:4,
+                obsproj = TRUE, ncores = NULL) {
+  # Check inputs
+  ## exp, obs, and exp_cor (1)
+  if (is.null(obs) & is.null(exp)) {
+    stop("Parameters 'exp' and 'obs' cannot both be NULL.")
+  }
+  if (!is.null(exp)) {
+    if (!is.numeric(exp)) {
+      stop("Parameter 'exp' must be a numeric array.")
+    }
+    if (is.null(dim(exp))) {
+      stop(paste0("Parameter 'exp' must have at least dimensions ",
+                  "time_dim, memb_dim, space_dim, and ftime_dim."))
+    }
+    if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) {
+      stop("Parameter 'exp' must have dimension names.")
+    }
+  }
+  if (!is.null(obs)) {
+    if (!is.numeric(obs)) {
+      stop("Parameter 'obs' must be a numeric array.")
+    }
+    if (is.null(dim(obs))) {
+      stop(paste0("Parameter 'obs' must have at least dimensions ",
+                  "time_dim, space_dim, and ftime_dim."))
+    }
+    if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+      stop("Parameter 'obs' must have dimension names.")
+    }
+  }
+  if (!is.null(exp_cor)) {
+    if (!is.numeric(exp_cor)) {
+      stop("Parameter 'exp_cor' must be a numeric array.")
+    }
+    if (is.null(dim(exp_cor))) {
+      stop(paste0("Parameter 'exp_cor' must have at least dimensions ",
+                  "time_dim, memb_dim, space_dim, and ftime_dim."))
+    }
+    if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) {
+      stop("Parameter 'exp_cor' must have dimension names.")
+    }
+    if (is.null(exp) || is.null(obs)) {
+      stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is provided.")
+    }
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!is.null(exp)) {
+    if (!time_dim %in% names(dim(exp))) {
+      stop("Parameter 'time_dim' is not found in 'exp' dimension.")
+    }
+  }
+  if (!is.null(obs)) {
+    if (!time_dim %in% names(dim(obs))) {
+      stop("Parameter 'time_dim' is not found in 'obs' dimension.")
+    }
+  }
+  if (!is.null(exp_cor)) {
+    if (!time_dim %in% names(dim(exp_cor))) {
+      stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.")
+    }
+    if (dim(exp_cor)[time_dim] > 1) {
+      stop("Parameter 'exp_cor' is expected to have length 1 in ",
+           time_dim, " dimension.")
+    }
+  }
+
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!is.null(exp)) {
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+ } + } + add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (any(!space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (any(!space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with 
the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.")) + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.")) + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop(paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.")) + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi/180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[c(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. 
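+        ## Illustrative note on the leave-one-out logic: with ntime = 3 and
+        ## tt = 2, EOF_obs above is fitted on start dates {1, 3} only, while
+        ## .ProjectField() returns the PC series for all 3 start dates; only
+        ## PF[2] is stored below, so nao_obs[2] never depends on an EOF
+        ## trained on its own start date.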
+ nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, c(1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Indices/R/tmp/ProjectField.R b/modules/Indices/R/tmp/ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..810b2474a51ba51ab25b022978d3fa46f4a23971 --- /dev/null +++ b/modules/Indices/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. 
It returns principal components (PCs) by area-weighted
+#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from
+#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NAs and
+#'returns NA if the whole spatial pattern is NA.
+#'
+#'@param ano A numerical array of anomalies with named dimensions. The
+#' dimensions must have at least 'time_dim' and 'space_dim'. It can be
+#' generated by Ano().
+#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which
+#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and
+#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated
+#' by EOF() or REOF().
+#'@param time_dim A character string indicating the name of the time dimension
+#' of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#' name of latitude of 'ano' and the second is the dimension name of longitude
+#' of 'ano'. The default value is c('lat', 'lon').
+#'@param mode An integer of the variability mode number in the EOF to be
+#' projected on. The default value is NULL, which means all the modes of 'eof'
+#' are used.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return A numerical array of the principal components in the verification
+#'  format. The dimensions are the same as 'ano' except 'space_dim'.
+#'
+#'@seealso EOF, NAO, PlotBoxWhisker
+#'@examples
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon)
+#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon)
+#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1)
+#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1)
+#'
+#'\dontrun{
+#'  # Plot the forecast and the observation of the first mode for the last year
+#'  # of forecast
+#'  sdate_dim_length <- dim(mode1_obs)['sdate']
+#'  plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1),
+#'       lwd = 2)
+#'  for (i in 1:dim(mode1_exp)['member']) {
+#'    par(new = TRUE)
+#'    plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i],
+#'         ylim = c(-15000, 15000))
+#'  }
+#'}
+#'
+#'@import multiApply
+#'@export
+ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'),
+                         mode = NULL, ncores = NULL) {
+
+  # Check inputs
+  ## ano (1)
+  if (is.null(ano)) {
+    stop("Parameter 'ano' cannot be NULL.")
+  }
+  if (!is.numeric(ano)) {
+    stop("Parameter 'ano' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) {
+    stop("Parameter 'ano' must have dimension names.")
+  }
+  ## eof (1)
+  if (is.null(eof)) {
+    stop("Parameter 'eof' cannot be NULL.")
+  }
+  if (!is.list(eof)) {
+    stop("Parameter 'eof' must be a list generated by EOF() or REOF().")
+  }
+  if ('EOFs' %in% names(eof)) {
+    EOFs <- "EOFs"
+  } else if ('REOFs' %in% names(eof)) {
+    EOFs <- "REOFs"
+  } else if ('patterns' %in% names(eof)) {
+    EOFs <- "patterns"
+  } else {
+    stop(paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ",
+                "or 'patterns'. 
It can be generated by EOF(), REOF(), or EuroAtlanticTC().")) + } + if (!'wght' %in% names(eof)) { + stop(paste0("Parameter 'eof' must be a list that contains 'wght'. ", + "It can be generated by EOF() or REOF().")) + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop(paste0("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.")) + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop(paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.")) + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop(paste0("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.")) + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop(paste0("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.")) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop(paste0("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. 
Check if 'ano' and 'eof' are compatible."))
+    }
+
+    common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)]
+    if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] !=
+            dim(eof_mode)[dimnames_without_mode])) {
+      stop(paste0("Parameter 'ano' and 'EOF' in parameter 'eof' have common dimensions ",
+                  "with different lengths. Check if 'ano' and 'eof' are compatible."))
+    }
+
+    # Enlarge eof/ano if needed. The margin_dims of Apply() must be consistent
+    # between ano and eof.
+    additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))]
+    additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)]
+    if (length(additional_dims) != 0) {
+      for (i in 1:length(additional_dims)) {
+        eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1),
+                              lendim = additional_dims[i], name = names(additional_dims)[i])
+      }
+    }
+    if ('mode' %in% names(dim(eof_mode))) {
+      eof_mode_target <- c('mode', space_dim)
+      output_dims <- c('mode', time_dim)
+    } else {
+      eof_mode_target <- space_dim
+      output_dims <- time_dim
+    }
+    res <- Apply(list(ano, eof_mode),
+                 target_dims = list(c(time_dim, space_dim),
+                                    eof_mode_target),
+                 output_dims = output_dims,
+                 wght = eof$wght,
+                 fun = .ProjectField,
+                 ncores = ncores)$output1
+  }
+
+  return(res)
+}
+
+
+.ProjectField <- function(ano, eof_mode, wght) {
+  # ano: [sdate, lat, lon]
+  # eof_mode: [lat, lon] or [mode, lat, lon]
+  # wght: [lat, lon]
+
+  ntime <- dim(ano)[1]
+  if (length(dim(eof_mode)) == 2) {  # mode != NULL
+    # Initialization of pc.ver.
+    pc.ver <- array(NA, dim = ntime)  # [sdate]
+
+    # Weight
+    e.1 <- eof_mode * wght
+    ano <- ano * InsertDim(wght, 1, ntime)
+    #ano <- aaply(ano, 1, '*', wght)  # much heavier
+
+    na <- rowMeans(ano, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    #na <- apply(ano, 1, mean, na.rm = TRUE)  # much heavier
+    tmp <- ano * InsertDim(e.1, 1, ntime)  # [sdate, lat, lon]
+    rm(ano)
+    #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE)  # much heavier
+    pc.ver <- rowSums(tmp, na.rm = TRUE)
+    pc.ver[which(is.na(na))] <- NA
+
+  } else {  # mode = NULL
+    # Weight
+    e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1])
+    dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3]))  # [mode, lat*lon]
+    ano <- ano * InsertDim(wght, 1, ntime)
+    dim(ano) <- c(ntime, prod(dim(ano)[2:3]))  # [sdate, lat*lon]
+
+    na <- rowMeans(ano, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1))
+
+    # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate]
+    # Result: [mode, sdate]
+    pc.ver <- e.1 %*% t(ano)
+    pc.ver[which(is.na(na))] <- NA
+
+#    # Change back dimensions to fit the original input
+#    dim(projection) <- c(moredims, mode = unname(neofs))
+#    return(projection)
+  }
+
+  return(pc.ver)
+}
+
diff --git a/modules/Indices/R/tmp/Utils.R b/modules/Indices/R/tmp/Utils.R
new file mode 100644
index 0000000000000000000000000000000000000000..058782595acdda6222173278100e53963a8cd5bd
--- /dev/null
+++ b/modules/Indices/R/tmp/Utils.R
@@ -0,0 +1,1779 @@
+#'@importFrom abind abind
+#'@import plyr ncdf4
+#'@importFrom grDevices png jpeg pdf svg bmp tiff
+#'@importFrom easyVerification convert2prob
+
+## Function to tell if a regexpr() match is a complete match to a specified name
+.IsFullMatch <- function(x, name) {
+  ifelse(x > 0 && attributes(x)$match.length == nchar(name), TRUE, FALSE)
+}
+
+.ConfigReplaceVariablesInString <- function(string, replace_values, allow_undefined_key_vars = FALSE) {
+  # This function replaces all the 
occurrences of a variable in a string by
+  # their corresponding string stored in the replace_values.
+  if (length(strsplit(string, "\\$")[[1]]) > 1) {
+    parts <- strsplit(string, "\\$")[[1]]
+    output <- ""
+    i <- 0
+    for (part in parts) {
+      if (i %% 2 == 0) {
+        output <- paste(output, part, sep = "")
+      } else {
+        if (part %in% names(replace_values)) {
+          output <- paste(output, .ConfigReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "")
+        } else if (allow_undefined_key_vars) {
+          output <- paste0(output, "$", part, "$")
+        } else {
+          stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = ''))
+        }
+      }
+      i <- i + 1
+    }
+    output
+  } else {
+    string
+  }
+}
+
+.KnownLonNames <- function() {
+  known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon')
+}
+
+.KnownLatNames <- function() {
+  known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat')
+}
+
+.t2nlatlon <- function(t) {
+  ## As seen in cdo's griddes.c: ntr2nlat()
+  nlats <- (t * 3 + 1) / 2
+  if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) {
+    nlats <- ceiling(nlats)
+  } else {
+    nlats <- round(nlats)
+  }
+  if (nlats %% 2 > 0) {
+    nlats <- nlats + 1
+  }
+  ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF
+  nlons <- 2 * nlats
+  keep_going <- TRUE
+  while (keep_going) {
+    n <- nlons
+    if (n %% 8 == 0) n <- trunc(n / 8)
+    while (n %% 6 == 0) n <- trunc(n / 6)
+    while (n %% 5 == 0) n <- trunc(n / 5)
+    while (n %% 4 == 0) n <- trunc(n / 4)
+    while (n %% 3 == 0) n <- trunc(n / 3)
+    if (n %% 2 == 0) n <- trunc(n / 2)
+    if (n <= 8) {
+      keep_going <- FALSE
+    } else {
+      nlons <- nlons + 2
+      if (nlons > 9999) {
+        stop("Error: pick another gaussian grid truncation. It doesn't fulfill the standards to apply FFT.")
+      }
+    }
+  }
+  c(nlats, nlons)
+}
+
+.nlat2t <- function(nlats) {
+  trunc((nlats * 2 - 1) / 3)
+}
+
+.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) {
+  # The purpose, working modes, inputs and outputs of this function are
+  # explained in ?LoadDataFile
+  #suppressPackageStartupMessages({library(ncdf4)})
+  #suppressPackageStartupMessages({library(bigmemory)})
+  #suppressPackageStartupMessages({library(plyr)})
+  # Auxiliary function to convert array indices to linear indices
+  arrayIndex2VectorIndex <- function(indices, dims) {
+    if (length(indices) > length(dims)) {
+      stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.")
+    }
+    position <- 1
+    dims <- rev(dims)
+    indices <- rev(indices)
+    for (i in 1:length(indices)) {
+      position <- position + (indices[i] - 1) * prod(dims[-c(1:i)])
+    }
+    position
+  }
+
+  found_file <- NULL
+  dims <- NULL
+  grid_name <- units <- var_long_name <- NULL
+  is_2d_var <- array_across_gw <- NULL
+  data_across_gw <- NULL
+
+  filename <- work_piece[['filename']]
+  namevar <- work_piece[['namevar']]
+  output <- work_piece[['output']]
+  # The names of all data files in the directory of the repository that match
+  # the pattern are obtained.
+  if (length(grep("^http", filename)) > 0) {
+    is_url <- TRUE
+    files <- filename
+    ## TODO: Check that the user is not using shell globbing exps.
+  } else {
+    is_url <- FALSE
+    files <- Sys.glob(filename)
+  }
+
+  # If we don't find any, we leave the flag 'found_file' with a NULL value.
+  if (length(files) > 0) {
+    # The first file that matches the pattern is chosen and read.
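+    # Illustrative note: a pattern such as '/path/to/exp*/psl_19851101.nc'
+    # (hypothetical) may have been expanded by Sys.glob() above into several
+    # paths; only the first match is opened and read below.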
+ filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent) { + if (explore_dims) { + .message(paste("Exploring dimensions...", filename)) + } + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop(paste("Error: The variable", namevar, "is not defined in the file", filename)) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. + if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- strsplit(suppressWarnings(system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. 
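+      # Illustrative note: 'cdo griddes' typically prints one block per grid
+      # found in the file, e.g.:
+      #   # gridID 1
+      #   gridtype  = lonlat
+      #   xsize     = 360
+      #   ysize     = 180
+      # The parsing below locates each '# gridID' block, normalizes its
+      # whitespace, and keeps only 'gaussian'/'lonlat' grids whose
+      # xsize/ysize match the lon/lat lengths just read from the file.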
+ if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(1:length(grids_positions)) + grids_info <- lapply(grids_info, function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (length(grep('xsize', x)) > 0) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (length(grep('ysize', x)) > 0) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: More than one lonlat/gaussian grids with the same size as the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. 
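+    # Worked example of the grid names handled below, using the helpers
+    # defined earlier in this file: 'r144x72' is a lonlat grid with 144
+    # longitudes and 72 latitudes, while 't106grid' is a gaussian grid for
+    # which .t2nlatlon(106) gives 160 latitudes and 320 longitudes (and
+    # .nlat2t(160) returns 106).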
+ if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (length(grep('^t\\d{1,+}grid$', work_piece[['grid']])) > 0) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (length(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']])) > 0) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. + common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360/common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "' doesn't start at longitude 0 and will be re-interpolated in order to align its longitudes with the standard CDO grids definable with the names 'tgrid' or 'rx', which are by definition starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], "'. This dataset has been re-interpolated to align its longitudes to start at 0. You must re-interpolate the corresponding mask to align its longitudes to start at 0 as well, if you haven't done so yet. Running cdo remapcon,", common_grid_name, " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360/common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360/common_grid_lons)) - + floor(lonmax / (360/common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360/common_grid_lons) == 0) && + (lonmax %% (360/common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null", sep = "")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop(paste("Error: Couldn't find the mask file", mask[['path']])) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop(paste("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']])) + } + } else { + if (length(vars_in_mask) != 1) { + stop(paste("Error: one and only one non-coordinate variable should be defined in the mask file", + mask[['path']], "if the component 'nc_var_name' is not specified. 
Currently found: ", + paste(vars_in_mask, collapse = ', '), ".")) + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop(paste0("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']])) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be identical to the ones in the corresponding data files if output = 'areave' or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in the mask file must start by 0 and the latitudes must be ordered from highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- 1:length(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. + if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[length(lat_indices):1] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. 
It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- 1:length(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the expected dimension(s)", + paste(expected_dims[which(is.na(dim_matches))], collapse = ', '), + "were not found in", filename)) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- 
fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop(paste("Error: the variable", namevar, + "is defined over more dimensions than the expected (", + paste(c(expected_dims, 'time'), collapse = ', '), + "). It could also be that the members, longitude or latitude dimensions are named incorrectly. In that case, either rename the dimensions in the file or adjust Load() to recognize the actual name with the parameter 'dimnames'. See file", filename)) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(c(1)) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), 
length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- c() + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (any(!is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", 1:length(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. + if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null", sep = "")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. 
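+          ## Illustrative example of the normalization applied below:
+          ## sub_lon = c(-170, -10, 20) becomes c(190, 350, 20); for a
+          ## Greenwich-crossing request such as lonmin = 350, lonmax = 20,
+          ## the indices kept are those with sub_lon <= 20 or sub_lon >= 350.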
+ ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- 1:length(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- 1:length(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- length(sub_lat):1 + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + 1:length(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null", sep = "")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) + file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. 
For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[length(mask_lat_indices):1] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (!all(dim_matches == sort(dim_matches))) { + if (!found_disordered_dims && rev(work_piece[['indices']])[2] == 1 && rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
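+          # Values outside 'var_limits' and points excluded by the land-sea
+          # mask are set to NA before averaging. For the 'areave' and 'lon'
+          # outputs the mean is latitude-weighted; conceptually (a sketch with
+          # made-up vectors 'x' and 'lat'):
+          #   w <- cos(lat * pi / 180)  # grid cells shrink towards the poles
+          #   mean(x * w / mean(w), na.rm = TRUE)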
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(paste0(work_piece[['progress_amount']]), appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } + else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), paste0('$DEFAULT_', id, '_FILE_PATH$'), '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.:
+  #   /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  #   /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  #   /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag to its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is interesting to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- c() + tags_to_replace_starts <- c() + tags_to_replace_ends <- c() + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in 1:length(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) + left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + final_match <- NULL + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(parts[grep(tag, parts)]) + longest_couples <- c() + pos_longest_couples <- c() + found_value <- NULL + for (i in 1:length(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + } + found_value +} + +.FilterUserGraphicArgs <- 
function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user to
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("The argument '", name, "' cannot be modified ",
+                      "and the new value will be ignored."))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("File extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("Some extensions of the filenames provided in 'fileout' are not ",
+                      ext[1], ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("There are no extensions specified in the filenames; defaulting to '.eps'.")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # Return the function with the correct graphical device, and the corrected
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + message(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + warning(paste0(tag, paste(strwrap( + args, indent = indent, exdent = exdent + ), collapse = collapse)), call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!"try-error" %in% class(res)) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(1:length(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- c() + new_dims2 <- c() + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-c(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in 1:length(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data))==fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)], by = 
-1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i,] <- data[i,] - clim + } + } + + } else if (type %in% c('obs','hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } else { ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else {stop('type must be dcpp, hist or obs')} + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(1:length(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in 1:length(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } else { + bar_titles <- 
paste0("Cat. ", 1:nmap, " (%)") + } + } + + if (plot) { + for (k in 1:nmap) { + s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg, +# bar_limits = bar_limits, var_limits = var_limits, + triangle_ends = triangle_ends, plot = TRUE, + draw_separators = draw_separators, + title = bar_titles[[k]], title_scale = title_scale, + label_scale = label_scale, extra_margin = extra_margin) + } + } else { + #TODO: col_inf and col_sup + return(list(brks = brks, cols = cols)) + } + +} + + diff --git a/modules/Loading/Loading.R b/modules/Loading/Loading.R index 6e219bf13b09a6d2888b8edb4095b1f78b875ee0..c575d3f16e3d9ad92e81a4790c9c4d05c124a16e 100644 --- a/modules/Loading/Loading.R +++ b/modules/Loading/Loading.R @@ -3,7 +3,7 @@ source("tools/libs.R") Loading <- function(recipe) { # Source correct function depending on filesystem and time horizon # Case: CERISE (Mars) - if (tolower(recipe$Run$filesystem) == "mars") { + if (tolower(recipe$Run$filesystem) %in% c("cerise", "mars")) { source("modules/Loading/R/load_GRIB.R") data <- load_GRIB(recipe) } else if (tolower(recipe$Run$filesystem) == "sample") { diff --git a/modules/Loading/R/GRIB/GrbLoad.R b/modules/Loading/R/GRIB/GrbLoad.R index 7a3f441410c34d0b667ef0785d33d8c727670ceb..67c06368df4299d6d0d94b766afb6664aedd0f93 100644 --- a/modules/Loading/R/GRIB/GrbLoad.R +++ b/modules/Loading/R/GRIB/GrbLoad.R @@ -4,6 +4,7 @@ # If exp, has.memb is a number; if obs, has.memb = NULL # syear_time_dim is the time attr dim of exp as the input for obs #---------------------------------------------------------------------------------- +source("modules/Loading/R/GRIB/grbload.R") GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, regrid = NULL) { library(gribr) @@ -15,164 +16,60 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, for (dat_i in 1:length(dat)) { - file_to_load <- grib_open(dat[[dat_i]]) - - #---------------------------------------- - # HOW TO FIND THE VALUE OF EACH FTIME STEP? - #---------------------------------------- - #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. - - # METHOD 1: Get first message to figure out the validityDate/Time of each message - #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". - gm1 <- grib_get_message(file_to_load, 1) - first_ftime <- as.character(gm1$validityDate) - first_ftime_hour <- gm1$validityTime - # For monthly data - #NOTE: may not be correct because it is calculated by the first message - cdo_time_attr <- clock::add_months(as.POSIXct(paste0(first_ftime, ' ', first_ftime_hour), - format = "%Y%m%d %H", tz = 'UTC'), time_step - 1) - cdo_time <- format(cdo_time_attr, "%Y%m%d") - - # # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) 
- # #TODO: Change to method 1 because can't predict what cdo will produce - # cdo_time <- system(paste0("cdo showtimestamp ", dat[[dat_i]]), intern = T) - # cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] - # cdo_time <- cdo_time[which(cdo_time != "")] - ## # Check if there is member dim or not - ## has_memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) - # if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) - # cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" - # cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') - # cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) - # cdo_time <- gsub('-', '', cdo_time) - - #---------------------------------------- - - # all members + ftimes: length should be memb*ftime (e.g., 51*7) - ## Method 1: use grib_select and real values to filter - memb_ftime <- grib_select(file_to_load, list(validityDate = cdo_time)) - if (inherits(memb_ftime, 'gribMessage')) memb_ftime <- list(memb_ftime) - - # ## Method 2: Calculate which messages are the desired ones - # gm <- grib_get_message(file_to_load, time_step) - # if (length(time_step) == 1) { - # gm <- list(gm) - # } - - ################################################################## - # Get data as an array [longitude, latitude, (memb*)time] - ################################################################## - if (grepl("reduced", gm1$gridType)) { - #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function - # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c - values_l <- vector('list', length = length(memb_ftime)) - for (gm_i in 1:length(memb_ftime)) { - values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) - } - result[[dat_i]] <- array(unlist(values_l), dim = c(longitude = gm1$Nj * 2, latitude = gm1$Nj, time = length(values_l))) - # Save memory - rm(values_l); gc() - - } else { - result[[dat_i]] <- .grib_expand_grids(memb_ftime) - } - - ################################################################## - # Get metadata - ################################################################## - ## (1-1) Everything from the first message of first file - if (dat_i == 1) { - ## (1-1) Everything from the first message of first file - # dims <- dim(result[[dat_i]]) - # attributes(result) <- gm1 - # # turn result into array again - # dim(result[[dat_i]]) <- dims - - ## (1-2) Only save the necessary attributes - attr(result, 'edition') <- gm1$edition - attr(result, 'shortName') <- gm1$shortName - #NOTE: Tune varaible name!! 
-      if (gm1$shortName == '2t') attr(result, 'shortName') <- 'tas'
-      attr(result, 'name') <- gm1$name
-      attr(result, 'units') <- gm1$units
-#      attr(result, 'validityDate') <- gm1$validityDate
-#      attr(result, 'validityTime') <- gm1$validityTime
-
-      ## (2) Lat and lon
-      latlon <- grib_latlons(gm1, expand = TRUE)
-      attr(result, 'latitude') <- unique(as.vector(c(latlon$lats)))
-      attr(result, 'longitude') <- unique(as.vector(c(latlon$lons)))
-      # Save memory (though it's small)
-      rm(latlon); gc()
-
-      #NOTE: Find another way to check regular grid; Ni/Nj not always exist
-#      if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) {
-#        nx <- gm1$Nx
-#        ny <- gm1$Ny
-#      } else {
-#        nx <- gm1$Ni
-#        ny <- gm1$Nj
-#      }
-#      if (length(lats) != ny | length(lons) != nx) {
-#        stop("Latitude and Longitude seem to be non-regular grid.")
-#      }
-
-    }
-
-#--------------------------------
-#NOTE: Just use cdo_time
-#    ## (3) Date and time: Need to get from each massage
-#    for (time_i in 1:length(time_step)) {
-#      gm1 <- gm[[time_i]]
-#      #NOTE: What's the correct time?
-##      dates <- gm1$validityDate  #Date of validity of the forecast
-##      times <- gm1$validityTime
-##      dates <- gm1$dataDate # Reference date
-#      times[[dat_i]][time_i] <- as.POSIXct(
-#        lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ',
-#                           paste(gm1$hour, gm1$minute, gm1$second, ':')))
-#      )
-#    }
-    times[[dat_i]] <- cdo_time_attr
-#--------------------------------
-
-    ##################################################################
-    # regrid
-    ##################################################################
-    if (!is.null(regrid)) {
-      # result[[dat_i]]: [longitude, latitude, time]
-      res_data <- s2dv::CDORemap(result[[dat_i]], lons = attr(result, 'longitude'), lats = attr(result, 'latitude'),
-                                 grid = regrid$type, method = regrid$method, force_remap = TRUE)
-      if (dat_i == length(dat)) {
-        attr(result, 'longitude') <- res_data$lons
-        attr(result, 'latitude') <- res_data$lats
-      }
-      result[[dat_i]] <- res_data$data_array
-    }
-
-
-    ##################################################################
-    # Save memory
-    rm(memb_ftime); rm(gm1); gc()
-    grib_close(file_to_load)  # Doesn't impact memory
-    ##################################################################
+    file_to_load <- tryCatch(grbload(dat[[dat_i]], time_step = time_step,
+                                     has.memb = has.memb,
+                                     syear_time_dim = syear_time_dim,
+                                     regrid = regrid, first_file = dat_i),
+                             error = function(e) {
+                               warning(paste('Missing file?', e))
+                               list(result = NULL, time = NULL)
+                             })
+    result[[dat_i]] <- file_to_load$result
+    times[[dat_i]] <- file_to_load$time
   } #for loop dat
-
+
+  # If there are missing files, keep loading the available data
+  missing_files <- unlist(lapply(result, function(x) {is.null(x)}))
+  if (all(missing_files == TRUE)) {
+    error(recipe$Run$logger,
+          "No GRIB files found")
+  }
+  print(missing_files)
+  index <- 1 # using information from first file
+  if (any(missing_files == TRUE)) {
+    index <- which(missing_files == FALSE)[1]
+    for (missed in which(missing_files)) {
+      info(recipe$Run$logger,
+           "There are missing files in GRIB format.")
+      info(recipe$Run$logger,
+           dat[missing_files])
+      print(index)
+      result[[missed]] <- array(NA, dim(result[[index]]))
+    }
+  }
+  print(index)
   # Turn result list into array
-  attr <- attributes(result)
-  res_dim <- c(dim(result[[1]]), syear = length(result))  #[longitude, latitude, (memb*)time, syear]
+  attr <- attributes(result[[index]])
+  res_dim <- c(dim(result[[index]]), syear = length(result))  #[longitude, latitude, (memb*)time, 
syear] result <- unlist(result) dim(result) <- res_dim - # Generate date/time attributes - times <- array(unlist(times), dim = c(time = length(time_step), syear = length(dat), + if (any(lapply(times, function(x){length(x)>0})==FALSE)) { + for (i in which(lapply(times, function(x){length(x)>0}) == FALSE)) { + times[[i]] <- array(NA, c(time = length(time_step), syear = 1, + sday = 1, sweek = 1)) + } + } + times <- array(unlist(times), dim = c(time = length(time_step), + syear = length(dat), sday = 1, sweek = 1)) + times <- s2dv::Reorder(times, c('sday', 'sweek', 'syear', 'time')) if (!is.null(syear_time_dim)) dim(times) <- syear_time_dim - times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') + times <- as.POSIXct(times, origin = '1970-01-01', tz = 'UTC') # Reshape and reorder array - if (is.null(has.memb)) { # obs doesn't have memb; reshape syear/time dim + if (is.null(has.memb) || has.memb == FALSE) { # obs doesn't have memb; reshape syear/time dim result <- s2dv::Reorder(result, c("syear", "time", "latitude", "longitude")) result <- array(result, dim = c(dat = 1, var = 1, syear_time_dim, dim(result)[3:4], @@ -188,7 +85,7 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, attr$dim <- dim(result) attributes(result) <- attr attr(result, 'time') <- times - + # Save memory rm(times); rm(attr); gc() @@ -246,3 +143,33 @@ GrbLoad <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, values } + +tune_var_name <- function(x) { + if (x == '2t') { + res <- "tas" + } else if (x == 'tprate') { + res <- "prlr" + } else if (x == "10si") { + res <- "sfcWind" + } else if (x == "2d") { + res <- "tdps" + } else if (x == "msl") { + res <- "psl" + } else if (x == "sst") { + res <- "tos" + } else if (x == "tp") { + res <- "prlr" + } else if (x %in% c("mx2t24", "mx2t")) { + res <- "tasmax" + } else if (x %in% c("mn2t24", "mn2t")) { + res <- "tasmin" + } else if (x == "Maximum temperature at 2 metres since previous post-processing") { + res <- "tasmax" + } else if (x == "Minimum temperature at 2 metres since previous post-processing") { + res <- "tasmin" + } else { + res <- x + warning("Conversion name", x, "needed?") + } + return(res) +} diff --git a/modules/Loading/R/GRIB/grbload.R b/modules/Loading/R/GRIB/grbload.R new file mode 100644 index 0000000000000000000000000000000000000000..7d6fb72784b5841aac7896254d383ac1a08f00f4 --- /dev/null +++ b/modules/Loading/R/GRIB/grbload.R @@ -0,0 +1,165 @@ +#---------------------------------------------------------------------------------- +# Use gribr package to load GRIB files +# Atomic function to load data if there are missing files +# syear_time_dim is the time attr dim of exp as the input for obs +#---------------------------------------------------------------------------------- +grbload <- function (dat, time_step = 1, has.memb = NULL, syear_time_dim = NULL, + regrid = NULL, first_file = NULL) { + + file_to_load <- grib_open(dat) +info(recipe$Run$logger, dat) +info(recipe$Run$logger, str(file_to_load)) + #---------------------------------------- + # HOW TO FIND THE VALUE OF EACH FTIME STEP? + #---------------------------------------- + #NOTE: ValidityTime is not considered now. So if the time frequency is less than daily, it has problem. + +# # METHOD 1: Get first message to figure out the validityDate/Time of each message +# #NOTE: gm1$validityDate should be "s", "m", "h", etc. according to document. But our files have "1". 
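+# The first message is still read here because its metadata (gridType,
+# shortName, units, latitudes/longitudes) is used further down; the forecast
+# time values themselves are obtained with METHOD 2 (cdo showtimestamp) below.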
+ gm1 <- grib_get_message(file_to_load, 1) +# first_ftime <- as.character(gm1$validityDate) +# first_ftime_hour <- gm1$validityTime +# # For monthly data +# #NOTE: may not be correct because it is calculated by the first message +# cdo_time_attr <- clock::add_months(as.POSIXct(first_ftime, +# format = "%Y%m%d", tz = 'UTC'), time_step - 1) +# cdo_time <- format(cdo_time_attr, "%Y%m%d") + + # METHOD 2: Use cdo showtimestamp (DEPENDENCY!) + #TODO: Change to method 1 because can't predict what cdo will produce + cdo_time <- system(paste0("cdo showtimestamp ", dat), intern = T) + cdo_time <- strsplit(cdo_time, " ")[[length(cdo_time)]] + cdo_time <- cdo_time[which(cdo_time != "")] + # Check if there is member dim or not + has.memb <- ifelse((length(unique(cdo_time)) == length(cdo_time)), FALSE, TRUE) + if (has.memb) memb_dim_length <- length(cdo_time)/length(unique(cdo_time)) + cdo_time <- unique(cdo_time)[time_step] #"2000-12-01T00:00:00" + cdo_time_attr <- as.POSIXct(gsub('T', ' ', cdo_time), tz = 'UTC') + cdo_time <- sapply(sapply(cdo_time, strsplit, "T"), '[[', 1) + cdo_time <- gsub('-', '', cdo_time) +# ERA5 Precipitation dates problem: + if (!has.memb) cdo_time <- gm1$validityDate + #---------------------------------------- + + # all members + ftimes: length should be memb*ftime (e.g., 51*7) + ## Method 1: use grib_select and real values to filter + memb_ftime <- grib_select(file_to_load, list(validityDate = cdo_time)) + if (inherits(memb_ftime, 'gribMessage')) memb_ftime <- list(memb_ftime) + +# ## Method 2: Calculate which messages are the desired ones +# gm <- grib_get_message(file_to_load, time_step) +# if (length(time_step) == 1) { +# gm <- list(gm) +# } + + ################################################################## + # Get data as an array [longitude, latitude, (memb*)time] + ################################################################## + if (grepl("reduced", gm1$gridType)) { + #NOTE: Need to call gribr::grib_expand_grids because I don't know how to make .Call("gribr_redtoreg") work outside that function + # https://github.com/nawendt/gribr/blob/main/src/redtoreg.c + values_l <- vector('list', length = length(memb_ftime)) + for (gm_i in 1:length(memb_ftime)) { + values_l[[gm_i]] <- grib_expand_grids(memb_ftime[[gm_i]]) + } + result <- array(unlist(values_l), dim = c(longitude = gm1$Ni, + latitude = gm1$Nj, + time = length(values_l))) + info(recipe$Run$logger, "HERE1") + info(recipe$Run$logger, str(result)) + # Save memory + rm(values_l); gc() + + } else { + result <- .grib_expand_grids(memb_ftime) + info(recipe$Run$logger, "HERE2") + info(recipe$Run$logger, str(result)) + + } + info(recipe$Run$logger, ls()) + + ################################################################## + # Get metadata + ################################################################## + ## (1-1) Everything from the first message of first file +# if (first_file == 1) { + ## (1-1) Everything from the first message of first file +# dims <- dim(result[[dat_i]]) +# attributes(result) <- gm1 +# # turn result into array again +# dim(result[[dat_i]]) <- dims + ## (1-2) Only save the necessary attributes + attr(result, 'edition') <- gm1$edition + attr(result, 'shortName') <- gm1$shortName + #NOTE: Tune varaible name!! 
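+  # tune_var_name() (defined in GrbLoad.R) maps GRIB shortNames to the
+  # variable names used downstream, e.g. '2t' -> 'tas', 'msl' -> 'psl',
+  # 'tp' and 'tprate' -> 'prlr'; unknown names are returned unchanged with
+  # a warning.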
+ attr(result, 'shortName') <- tune_var_name(gm1$shortName) + attr(result, 'name') <- gm1$name + attr(result, 'units') <- gm1$units +# attr(result, 'validityDate') <- gm1$validityDate +# attr(result, 'validityTime') <- gm1$validityTime + + ## (2) Lat and lon + latlon <- grib_latlons(gm1, expand = TRUE) + attr(result, 'latitude') <- unique(as.vector(c(latlon$lats))) + attr(result, 'longitude') <- unique(as.vector(c(latlon$lons))) + # Save memory (though it's small) + rm(latlon); gc() + + #NOTE: Find another way to check regular grid; Ni/Nj not always exist +# if (has.key(gm1, "Nx") && has.key(gm1, "Ny")) { +# nx <- gm1$Nx +# ny <- gm1$Ny +# } else { +# nx <- gm1$Ni +# ny <- gm1$Nj +# } +# if (length(lats) != ny | length(lons) != nx) { +# stop("Latitude and Longitude seem to be non-regular grid.") +# } + +# } + +#-------------------------------- +#NOTE: Just use cdo_time +# ## (3) Date and time: Need to get from each massage +# for (time_i in 1:length(time_step)) { +# gm1 <- gm[[time_i]] +# #NOTE: What's the correct time? +## dates <- gm1$validityDate #Date of validity of the forecast +## times <- gm1$validityTime +## dates <- gm1$dataDate # Reference date +# times[[dat_i]][time_i] <- as.POSIXct( +# lubridate::ymd_hms(paste0(paste(gm1$year,gm1$month,gm1$day, '-'), ' ', +# paste(gm1$hour, gm1$minute, gm1$second, ':'))) +# ) +# } + times <- cdo_time_attr +#-------------------------------- + + ################################################################## + # regrid + ################################################################## + if (!is.null(regrid)) { + # result[[dat_i]]: [longitude, latitude, time] + res_data <- s2dv::CDORemap(result, lons = attr(result, 'longitude'), lats = attr(result, 'latitude'), + grid = regrid$type, method = regrid$method, force_remap = TRUE) + #if (first_file == length(dat)) {ç + result <- res_data$data_array + attr(result, 'longitude') <- res_data$lons + attr(result, 'latitude') <- res_data$lats + #} + } + + + ################################################################## + # Save memory + rm(memb_ftime); rm(gm1); gc() + grib_close(file_to_load) # Doesn't impact memory + ################################################################## + print(str(result)) + return(list(result = result, time = times)) +} + +######################################################################### +######################################################################### + diff --git a/modules/Loading/R/load_GRIB.R b/modules/Loading/R/load_GRIB.R index 0dd5f9191c4275b66c5525c248e03b9c6e0a0e8b..d3a016d270ab9504c07def9f8cc1037b6e4e4f74 100644 --- a/modules/Loading/R/load_GRIB.R +++ b/modules/Loading/R/load_GRIB.R @@ -22,7 +22,7 @@ load_GRIB <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax # can only be 360 ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- recipe$Analysis$Variables$name #'tas' + variable <- recipe$Analysis$Variables$name # 'tas' store.freq <- recipe$Analysis$Variables$freq regrid.method <- recipe$Analysis$Regrid$method @@ -40,7 +40,12 @@ load_GRIB <- function(recipe) { #NOTE: We can use this info in GrbLoad() to substitute param 'has.memb' fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst - + if (is.null(exp_descrip$nmember$hcst)) { + available_sdates <- unlist(lapply(exp_descrip$sdates, function(x){x[[1]]})) + ind <- which(hcst.sdate == available_sdates) + hcst.nmember <- exp_descrip$sdates[[ind]][[2]] + } + info(recipe$Run$logger, "========== 
PARAMETERS RETRIEVED. ==========") @@ -54,7 +59,7 @@ load_GRIB <- function(recipe) { # The correct files #exp_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_system5_tas_CORRECTED/" - hcst.path <- paste0(archive$src, hcst.dir) + hcst.path <- paste0(archive$src, hcst.dir, freq.hcst) hcst.year <- paste0(as.numeric(hcst.inityear):as.numeric(hcst.endyear)) hcst.files <- paste0(hcst.path, variable, '_', hcst.year, hcst.sdate, '.grb') @@ -79,7 +84,7 @@ load_GRIB <- function(recipe) { # Load forecast #------------------------------------------------------------------- if (!is.null(fcst.year)) { - fcst.path <- paste0(archive$src, hcst.dir) + fcst.path <- paste0(archive$src, hcst.dir, freq.hcst) fcst.files <- paste0(fcst.path, variable, '_', fcst.year, hcst.sdate, '.grb') fcst <- GrbLoad(dat = as.list(fcst.files), time_step = hcst.ftime, has.memb = fcst.nmember, syear_time_dim = NULL, regrid = regrid_list) @@ -94,16 +99,19 @@ load_GRIB <- function(recipe) { # Load reference #------------------------------------------------------------------- #obs_path <- "/esarchive/scratch/aho/tmp/GRIB/GRIB_era5_tas/" - obs.path <- paste0(archive$src, obs.dir) + obs.path <- paste0(archive$src, obs.dir, freq.obs, "/") # Use hcst time attr to load obs hcst_times <- attr(hcst, 'time') hcst_times_strings <- format(hcst_times, '%Y%m') obs.files <- paste0(obs.path, variable, '_', hcst_times_strings, '.grb') - if (!regrid.type %in% c('none', 'to_reference')) { - if (regrid.type == 'to_system') { - regrid_list <- c(method = regrid.method, type = exp_descrip$reference_grid) + + if (!regrid.type %in% c('none')) { + if (regrid.type == 'to_reference') { + regrid_list <- list(method = regrid.method, type = reference_descrip$reference_grid) + } else if (regrid.type == 'to_system') { + regrid_list <- list(method = regrid.method, type = exp_descrip$reference_grid) } else { # e.g., "r360x181" regrid_list <- list(method = regrid.method, type = regrid.type) } @@ -115,7 +123,6 @@ load_GRIB <- function(recipe) { obs <- GrbLoad(dat = as.list(obs.files), time_step = 1, has.memb = NULL, syear_time_dim = dim(hcst_times), regrid = regrid_list) gc() - .log_memory_usage(recipe$Run$logger, when = "After loading the data") info(recipe$Run$logger, "========== OBS LOADED. 
==========") diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index db7ceecacd0f6e8af750fae45147e6dcbf07f506..6d5c974636d8bbb6def4bbe7333c8bdd63d2c57b 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -1,3 +1,4 @@ +source("modules/Saving/R/tmp/CST_SaveExp.R") save_metrics <- function(recipe, metrics, dictionary = NULL, @@ -65,7 +66,14 @@ save_metrics <- function(recipe, } } +#<<<<<<< HEAD +# times <- .get_times(store.freq, fcst.horizon, leadtimes, fcst.sdate, calendar) +# time <- times$time +# global_attributes <- c(list(var_long_name = NULL), +# global_attributes) +#======= times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) +#>>>>>>> master # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { # Subset skill arrays @@ -75,6 +83,10 @@ save_metrics <- function(recipe, drop = 'selected')}) # Generate name of output file variable <- data_cube$attrs$Variable$varName[[var]] + global_attributes$var_long_name <- + data_cube$attrs$Variable$metadata[[variable]]$long_name + + outdir <- get_dir(recipe = recipe, variable = variable) if (!dir.exists(outdir)) { dir.create(outdir, recursive = T) diff --git a/modules/Scorecards/R/load_mask.R b/modules/Scorecards/R/load_mask.R new file mode 100644 index 0000000000000000000000000000000000000000..207b21a4c43df841d60c2c74eb4dbbe6fc6a1201 --- /dev/null +++ b/modules/Scorecards/R/load_mask.R @@ -0,0 +1,23 @@ +library(gribr) +load_mask <- function(mask_path) { + file_format <- which(strsplit(mask_path, "")[[1]] == ".") + file_format <- substr(mask_path[[1]], file_format, + length(strsplit(mask_path, "")[[1]])) + if (file_format == ".grb") { + file_to_load <- grib_open(mask_path) + gm1 <- grib_get_message(file_to_load, 1) + lats <- unique(gm1$latitudes) + lons <- unique(gm1$longitudes) + mask <- array(as.numeric(gm1$values), + dim = c(longitude = gm1$Ni, + latitude = gm1$Nj)) + grib_close(file_to_load) + } else if (file_format == ".nc") { + stop("mask loading in netcdf not implemented") + } else { + stop("unknown file format") + } + return(list(mask = mask, lon = lons, lat = lats)) +} + + diff --git a/modules/Scorecards/R/tmp/LoadMetrics.R b/modules/Scorecards/R/tmp/LoadMetrics.R index 84e44028901a715023f71d0662a75b588e98a7e3..4f31f0534366f561ab288d295e310aa7225af0f2 100644 --- a/modules/Scorecards/R/tmp/LoadMetrics.R +++ b/modules/Scorecards/R/tmp/LoadMetrics.R @@ -172,7 +172,6 @@ LoadMetrics <- function(input_path, system, reference, var, period, } # dims: [metric, longitude, latitude, time, smonth] # or [metric, region, time, smonth] - # Loop for file dim(allfiles) <- c(dat = 1, sdate = length(allfiles)) @@ -182,7 +181,7 @@ LoadMetrics <- function(input_path, system, reference, var, period, drop_var_dim = T) names(dim(res)) <- NULL } else { - res <- array(dim = c(length(metrics), allfiledims[-1,1])) + res <- array(dim = allfiledims[-1,1]) names(dim(res)) <- NULL } res})$output1 diff --git a/modules/Scorecards/R/tmp/SCPlotScorecard.R b/modules/Scorecards/R/tmp/SCPlotScorecard.R new file mode 100644 index 0000000000000000000000000000000000000000..dedbc6cb4b5257d6b6488dfbd7e5517dfbb00af8 --- /dev/null +++ b/modules/Scorecards/R/tmp/SCPlotScorecard.R @@ -0,0 +1,444 @@ +#'Scorecards function create simple scorecards by region (types 1 & 3) +#' +#'@description This function creates a scorecard for a single system and +#'reference combination, showing data by region and forecast month. 
+#'
+#'@param data A multidimensional array containing spatially aggregated metrics
+#' data with dimensions: metric, region, sdate and ftime.
+#'@param row.dim A character string indicating the dimension name to show in the
+#' rows of the plot.
+#'@param subrow.dim A character string indicating the dimension name to show in
+#' the sub-rows of the plot.
+#'@param col.dim A character string indicating the dimension name to show in the
+#' columns of the plot.
+#'@param subcol.dim A character string indicating the dimension name to show in
+#' the sub-columns of the plot.
+#'@param legend.dim A character string indicating the dimension name to use for
+#' the legend.
+#'@param row.names A vector of character strings with row display names.
+#'@param subrow.names A vector of character strings with sub-row display names.
+#'@param col.names A vector of character strings with column display names.
+#'@param subcol.names A vector of character strings with sub-column display
+#' names.
+#'@param row.title A character string for the title of the row names.
+#'@param subrow.title A character string for the title of the sub-row names.
+#'@param table.title A character string for the title of the plot.
+#'@param table.subtitle A character string for the sub-title of the plot.
+#'@param legend.breaks A vector of numerics or a list of vectors of numerics,
+#' containing the breaks for the legends. If a vector is given as input, then
+#' these breaks will be repeated for each legend.dim. A list of vectors can be
+#' given as input if the legend.dims require different breaks. This parameter
+#' is required even if the legend is not plotted, to define the colors in the
+#' scorecard table.
+#'@param plot.legend A logical value to determine if the legend is plotted.
+#'@param legend.width A numeric value to define the width of the legend bars.
+#'@param legend.height A numeric value to define the height of the legend bars.
+#'@param palette A vector of character strings or a list of vectors of
+#' character strings containing the colors to use in the legends. If a vector
+#' is given as input, then these colors will be used for each legend.dim. A
+#' list of vectors can be given as input if different colors are desired for
+#' the legend.dims. This parameter must be included even if the legend is
+#' not plotted, to define the colors in the scorecard table.
+#'@param colorunder A character string or a vector of character strings
+#' defining the colors to use for data values that are lower than the lowest
+#' breaks value. This parameter will also plot an inferior triangle in the
+#' legend bar. The parameter can be set to NULL if there are no inferior values.
+#' If a character string is given, this color will be applied to all legend.dims.
+#'@param colorsup A character string or a vector of character strings
+#' defining the colors to use for data values that are higher than the highest
+#' breaks value. This parameter will also plot a superior triangle in the
+#' legend bar. The parameter can be set to NULL if there are no superior values.
+#' If a character string is given, this color will be applied to all legend.dims.
+#'@param round.decimal A numeric indicating the number of decimal places to
+#' which the data is rounded for display in the scorecard table.
+#'@param font.size A numeric indicating the font size on the scorecard table.
+#'@param fileout A character string with the path to save the scorecard plots.
+#'
+#'@return An image file containing the scorecard.
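+#'
+#'@details The table has (length(row.names) x length(subrow.names)) rows and
+#' (2 + length(col.names) x length(subcol.names)) columns. It is rendered as
+#' an HTML table with 'kableExtra' and saved with save_kable(); when
+#' 'plot.legend' is TRUE, the color bar legend is appended below the table by
+#' calling ImageMagick's 'convert' through system(), so ImageMagick is assumed
+#' to be available on the system.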
+#'@examples
+#'data <- array(rnorm(1000), dim = c('sdate' = 12, 'metric' = 4, 'region' = 3,
+#'                                   'time' = 6))
+#'row.names <- c('Tropics', 'Extra-tropical NH', 'Extra-tropical SH')
+#'col.names <- c('Mean bias (K)', 'Correlation', 'RPSS', 'CRPSS')
+#'SCPlotScorecard(data = data, row.names = row.names, col.names = col.names,
+#'                subcol.names = month.abb[1:12],
+#'                row.title = 'Region', subrow.title = 'Forecast Month',
+#'                col.title = 'Start date',
+#'                table.title = "Temperature of ECMWF System 5",
+#'                table.subtitle = "(Ref: ERA5 1994-2016)",
+#'                fileout = 'test.png')
+#'
+#'@import kableExtra
+#'@import s2dv
+#'@import ClimProjDiags
+#'@export
+SCPlotScorecard <- function(data, row.dim = 'region', subrow.dim = 'time',
+                            col.dim = 'metric', subcol.dim = 'sdate',
+                            legend.dim = 'metric', row.names = NULL,
+                            subrow.names = NULL, col.names = NULL,
+                            subcol.names = NULL, row.title = NULL,
+                            subrow.title = NULL, col.title = NULL,
+                            table.title = NULL, table.subtitle = NULL,
+                            legend.breaks = NULL, plot.legend = TRUE,
+                            label.scale = NULL, legend.width = NULL,
+                            legend.height = NULL, palette = NULL,
+                            colorunder = NULL, colorsup = NULL,
+                            round.decimal = 2, font.size = 1.1,
+                            legend.white.space = NULL,
+                            col1.width = NULL, col2.width = NULL,
+                            fileout = './scorecard.png') {
+  # Input parameter checks
+  ## Check data
+  if (!is.array(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  ## Check row.dim
+  if (!is.character(row.dim)) {
+    stop("Parameter 'row.dim' must be a character string.")
+  }
+  if (!row.dim %in% names(dim(data))) {
+    stop("Parameter 'row.dim' is not found in 'data' dimensions.")
+  }
+  ## Check row.names
+  if (!is.null(row.names)) {
+    if (length(row.names) != as.numeric(dim(data)[row.dim])) {
+      stop("Parameter 'row.names' must have the same length as dimension 'row.dim'.")
+    }
+  } else {
+    row.names <- as.character(1:dim(data)[row.dim])
+  }
+  ## Check subrow.dim
+  if (!is.character(subrow.dim)) {
+    stop("Parameter 'subrow.dim' must be a character string.")
+  }
+  if (!subrow.dim %in% names(dim(data))) {
+    stop("Parameter 'subrow.dim' is not found in 'data' dimensions.")
+  }
+  ## Check subrow.names
+  if (!is.null(subrow.names)) {
+    if (length(subrow.names) != as.numeric(dim(data)[subrow.dim])) {
+      stop("Parameter 'subrow.names' must have the same length as dimension 'subrow.dim'.")
+    }
+  } else {
+    subrow.names <- as.character(1:dim(data)[subrow.dim])
+  }
+  ## Check col.dim
+  if (!is.character(col.dim)) {
+    stop("Parameter 'col.dim' must be a character string.")
+  }
+  if (!col.dim %in% names(dim(data))) {
+    stop("Parameter 'col.dim' is not found in 'data' dimensions.")
+  }
+  ## Check col.names
+  if (!is.null(col.names)) {
+    if (length(col.names) != as.numeric(dim(data)[col.dim])) {
+      stop("Parameter 'col.names' must have the same length as dimension 'col.dim'.")
+    }
+  } else {
+    col.names <- as.character(1:dim(data)[col.dim])
+  }
+  ## Check subcol.dim
+  if (!is.character(subcol.dim)) {
+    stop("Parameter 'subcol.dim' must be a character string.")
+  }
+  if (!subcol.dim %in% names(dim(data))) {
+    stop("Parameter 'subcol.dim' is not found in 'data' dimensions.")
+  }
+  ## Check subcol.names
+  if (!is.null(subcol.names)) {
+    if (length(subcol.names) != as.numeric(dim(data)[subcol.dim])) {
+      stop("Parameter 'subcol.names' must have the same length as dimension 'subcol.dim'.")
+    }
+  } else {
+    subcol.names <- as.character(1:dim(data)[subcol.dim])
+  }
+  ## Check legend.dim
+  if (!is.character(legend.dim)) {
+    stop("Parameter 'legend.dim' must be a character string.")
string.") + } + if (!legend.dim %in% names(dim(data))) { + stop("Parameter 'legend.dim' is not found in 'data' dimensions.") + } + ## Check row.title inputs + if (!is.null(row.title)) { + if (!is.character(row.title)) { + stop("Parameter 'row.title must be a character string.") + } + } else { + row.title <- "" + } + ## Check subrow.title + if (!is.null(subrow.title)) { + if (!is.character(subrow.title)) { + stop("Parameter 'subrow.title must be a character string.") + } + } else { + subrow.title <- "" + } + ## Check col.title + if (!is.null(col.title)) { + if (!is.character(col.title)) { + stop("Parameter 'col.title must be a character string.") + } + } else { + col.title <- "" + } + ## Check table.title + if (!is.null(table.title)) { + if (!is.character(table.title)) { + stop("Parameter 'table.title' must be a character string.") + } + } else { + table.title <- "" + } + ## Check table.subtitle + if (!is.null(table.subtitle)) { + if (!is.character(table.subtitle)) { + stop("Parameter 'table.subtitle' must be a character string.") + } + } else { + table.subtitle <- "" + } + # Check legend.breaks + if (is.vector(legend.breaks) && is.numeric(legend.breaks)) { + legend.breaks <- rep(list(legend.breaks), as.numeric(dim(data)[legend.dim])) + } else if (is.null(legend.breaks)) { + legend.breaks <- rep(list(seq(-1, 1, 0.2)), as.numeric(dim(data)[legend.dim])) + } else if (inherits(legend.breaks, 'list')) { + stopifnot(length(legend.breaks) == as.numeric(dim(data)[legend.dim])) + } else { + stop("Parameter 'legend.breaks' must be a numeric vector, a list or NULL.") + } + ## Check plot.legend + if (!inherits(plot.legend, 'logical')) { + stop("Parameter 'plot.legend' must be a logical value.") + } + ## Check label.scale + if (is.null(label.scale)) { + label.scale <- 1.4 + } else { + if (!is.numeric(label.scale) | length(label.scale) != 1) { + stop("Parameter 'label.scale' must be a numeric value of length 1.") + } + } + ## Check legend.width + if (is.null(legend.width)) { + legend.width <- length(subcol.names) * 46.5 + } else { + if (!is.numeric(legend.width) | length(legend.width) != 1) { + stop("Parameter 'legend.width' must be a numeric value of length 1.") + } + } + if (is.null(legend.height)) { + legend.height <- 50 + } else { + if (!is.numeric(legend.height) | length(legend.height) != 1) { + stop("Parameter 'legend.height' must be a numeric value of length 1.") + } + } + ## Check colour palette input + if (is.vector(palette)) { + palette <- rep(list(palette), as.numeric(dim(data)[legend.dim])) + } else if (is.null(palette)) { + palette <- rep(list(c('#2D004B', '#542789', '#8073AC', '#B2ABD2', '#D8DAEB', + '#FEE0B6', '#FDB863', '#E08214', '#B35806', '#7F3B08')), + as.numeric(dim(data)[legend.dim])) + } else if (inherits(palette, 'list')) { + stopifnot(length(palette) == as.numeric(dim(data)[legend.dim])) + } else { + stop("Parameter 'palette' must be a numeric vector, a list or NULL.") + } + ## Check colorunder + if (is.null(colorunder)) { + colorunder <- rep("#04040E",as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorunder) && length(colorunder) == 1) { + colorunder <- rep(colorunder, as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorunder) && + length(colorunder) != as.numeric(dim(data)[legend.dim])) { + stop("Parameter 'colorunder' must be a numeric vector, a list or NULL.") + } + ## Check colorsup + if (is.null(colorsup)) { + colorsup <- rep("#730C04", as.numeric(dim(data)[legend.dim])) + } else if (is.character(colorsup) && length(colorsup) == 1) { + colorsup 
+  } else if (is.character(colorsup) &&
+             length(colorsup) != as.numeric(dim(data)[legend.dim])) {
+    stop("Parameter 'colorsup' must be a character string, a vector of character strings or NULL.")
+  }
+  ## Check round.decimal
+  if (is.null(round.decimal)) {
+    round.decimal <- 2
+  } else if (!is.numeric(round.decimal) | length(round.decimal) != 1) {
+    stop("Parameter 'round.decimal' must be a numeric value of length 1.")
+  }
+  ## Check font.size
+  if (is.null(font.size)) {
+    font.size <- 1
+  } else if (!is.numeric(font.size) | length(font.size) != 1) {
+    stop("Parameter 'font.size' must be a numeric value of length 1.")
+  }
+  ## Check legend white space
+  if (is.null(legend.white.space)) {
+    legend.white.space <- 6
+  }
+  ## Check col1.width
+  if (is.null(col1.width)) {
+    if (max(nchar(row.names)) == 1) {
+      col1.width <- max(nchar(row.names))
+    } else {
+      col1.width <- max(nchar(row.names)) / 4
+    }
+  } else if (!is.numeric(col1.width)) {
+    stop("Parameter 'col1.width' must be a numeric value of length 1.")
+  }
+  ## Check col2.width
+  if (is.null(col2.width)) {
+    if (max(nchar(subrow.names)) == 1) {
+      col2.width <- max(nchar(subrow.names))
+    } else {
+      col2.width <- max(nchar(subrow.names)) / 4
+    }
+  } else if (!is.numeric(col2.width)) {
+    stop("Parameter 'col2.width' must be a numeric value of length 1.")
+  }
+
+
+  # Get dimensions of inputs
+  n.col.names <- length(col.names)
+  n.subcol.names <- length(subcol.names)
+  n.row.names <- length(row.names)
+  n.subrow.names <- length(subrow.names)
+
+  # Define table size
+  n.rows <- n.row.names * n.subrow.names
+  n.columns <- 2 + (n.col.names * n.subcol.names)
+
+  # Column names
+  row.names.table <- rep("", n.rows)
+  for (row in 1:n.row.names) {
+    row.names.table[floor(n.subrow.names / 2) + (row - 1) * n.subrow.names] <- row.names[row]
+  }
+
+  # Define scorecard table titles
+  column.titles <- c(row.title, subrow.title, rep(c(subcol.names), n.col.names))
+
+  # Round data
+  data <- round(data, round.decimal)
+
+  # Define data inside the scorecards table
+  for (row in 1:n.row.names) {
+    table_temp <- data.frame(table_column_2 = as.character(subrow.names))
+    for (col in 1:n.col.names) {
+      table_temp <- data.frame(table_temp,
+                               Reorder(data = Subset(x = data, along = c(col.dim, row.dim),
+                                                     indices = list(col, row), drop = 'selected'),
+                                       order = c(subrow.dim, subcol.dim)))
+    }
+    if (row == 1) {
+      table_data <- table_temp
+    } else {
+      table_data <- rbind(table_data, table_temp)
+    }
+  }
+
+  # All data for plotting in table
+  table <- data.frame(table_column_1 = row.names.table, table_data)
+  table_temp <- array(unlist(table[3:n.columns]), dim = c(n.rows, n.columns - 2))
+  # Define colors to show in table
+  table_colors <- .SCTableColors(table = table_temp, n.col = n.col.names,
+                                 n.subcol = n.subcol.names, n.row = n.row.names,
+                                 n.subrow = n.subrow.names, legend.breaks = legend.breaks,
+                                 palette = palette, colorunder = colorunder,
+                                 colorsup = colorsup)
+  metric.color <- table_colors$metric.color
+  metric.text.color <- table_colors$metric.text.color
+  # metric.text.bold <- table_colors$metric.text.bold
+
+  options(stringsAsFactors = FALSE)
+  title <- data.frame(c1 = table.title, c2 = n.columns)
+  subtitle <- data.frame(c1 = table.subtitle, c2 = n.columns)
+  header.names <- as.data.frame(data.frame(c1 = c("", col.names),
+                                           c2 = c(2, rep(n.subcol.names, n.col.names))))
+  header.names2 <- as.data.frame(data.frame(c1 = c("", paste0(rep(col.title, n.col.names))),
+                                            c2 = c(2,
rep(n.subcol.names, n.col.names)))) + title.space <- data.frame(c1 = "\n", c2 = n.columns) + + # Hide NA values in table + options(knitr.kable.NA = '') + + # Create HTML table + table.html.part <- list() + table.html.part[[1]] <- kbl(table, escape = F, col.names = column.titles, align = rep("c", n.columns)) %>% + kable_paper("hover", full_width = F, font_size = 14 * font.size) %>% + add_header_above(header = header.names2, font_size = 16 * font.size) %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = header.names, font_size = 20 * font.size) %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = subtitle, font_size = 16 * font.size, align = "left") %>% + add_header_above(header = title.space, font_size = 10 * font.size) %>% + add_header_above(header = title, font_size = 22 * font.size, align = "left") + + for (i in 1:n.col.names) { + for (j in 1:n.subcol.names) { + my.background <- metric.color[, (i - 1) * n.subcol.names + j] + my.text.color <- metric.text.color[, (i - 1) * n.subcol.names + j] + # my.bold <- metric.text.bold[(i - 1) * n.subcol.names + j] + + table.html.part[[(i - 1) * n.subcol.names + j + 1]] <- + column_spec(table.html.part[[(i - 1) * n.subcol.names + j]], + 2 + n.subcol.names * (i - 1) + j, + background = my.background[1:n.rows], + color = my.text.color[1:n.rows], + bold = T) ## strsplit(toString(bold), ', ')[[1]] + } + } + + # Define position of table borders + column.borders <- NULL + for (i in 1:n.col.names) { + column.spacing <- (n.subcol.names * i) + 2 + column.borders <- c(column.borders, column.spacing) + } + + n.last.list <- n.col.names * n.subcol.names + 1 + + table.html <- column_spec(table.html.part[[n.last.list]], 1, bold = TRUE, width_min = paste0(col1.width, 'cm')) %>% + column_spec(2, bold = TRUE, width_min = paste0(col2.width, 'cm')) %>% + column_spec(3:n.columns, width_min = "1.5cm") %>% + column_spec(c(1, 2, column.borders), border_right = "2px solid black") %>% + column_spec(1, border_left = "2px solid black") %>% + column_spec(n.columns, border_right = "2px solid black") %>% + row_spec(seq(from = 0, to = n.subrow.names * n.row.names, by = n.subrow.names), + extra_css = "border-bottom: 2px solid black", hline_after = TRUE) + + if (plot.legend == TRUE) { + # Save the scorecard (without legend) + save_kable(table.html, file = paste0(fileout, '_tmpScorecard.png'), vheight = 1) + + # White space for legend + legend.white.space <- 37.8 * legend.white.space ## converting pixels to cm + + # Create and save color bar legend + scorecard_legend <- .SCLegend(legend.breaks = legend.breaks, + palette = palette, + colorunder = colorunder, + colorsup = colorsup, + label.scale = label.scale, + legend.width = legend.width, + legend.height = legend.height, + legend.white.space = legend.white.space, + fileout = fileout) + + # Add the legends below the scorecard table + system(paste0('convert -append ', fileout, '_tmpScorecard.png ', fileout, + '_tmpScorecardLegend.png ', fileout)) + # Remove temporary scorecard table + unlink(paste0(fileout, '_tmpScorecard*.png')) + } + if (plot.legend == FALSE) { + save_kable(table.html, file = fileout) + } +} diff --git a/modules/Scorecards/R/tmp/ScorecardsMulti.R b/modules/Scorecards/R/tmp/ScorecardsMulti.R index 7278eb16a2663ac9f723c68e375522439b046237..e36c45488044d45775bd4fa956341c4a630615b8 100644 --- a/modules/Scorecards/R/tmp/ScorecardsMulti.R +++ b/modules/Scorecards/R/tmp/ScorecardsMulti.R @@ -218,9 +218,6 @@ ScorecardsMulti 
<- function(data, sign, system, reference, var, start.year, ## Find position of mean bias metric to calculate breaks if ('mean_bias' %in% metrics) { pos_bias <- which(metrics == 'mean_bias') - if(var == 'psl'){ - data[,,pos_bias,,,] <- data[,,pos_bias,,,]/100 ## temporary - } breaks_bias <- .SCBiasBreaks(Subset(data, along = c('metric','region'), indices = list(pos_bias,reg))) } diff --git a/modules/Scorecards/R/tmp/ScorecardsSingle.R b/modules/Scorecards/R/tmp/ScorecardsSingle.R index 9fd445451dcd4bb54d3aeb7c7eab43075434e6eb..fa93bb556b1b6457c18cdd252e79bfc06f5df458 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSingle.R +++ b/modules/Scorecards/R/tmp/ScorecardsSingle.R @@ -201,13 +201,16 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, ## Find position of mean bias metric to calculate breaks breaks_bias <- NULL - if ('mean_bias' %in% metrics){ - stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected'))), c('metric','time','sdate','region'))) - temp_data <- Subset(data, c('system', 'reference'), list(sys, ref), drop = 'selected') + if ('mean_bias' %in% metrics) { + stopifnot(identical(names(dim(Subset(data, c('system', 'reference'), + list(sys, ref), drop = 'selected'))), + c('metric','time','sdate','region'))) + temp_data <- Subset(data, c('system', 'reference'), + list(sys, ref), drop = 'selected') pos_bias <- which(metrics == 'mean_bias') - if(var == 'psl'){ - temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100 - } + # if(var == 'psl'){ + # temp_data[pos_bias,,,] <- temp_data[pos_bias,,,]/100 + # } breaks_bias <- .SCBiasBreaks(Subset(temp_data, along = 'metric', indices = pos_bias)) } @@ -393,7 +396,7 @@ ScorecardsSingle <- function(data, sign, system, reference, var, start.year, if(!is.null(sign)){ sign_sc_4 <- Reorder(Subset(transformed_sign, c('system', 'reference'), list(sys, ref), drop = 'selected'), new_order) } else { - sign_sc_4 + sign_sc_4 <- NULL } VizScorecard(data = data_sc_4, diff --git a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R index 88e9de1530b68be962773f9f836386947ebc0c79..6c74e8be45469a29892e7296e5b1c641b7014321 100644 --- a/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R +++ b/modules/Scorecards/R/tmp/ScorecardsSystemDiff.R @@ -121,7 +121,17 @@ ScorecardsSystemDiff <- function(data, ## Get scorecards table display names from configuration files var.name <- var_dict[[var]]$long_name - var.units <- var_dict[[var]]$units ## TODO: Get units from recipe or elsewhere + if ('name' %in% names(recipe$Analysis$Variables)){ + if (recipe$Analysis$Variables$name == var) { + var.units <- recipe$Analysis$Variables$units + } + } else { + for (i in 1:length(recipe$Analysis$Variables)) { + if (recipe$Analysis$Variables[[i]]$name == var) { + var.units <- recipe$Analysis$Variables[[i]]$units + } + } + } system.name <- NULL reference.name <- NULL diff --git a/modules/Scorecards/R/tmp/WeightedMetrics.R b/modules/Scorecards/R/tmp/WeightedMetrics.R index 08267db4cce8c5590598acc6f3bd4ab93af15a47..0123af458df8ab6c76825df12eb90a0ae0cab3e2 100644 --- a/modules/Scorecards/R/tmp/WeightedMetrics.R +++ b/modules/Scorecards/R/tmp/WeightedMetrics.R @@ -82,11 +82,16 @@ WeightedMetrics <- function(loaded_metrics, regions, forecast.months, latdim = lat_dim_name, na.rm = na.rm, ncores = ncores) - if (!all(names(dim(weighted.mean)) == c('metric', 'time', 'sdate'))) { weighted.mean <- Reorder(weighted.mean, c('metric', 'time', 'sdate')) - } - + } + if 
(dim(weighted.mean)['time'] < dim(all_metric_means)['time']) { + dif_time_len <- dim(all_metric_means)['time'] - dim(weighted.mean)['time'] + empty <- array(NA, c(metric = length(metrics), + time = dif_time_len, + sdate = length(start.months))) + weighted.mean <- abind(weighted.mean, empty, along = 2) + } all_metric_means[, , , reg, ref, sys] <- weighted.mean } ## close loop on region diff --git a/modules/Scorecards/Scorecards.R b/modules/Scorecards/Scorecards.R index 37aa421c978d8aad8479b9b972684222207ddd5f..3f4f77d57323e8f4fecff43a6540eb048443b34a 100644 --- a/modules/Scorecards/Scorecards.R +++ b/modules/Scorecards/Scorecards.R @@ -11,7 +11,7 @@ source('modules/Scorecards/R/tmp/ScorecardsSingle.R') source('modules/Scorecards/R/tmp/ScorecardsMulti.R') source('modules/Scorecards/R/tmp/ScorecardsSystemDiff.R') source('modules/Scorecards/R/tmp/VizScorecard.R') - +source('modules/Scorecards/R/load_mask.R') ## Temporary for new ESviz function source('modules/Scorecards/R/tmp/ColorBarContinuous.R') source('modules/Scorecards/R/tmp/ClimPalette.R') @@ -26,14 +26,13 @@ Scorecards <- function(recipe) { stats.input.path <- paste0(recipe$Run$output_dir, "/outputs/Statistics/") output.path <- paste0(recipe$Run$output_dir, "/plots/Scorecards/") dir.create(output.path, recursive = T, showWarnings = F) - system <- recipe$Analysis$Datasets$System$name - reference <- recipe$Analysis$Datasets$Reference$name + system <- recipe$Analysis$Datasets$System + reference <- recipe$Analysis$Datasets$Reference var <- recipe$Analysis$Variables$name start.year <- as.numeric(recipe$Analysis$Time$hcst_start) end.year <- as.numeric(recipe$Analysis$Time$hcst_end) forecast.months <- recipe$Analysis$Time$ftime_min : recipe$Analysis$Time$ftime_max calib.method <- tolower(recipe$Analysis$Workflow$Calibration$method) - if (recipe$Analysis$Workflow$Scorecards$start_months == 'all' || is.null(recipe$Analysis$Workflow$Scorecards$start_months)) { start.months <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) } else { @@ -44,7 +43,7 @@ Scorecards <- function(recipe) { } } - start.months <- sprintf("%02d", start.months) +# start.months <- sprintf("%02d", start.months) period <- paste0(start.year, "-", end.year) ## Parameters for data aggregation @@ -77,6 +76,7 @@ Scorecards <- function(recipe) { ## Parameters for scorecard layout table.label <- recipe$Analysis$Workflow$Scorecards$table_label fileout.label <- recipe$Analysis$Workflow$Scorecards$fileout_label + legend.white.space <- recipe$Analysis$Workflow$Scorecards$legend_white_space col1.width <- recipe$Analysis$Workflow$Scorecards$col1_width col2.width <- recipe$Analysis$Workflow$Scorecards$col2_width legend.breaks <- recipe$Analysis$Workflow$Scorecards$legend_breaks @@ -130,10 +130,44 @@ Scorecards <- function(recipe) { } else { calculate.diff <- recipe$Analysis$Workflow$Scorecards$calculate_diff } - +#system <- system[1:2] + if (is.null(recipe$Analysis$Workflow$Scorecards$mask)) { + mask <- NULL + } else { + if (is.null(recipe$Run$filesystem)) { + filesystem <- 'esarchive' + } else { + filesystem <- recipe$Run$filesystem + } + if (tolower(recipe$Analysis$Workflow$Scorecards$mask) %in% + c("sea", "land")) { + mask_mode <- tolower(recipe$Analysis$Workflow$Scorecards$mask) + mask_grid <- recipe$Analysis$Regrid$type + conf_mask <- read_yaml("conf/archive.yml")[[filesystem]] + mask_path <- conf_mask$src + if (tolower(mask_grid) == "to_system") { + mask_path <- lapply(system, function(x) { + mask <- paste0(mask_path, + conf_mask$System[[x]]$src, + conf_mask$System[[x]]$lsm)}) + } 
else if (tolower(mask_grid) == "to_reference") {
+      mask_path <- lapply(reference, function(x) {
+        mask <- paste0(mask_path,
+                       conf_mask$Reference[[x]]$src,
+                       conf_mask$Reference[[x]]$lsm)})
+    } else {
+      # apply regrid to the reference or to the system
+      stop("Mask regrid not implemented")
+    }
+      mask <- lapply(mask_path, function(x) {
+        load_mask(x)})
+    } else {
+      stop("Mask description not implemented")
+    }
+  }
+
   ####### SKILL AGGREGATION #######
   if(metric.aggregation == 'skill'){
-
     ## Load data files
     loaded_metrics <- LoadMetrics(input_path = skill.input.path,
                                   system = system,
@@ -145,7 +179,50 @@ Scorecards <- function(recipe) {
                                   calib_method = calib.method,
                                   inf_to_na = inf.to.na
                                   )
+    if (!is.null(mask)) {
+      for (sys in 1:length(system)) {
+        for (ref in 1:length(reference)) {
+          sys_pos <- which(names(loaded_metrics) ==
+                           gsub("\\.", "", system)[sys])
+          ref_pos <- which(names(loaded_metrics[[sys_pos]]) ==
+                           gsub("\\.", "", reference)[ref])
+          tmp <- loaded_metrics[[sys_pos]][[ref_pos]]
+          attrs_tmp <- attributes(tmp)
+
+# TODO: with multiple references, how should the mask be selected?
+          if (tolower(mask_grid) == 'to_system') {
+            position <- sys
+          } else if (tolower(mask_grid) == 'to_reference') {
+            position <- ref
+          }
+          if (!all(mask[[position]]$lon == as.vector(attributes(tmp)$lon))) {
+            stop("Longitudes order does not match")
+          }
+          if (!all(mask[[position]]$lat == as.vector(attributes(tmp)$lat))) {
+            stop("Latitudes order does not match")
+          }
+          tmp_dims <- names(dim(tmp))
+          tmp <- Apply(list(tmp),
+                       target_dims = names(dim(mask[[position]]$mask)),
+                       function(x, masking, mask_mode) {
+                         if (mask_mode == "sea") {
+                           x[masking < 0.5] <- NA  # mask the sea
+                         } else {
+                           x[masking >= 0.5] <- NA
+                         }
+                         return(x)
+                       }, masking = mask[[position]]$mask,
+                       mask_mode = mask_mode)$output1
+          if (!all(names(dim(tmp)) == tmp_dims)) {
+            tmp <- Reorder(tmp, tmp_dims)
+          }
+          loaded_metrics[[sys_pos]][[ref_pos]] <- tmp
+          attributes(loaded_metrics[[sys_pos]][[ref_pos]]) <- attrs_tmp
+        } # close ref
+      } # close sys
+    } # close if mask
 
     ## Spatial Aggregation of metrics
     if('region' %in% names(dim(loaded_metrics[[1]][[1]]))){
@@ -523,7 +600,6 @@ Scorecards <- function(recipe) {
 
   ####### PLOT SCORECARDS ##########
-
   ## Create simple scorecard tables
   ## (one system only)
   ## Metrics input must be in the same order as function SC_spatial_aggregation
diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R
index c10def7abcbe329508fcee0705d4cd62095ff3d9..c52fff4d2d7eaca50b9aa1fab769451bcc920adb 100644
--- a/modules/Scorecards/execute_scorecards.R
+++ b/modules/Scorecards/execute_scorecards.R
@@ -12,26 +12,41 @@ recipe$Run$output_dir <- output_dir
 
 ## Loop over variables
 datasets <- recipe$Analysis$Datasets
+## TODO: Improve dependency system?
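+# A minimal sketch of how the mask options expand (hypothetical recipe value):
+#   recipe$Analysis$Workflow$Scorecards$mask <- "no land sea"
+#   strsplit(recipe$Analysis$Workflow$Scorecards$mask, " ")[[1]]
+#   # [1] "no"   "land" "sea"
+# Each option below triggers one Scorecards() run: "no"/FALSE disables the
+# mask, while any other value is appended to 'fileout_label' (e.g. "-land").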
+for (masking in strsplit(recipe$Analysis$Workflow$Scorecards$mask, " ")[[1]]) {
+  for (variable in 1:length(recipe$Analysis$Variables)) {
+    if (!(recipe$Analysis$Variables[[variable]]$name == 'tos' &&
+          tolower(masking) == "sea")) {
+      scorecard_recipe <- recipe
+      # modify mask params:
+      if (masking == "no" || masking == FALSE) {
+        masking <- NULL
+      } else {
+        scorecard_recipe$Analysis$Workflow$Scorecards$fileout_label <-
+          paste0(scorecard_recipe$Analysis$Workflow$Scorecards$fileout_label,
+                 "-", masking)
+      }
+
+      scorecard_recipe$Analysis$Workflow$Scorecards$mask <- masking
+      scorecard_recipe$Analysis$Datasets$System <-
+        as.vector(unlist(recipe$Analysis$Datasets$System))
+
+      ## Include multimodel in systems
+      if(isTRUE(scorecard_recipe$Analysis$Datasets$Multimodel$execute) ||
+         scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'both' ||
+         scorecard_recipe$Analysis$Datasets$Multimodel$execute == 'yes'){
+        scorecard_recipe$Analysis$Datasets$System <-
+          c(scorecard_recipe$Analysis$Datasets$System, 'Multimodel')
+      }
-for (variable in 1:length(recipe$Analysis$Variables)) {
-  # Create a copy of the recipe for this variable
-  scorecard_recipe <- recipe
-  # Collect all system names
-  scorecard_recipe$Analysis$Datasets$System <-
-    list(name = as.vector(unlist(recipe$Analysis$Datasets$System)))
-  # Include multimodel in systems
-  if (!isFALSE(recipe$Analysis$Datasets$Multimodel$execute)) {
-    scorecard_recipe$Analysis$Datasets$System$name <-
-      c(scorecard_recipe$Analysis$Datasets$System$name, 'Multimodel')
+      scorecard_recipe$Analysis$Datasets$Reference <-
+        as.vector(unlist(recipe$Analysis$Datasets$Reference))
+      scorecard_recipe$Analysis$Variables <-
+        recipe$Analysis$Variables[[variable]]
+      # Plot Scorecards
+      Scorecards(scorecard_recipe)
+    }
   }
-  # Collect all reference names
-  scorecard_recipe$Analysis$Datasets$Reference <-
-    list(name = as.vector(unlist(recipe$Analysis$Datasets$Reference)))
-  # Assign variables
-  scorecard_recipe$Analysis$Variables <-
-    recipe$Analysis$Variables[[variable]]
-  # Plot Scorecards
-  Scorecards(scorecard_recipe)
 }
 
 print("##### SCORECARDS SAVED TO THE OUTPUT DIRECTORY #####")
diff --git a/modules/Skill/R/RPS_clim.R b/modules/Skill/R/RPS_clim.R
index 4b309c004f1881bd21944460d3439672c839cb9d..b609ae989a786ac69e64e0c972e3c31f274f27be 100644
--- a/modules/Skill/R/RPS_clim.R
+++ b/modules/Skill/R/RPS_clim.R
@@ -1,19 +1,17 @@
 # RPS version for climatology
 RPS_clim <- function(obs, indices_for_clim = NULL,
-                     prob_thresholds = c(1/3, 2/3),
-                     cross.val = TRUE,
-                     return_mean = TRUE) {
-
+                     prob_thresholds = c(1/3, 2/3), cross.val = T,
+                     Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) {
   if (is.null(indices_for_clim)){
     indices_for_clim <- 1:length(obs)
   }
-
-  obs_probs <- .GetProbs(data = obs,
-                         indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
-                         prob_thresholds = prob_thresholds,
-                         weights = NULL,
-                         cross.val = cross.val)
-
+  if (is.null(bin_dim_abs)) {
+    obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv:::
+                           prob_thresholds = prob_thresholds, weights = NULL,
+                           cross.val = cross.val)
+  } else {
+    obs_probs <- obs
+  }
   # clim_probs: [bin, sdate]
   clim_probs <- c(prob_thresholds[1], diff(prob_thresholds),
                   1 - prob_thresholds[length(prob_thresholds)])
@@ -23,6 +21,15 @@ RPS_clim <- function(obs, indices_for_clim = NULL,
   probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
   probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
   rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum)
+  if (Fair) { # FairRPS
+    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
+    ## [formula taken from SpecsVerification::EnsRps]
+    ## See explanation in https://freva.met.fu-berlin.de/about/problems/
+    R <- dim(obs)[2] #years
+    adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
+    adjustment <- colSums(adjustment)
+    rps_ref <- rps_ref + adjustment
+  }
 
   if (return_mean == TRUE) {
     return(mean(rps_ref))
diff --git a/modules/Skill/R/tmp/Corr.R b/modules/Skill/R/tmp/Corr.R
new file mode 100644
index 0000000000000000000000000000000000000000..aaa3e1eadabad40895fc85d9cd2df8f24d117c4b
--- /dev/null
+++ b/modules/Skill/R/tmp/Corr.R
@@ -0,0 +1,485 @@
+#'Compute the correlation coefficient between an array of forecasts and their corresponding observations
+#'
+#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for
+#'an array of forecasts and an array of observations. The correlations are
+#'computed along 'time_dim' that usually refers to the start date dimension. If
+#''comp_dim' is given, the correlations are computed only if obs along the
+#'comp_dim dimension are complete between limits[1] and limits[2], i.e., there
+#'is no NA between limits[1] and limits[2]. This option can be activated if the
+#'user wants to account only for the forecasts for which the corresponding
+#'observations are available at all lead times.\cr
+#'The confidence interval is computed by the Fisher transformation and the
+#'significance level relies on a one-sided student-T distribution.\cr
+#'The function can calculate the ensemble mean before computing the correlation
+#'if 'memb_dim' is specified and 'memb = FALSE'. If the ensemble mean is not
+#'calculated, the correlation is calculated for each member.
+#'If there is only one dataset for exp and obs, you can simply use cor() to
+#'compute the correlation.
+#'
+#'@param exp A named numeric array of experimental data, with at least dimension
+#' 'time_dim'.
+#'@param obs A named numeric array of observational data, same dimensions as
+#' parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param time_dim A character string indicating the name of dimension along
+#' which the correlations are computed. The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#' dimension. The default value is NULL (no dataset).
+#'@param comp_dim A character string indicating the name of dimension along which
+#' obs is taken into account only if it is complete. The default value
+#' is NULL.
+#'@param limits A vector of two integers indicating the range along comp_dim to
+#' be completed. The default is c(1, length(comp_dim dimension)).
+#'@param method A character string indicating the type of correlation:
+#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+#' member dimension, set NULL. The default value is NULL.
+#'@param memb A logical value indicating whether to keep the 'memb_dim'
+#' dimension (TRUE) or compute the ensemble mean over 'memb_dim' (FALSE). Only
+#' functional when 'memb_dim' is not NULL. The default value is TRUE.
+#'@param pval A logical value indicating whether to return or not the p-value
+#' of the test Ho: Corr = 0. The default value is TRUE.
+#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the +#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of member in experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', dat_dim = 'dataset', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE, +#' dat_dim = 'dataset') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if 
(!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string or NULL.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
+           " Set it as NULL if there is no dataset dimension.")
+    }
+  }
+  ## comp_dim
+  if (!is.null(comp_dim)) {
+    if (!is.character(comp_dim) | length(comp_dim) > 1) {
+      stop("Parameter 'comp_dim' must be a character string.")
+    }
+    if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) {
+      stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## limits
+  if (!is.null(limits)) {
+    if (is.null(comp_dim)) {
+      stop("Parameter 'comp_dim' cannot be NULL if 'limits' is assigned.")
+    }
+    if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) |
+        length(limits) != 2 | any(limits > dim(exp)[comp_dim])) {
+      stop(paste0("Parameter 'limits' must be a vector of two positive ",
+                  "integers smaller than the length of parameter 'comp_dim'."))
+    }
+  }
+  ## method
+  if (!(method %in% c("kendall", "spearman", "pearson"))) {
+    stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension.
Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim = memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) 
+ nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) 
/ sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { +# if (is.null(dat_dim) & !is.null(memb_dim)) { + + if (length(dim(CORR)) == 2) { + dim(CORR) <- NULL + if (pval) { + dim(p.val) <- NULL + } + if (conf) { + dim(conflow) <- NULL + dim(confhigh) <- NULL + } + if (sign) { + dim(signif) <- NULL + } + } else { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + if (sign) { + dim(signif) <- dim(signif)[3:length(dim(signif))] + } + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} + diff --git a/modules/Skill/R/tmp/RPS.R b/modules/Skill/R/tmp/RPS.R index 59b2d01a0d842967cdaa4d0351ca1321e19ccf8c..990d9c63485633e21ce500c03fdc63a73bb2e970 100644 --- a/modules/Skill/R/tmp/RPS.R +++ b/modules/Skill/R/tmp/RPS.R @@ -43,6 +43,7 @@ #'@param Fair A logical indicating whether to compute the FairRPS (the #' potential RPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param nmemb A numeric value indicating the number of members used to compute the probabilities. This parameter is necessary when calculating FairRPS from probabilities. Default is NULL. #'@param weights A named numerical array of the weights for 'exp' probability #' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' #' and 'time_dim'. Else, the dimension should also include 'dat_dim'. 
The
@@ -88,7 +89,8 @@
 #'@export
 RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                 dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
-                Fair = FALSE, weights = NULL, cross.val = FALSE, return_mean = TRUE,
+                Fair = FALSE, nmemb = NULL, weights = NULL,
+                cross.val = FALSE, return_mean = TRUE,
                 na.rm = FALSE, ncores = NULL) {
 
   # Check inputs
@@ -178,6 +180,16 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
   if (!is.logical(Fair) | length(Fair) > 1) {
     stop("Parameter 'Fair' must be either TRUE or FALSE.")
   }
+  if (Fair) {
+    if (!is.null(cat_dim)) {
+      if (cat_dim %in% names(dim(exp))) {
+        if (is.null(nmemb)) {
+          stop("Parameter 'nmemb' is necessary to compute the Fair ",
+               "score from probabilities.")
+        }
+      }
+    }
+  }
   ## return_mean
   if (!is.logical(return_mean) | length(return_mean) > 1) {
     stop("Parameter 'return_mean' must be either TRUE or FALSE.")
@@ -253,7 +265,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
                  fun = .RPS,
                  dat_dim = dat_dim, time_dim = time_dim,
                  memb_dim = memb_dim, cat_dim = cat_dim,
-                 prob_thresholds = prob_thresholds,
+                 prob_thresholds = prob_thresholds, nmemb = nmemb,
                  indices_for_clim = indices_for_clim, Fair = Fair,
                  weights = weights, cross.val = cross.val,
                  na.rm = na.rm, ncores = ncores)$output1
@@ -270,7 +282,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 .RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
                  dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
-                 Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE) {
+                 Fair = FALSE, nmemb = NULL, weights = NULL,
+                 cross.val = FALSE, na.rm = FALSE) {
   #--- if memb_dim:
   # exp: [sdate, memb, (dat)]
   # obs: [sdate, (memb), (dat)]
@@ -349,8 +362,11 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
       # obs_probs: [bin, sdate]
 
     } else { # inputs are probabilities already
-      exp_probs <- t(exp_data)
-      obs_probs <- t(obs_data)
+      if (all(names(dim(exp_data)) == c(time_dim, memb_dim)) ||
+          all(names(dim(exp_data)) == c(time_dim, cat_dim))) {
+        exp_probs <- t(exp_data)
+        obs_probs <- t(obs_data)
+      }
     }
 
     probs_exp_cumsum <- apply(exp_probs, 2, cumsum)
@@ -358,11 +374,16 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
 
     # rps: [sdate, nexp, nobs]
     rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2)
-    if (Fair) { # FairRPS
-      ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
-      ## [formula taken from SpecsVerification::EnsRps]
-      R <- dim(exp)[2] #memb
+    if (Fair) { # FairRPS
+      ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
+      ## [formula taken from SpecsVerification::EnsRps]
+      if (!is.null(memb_dim) && memb_dim %in% names(dim(exp))) {
+        R <- dim(exp)[2] #memb
+      } else {
+        R <- nmemb
+      }
+      warning("Applying fair correction.")
       adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum)
       adjustment <- colSums(adjustment)
       rps[, i, j] <- rps[, i, j] + adjustment
diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R
index c03841b828ea70746f6a62a2db3afa703e745c3c..5e6641f97944864f7d6e4e4d1f5f5d430a704c81 100644
--- a/modules/Skill/Skill.R
+++ b/modules/Skill/Skill.R
@@ -510,3 +510,4 @@ Probabilities <- function(recipe, data) {
   }
 
 }
+
diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R
index d0dd7ffd50dfb6f04db35006a06cd61b1d8f43e9..22fe416799a6f3e1f4caa84acccee4c9520af1a8 100644
--- a/modules/Units/R/transform_units_precipitation.R
+++ b/modules/Units/R/transform_units_precipitation.R
@@ -74,10 +74,14 @@ transform_units_precipitation <- function(data, original_units, new_units,
     time_pos <- which(lapply(data[[1]]$attrs$Variable$metadata[[var_name]]$dim,
                              function(x) {x$name}) == 'time')
     cal <- tolower(data[[1]]$attrs$Variable$metadata[[var_name]]$dim[[time_pos]]$calendar)
+    # GRIB files don't store the calendar in this attribute
+    if (is.null(cal) || length(cal) < 1) {
+      cal <- 'standard'
+    }
     data_list[[var_index]] <- Apply(list(data_list[[var_index]],
                                          data[[1]]$attrs$Dates),
                                     target_dim = list(c('syear'), c('syear')),
-                                    extra_info = list(cal = cal, days_in_month = .days_in_month),
+                                    extra_info = list(cal = cal, days_in_month = .days_in_month),
                                     fun = function(x, y) {
                                       date <- as.Date(y, "%Y-%m-%d")
                                       num_days <- .days_in_month(date, cal = .cal)
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R
index 2df06e8bdf87027e3a42da24b78821c5bdc1de82..13b02c9f68a836e678172f1d2ba560287bb9fcb8 100644
--- a/modules/Visualization/R/plot_metrics.R
+++ b/modules/Visualization/R/plot_metrics.R
@@ -8,7 +8,7 @@ plot_metrics <- function(recipe, data_cube, metrics,
   # metrics: list of named metric arrays with named dimensions
   # outdir: output directory
   # significance: T/F, whether to display the significance dots in the plots
-
+  var_dict <- read_yaml("conf/variable-dictionary.yml")
   # Abort if frequency is daily
   if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) {
     error(recipe$Run$logger, "Visualization functions not yet implemented
@@ -175,19 +175,23 @@ plot_metrics <- function(recipe, data_cube, metrics,
       # Reorder dimensions
       metric <- Reorder(metric, c("time", "longitude", "latitude"))
       # If the significance has been requested and the variable has it,
-      # retrieve it and reorder its dimensions.
-      significance_name <- paste0(name, "_significance")
-      if ((significance) && (significance_name %in% names(metrics))) {
-        metric_significance <- var_metric[[significance_name]]
-        metric_significance <- Reorder(metric_significance, c("time",
-                                                              "longitude",
-                                                              "latitude"))
-        # Split significance into list of lists, along the time dimension
-        # This allows for plotting the significance dots correctly.
-        metric_significance <- ClimProjDiags::ArrayToList(metric_significance,
+      # retrieve it and reorder its dimensions.
+      if (significance != FALSE) { # TRUE, 'dots' or 'mask'
+        significance_name <- paste0(name, "_significance")
+        if ((significance_name %in% names(metrics))) {
+          metric_significance <- var_metric[[significance_name]]
+          metric_significance <- Reorder(metric_significance, c("time",
+                                                                "longitude",
+                                                                "latitude"))
+          # Split significance into list of lists, along the time dimension
+          # This allows for plotting the significance dots correctly.
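+          # ArrayToList() splits the array along 'time': each element is a
+          # sublist whose single entry 'dots' holds the [longitude, latitude]
+          # significance field, later accessed as metric_significance[[i]][[1]].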
diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 2df06e8bdf87027e3a42da24b78821c5bdc1de82..13b02c9f68a836e678172f1d2ba560287bb9fcb8 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -8,7 +8,7 @@ plot_metrics <- function(recipe, data_cube, metrics, # metrics: list of named metric arrays with named dimensions # outdir: output directory # significance: T/F, whether to display the significance dots in the plots - + var_dict <- read_yaml("conf/variable-dictionary.yml") # Abort if frequency is daily if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) { error(recipe$Run$logger, "Visualization functions not yet implemented @@ -175,19 +175,23 @@ plot_metrics <- function(recipe, data_cube, metrics, # Reorder dimensions metric <- Reorder(metric, c("time", "longitude", "latitude")) # If the significance has been requested and the variable has it, - # retrieve it and reorder its dimensions. - significance_name <- paste0(name, "_significance") - if ((significance) && (significance_name %in% names(metrics))) { - metric_significance <- var_metric[[significance_name]] - metric_significance <- Reorder(metric_significance, c("time", - "longitude", - "latitude")) - # Split significance into list of lists, along the time dimension - # This allows for plotting the significance dots correctly. - metric_significance <- ClimProjDiags::ArrayToList(metric_significance, + # retrieve it and reorder its dimensions. + if (significance != FALSE) { # TRUE, 'dots' or 'mask' + significance_name <- paste0(name, "_significance") + if ((significance_name %in% names(metrics))) { + metric_significance <- var_metric[[significance_name]] + metric_significance <- Reorder(metric_significance, c("time", + "longitude", + "latitude")) + # Split significance into list of lists, along the time dimension + # This allows for plotting the significance dots correctly. + metric_significance <- ClimProjDiags::ArrayToList(metric_significance, dim = "time", level = "sublist", names = "dots") + } else { + metric_significance <- NULL + } } else { metric_significance <- NULL } @@ -199,11 +203,12 @@ plot_metrics <- function(recipe, data_cube, metrics, } # Get variable name and long name var_name <- data_cube$attrs$Variable$varName[[var]] - var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name + var_long_name <- var_dict$vars[[var_name]]$long_name + #var_long_name <- data_cube$attrs$Variable$metadata[[var_name]]$long_name # Multi-panel or single-panel plots if (recipe$Analysis$Workflow$Visualization$multi_panel) { # Define titles - toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), + toptitle <- paste0(system_name, " / ", var_long_name, #str_to_title(var_long_name), "\n", display_name, " / ", hcst_period) ## time_bounds in data_cube to know if Time_aggregation was applied if (!is.null(attributes(data_cube$attrs$time_bounds))) { @@ -274,9 +279,9 @@ plot_metrics <- function(recipe, data_cube, metrics, lon = longitude, lat = latitude, lon_dim = 'longitude', lat_dim = 'latitude', target_proj = target_proj, legend = 's2dv', - style = 'point', brks = brks, cols = cols, + style = 'point', dots = NULL, brks = brks, cols = cols, col_inf = col_inf, col_sup = col_sup, - units = units) + units = units) } # Loop over forecast times for (i in 1:dim(metric)[['time']]) { @@ -324,10 +329,30 @@ plot_metrics <- function(recipe, data_cube, metrics, # Modify base arguments base_args[[1]] <- metric[i, , ] if (!is.null(metric_significance)) { - base_args[[2]] <- metric_significance[[i]][[1]] - significance_caption <- "alpha = 0.05" + sign_file_label <- NULL + if (is.logical(significance)) { + if (significance) { + base_args[[2]] <- metric_significance[[i]][[1]] + significance_caption <- "alpha = 0.05" + sign_file_label <- '_mask' + } + } else { + if (significance == 'dots') { + base_args[[10]] <- metric_significance[[i]][[1]] + significance_caption <- "alpha = 0.05" + sign_file_label <- '_dots' + if (any(is.na(base_args[[10]]))) { + base_args[[10]][which(is.na(base_args[[10]]))] <- 0 + } + } else if (significance == 'mask') { + base_args[[2]] <- metric_significance[[i]][[1]] + significance_caption <- "alpha = 0.05" + sign_file_label <- '_mask' + } + } } else { significance_caption <- NULL + sign_file_label <- NULL } if (identical(fun, PlotRobinson)) { ## TODO: Customize alpha and other technical details depending on the metric @@ -338,7 +363,7 @@ plot_metrics <- function(recipe, data_cube, metrics, "Reference: ", recipe$Analysis$Datasets$Reference, "\n", significance_caption) } - fileout <- paste0(outfile, "_ft", forecast_time, ".pdf") + fileout <- paste0(outfile, "_ft", forecast_time) # Plot info(recipe$Run$logger, paste("Plotting", display_name)) @@ -346,7 +371,12 @@ plot_metrics <- function(recipe, data_cube, metrics, do.call(fun, args = c(base_args, list(toptitle = toptitle, - fileout = fileout))) + fileout = paste0(fileout, sign_file_label, + ".png")))) +# system(paste("convert", paste0(fileout, ".ps"), "-rotate 90", paste0(fileout, ".png"))) +# system(paste("convert", paste0(fileout, ".png"), +# "-density 300 -background white", +# paste0(fileout, ".png"))) } } }
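The branch added above routes the significance field to different PlotRobinson arguments depending on the mode: 'mask' overwrites the second positional argument (the mask layer) while 'dots' fills the tenth (the stippling layer), and NA dots are zeroed because that layer cannot hold missing values. A condensed sketch of the dispatch with toy data (the positions mirror the `base_args` convention assumed by plot_metrics):

```r
# Sketch of the mode-to-argument mapping; positions 2 (mask) and 10 (dots)
# are taken from the patch above, not from PlotRobinson's documentation.
apply_significance <- function(base_args, sig_layer, significance) {
  sign_file_label <- NULL
  if (isTRUE(significance) || identical(significance, "mask")) {
    base_args[[2]] <- sig_layer               # greyed-out mask style
    sign_file_label <- "_mask"
  } else if (identical(significance, "dots")) {
    sig_layer[is.na(sig_layer)] <- 0          # dots layer cannot hold NAs
    base_args[[10]] <- sig_layer              # stippling style
    sign_file_label <- "_dots"
  }
  list(args = base_args, label = sign_file_label)
}
base_args <- vector("list", 10)
sig <- array(c(1, 0, NA, 1), dim = c(longitude = 2, latitude = 2))
res <- apply_significance(base_args, sig, "dots")
res$label  # "_dots"; appended to the output file name before ".png"
```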
diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R index bd427448fad9bdc9482c3e13b161f05e2fd6c1a7..e066852c4fbfab61396552928e27fbe96aca7f8a 100644 --- a/modules/Visualization/R/tmp/PlotRobinson.R +++ b/modules/Visualization/R/tmp/PlotRobinson.R @@ -118,11 +118,11 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, target_proj = 54030, legend = 's2dv', style = 'point', dots = NULL, mask = NULL, brks = NULL, cols = NULL, bar_limits = NULL, triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL, - color_fun = clim.palette(), bar_extra_margin = rep(0, 4), vertical = TRUE, + color_fun = clim.palette(), bar_extra_margin = c(3.5, 0, 3.5, 0), vertical = TRUE, toptitle = NULL, caption = NULL, units = NULL, crop_coastlines = NULL, - point_size = "auto", title_size = 16, dots_size = 0.5, + point_size = "auto", title_size = 10, dots_size = 0.2, dots_shape = 47, coastlines_width = 0.3, - fileout = NULL, width = 8, height = 4, size_units = "in", + fileout = NULL, width = 8, height = 5, size_units = "in", res = 300) { # Sanity check @@ -234,12 +234,14 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, if (!identical(dim(mask), dim(data))) { stop("Parameter 'mask' must have the same dimensions as 'data'.") } else if (is.numeric(mask)) { + mask[which(is.na(mask))] <- 0 if (all(mask %in% c(0, 1))) { mask <- array(as.logical(mask), dim = dim(mask)) } else { stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.") } } else if (is.logical(mask)) { + mask[which(is.na(mask))] <- FALSE if (!all(mask %in% c(T, F))) { stop("Parameter 'mask' must have only TRUE/FALSE or 0/1.") }
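The two NA-sanitation lines added to PlotRobinson are what keep a significance mask with data gaps from tripping the strict TRUE/FALSE (or 0/1) validation directly below them. A tiny standalone reproduction of that step:

```r
# Toy mask with a missing grid point: NAs become FALSE (0 for numeric
# masks) so the strict membership check below no longer aborts the plot.
mask <- array(c(TRUE, NA, FALSE, TRUE), dim = c(lon = 2, lat = 2))
mask[which(is.na(mask))] <- FALSE
stopifnot(all(mask %in% c(TRUE, FALSE)))  # validation passes
```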
diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 3750baead9284492691714ef50d356f7a5faa8ba..16870e5ab14f86b88a567cd96445f462f35fcf96 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -25,7 +25,6 @@ Visualization <- function(recipe, # s2dv_cube objects # skill_metrics: list of arrays containing the computed skill metrics # significance: Bool. Whether to include significance dots where applicable - # Try to set default configuration if not specified by user if (is.null(output_conf) && !is.null(recipe$Analysis$Region$name)) { output_conf <- read_yaml("modules/Visualization/output_size.yml", @@ -77,8 +76,26 @@ Visualization <- function(recipe, # Plot skill metrics if ("skill_metrics" %in% plots) { if (!is.null(skill_metrics)) { - plot_metrics(recipe, data$hcst, skill_metrics, outdir, - significance, output_conf = output_conf) + if (is.logical(significance)) { + plot_metrics(recipe, data$hcst, skill_metrics, outdir, + significance, output_conf = output_conf) + info(recipe$Run$logger, + paste("##### Skill metrics significance set as", + significance, "#####")) + } else { + if (significance %in% c('both', 'mask')) { + plot_metrics(recipe, data$hcst, skill_metrics, outdir, + significance = 'mask', output_conf = output_conf) + info(recipe$Run$logger, + "##### Skill metrics significance as mask #####") + } + if (significance %in% c('both', 'dots')) { + plot_metrics(recipe, data$hcst, skill_metrics, outdir, + significance = 'dots', output_conf = output_conf) + info(recipe$Run$logger, + "##### Skill metrics significance as dots #####") + } + } } else { error(recipe$Run$logger, paste0("The skill metric plots have been requested, but the ",
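Since plot_metrics itself only understands TRUE/FALSE, 'dots', or 'mask', the wrapper expands the user-facing 'both' option into two consecutive calls. A reduced sketch of that dispatch (a hypothetical `plot_fn` stands in for plot_metrics):

```r
# Reduced sketch of the 'both' expansion; plot_fn stands in for plot_metrics.
plot_with_significance <- function(plot_fn, significance) {
  if (is.logical(significance)) {
    plot_fn(significance = significance)          # TRUE/FALSE passes through
  } else {
    if (significance %in% c("both", "mask")) plot_fn(significance = "mask")
    if (significance %in% c("both", "dots")) plot_fn(significance = "dots")
  }
}
plot_with_significance(function(significance) message("mode: ", significance),
                       significance = "both")
# mode: mask
# mode: dots
```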
diff --git a/recipe_NAO_scorecards.yml b/recipe_NAO_scorecards.yml new file mode 100644 index 0000000000000000000000000000000000000000..488b1e2402706511dd8d26627e9c7d7b09f69fbe --- /dev/null +++ b/recipe_NAO_scorecards.yml @@ -0,0 +1,70 @@ +Description: + Author: Nuria Perez-Zanon + Info: Cerise phase 0 assessment NAO index + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tos, freq: monthly_mean} + Datasets: + System: + - {name: Meteo-France-System8} + - {name: CMCC-SPS3.5} + - {name: UKMO-System602} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'EU', latmin: 20, latmax: 80, lonmin: -80, lonmax: 40 } + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/nperez/cs_oper/ + code_dir: /esarchive/scratch/nperez/git3/sunset/ + + diff --git a/recipe_NAO_test.yml b/recipe_NAO_test.yml new file mode 100644 index 0000000000000000000000000000000000000000..091b913cfdddd7ac63b9bddb3f733ce2d97c4811 --- /dev/null +++ b/recipe_NAO_test.yml @@ -0,0 +1,89 @@ +Description: + Author: Nuria Perez-Zanon + Info: Cerise phase 0 assessment NAO index + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + Datasets: + System: + - {name: Meteo-France-System8} + # - {name: CMCC-SPS3.5} + # - {name: UKMO-System602} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' + # - '0201' + #- '0301' + #- '0401' + # - '0501' + # - '0601' + # - '0701' + # - '0801' + # - '0901' + # - '1001' + # - '1101' + #- '1201' + # fcst_year: # Optional, int: Forecast year 'YYYY' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'EU', latmin: 20, latmax: 80, lonmin: -80, lonmax: 40 } + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Indices: + NAO: {obsproj: TRUE, save: 'all', plot_ts: TRUE, plot_sp: yes} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + save: none + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds.
+    save: none +  Indicators: +    index: no +  Scorecards: + execute: yes # yes/no + regions: + - 'NAO' + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 'all' + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + inf_to_na: yes + table_label: NULL + fileout_label: NULL + legend_width: 690 + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + output_dir: /perm/cyce/phase0/ + code_dir: /ec/res4/scratch/cyce/cerise/sunset/ + filesystem: cerise + + diff --git a/recipe_ecvs_scorecards_seasonal.yml b/recipe_ecvs_scorecards_seasonal.yml new file mode 100644 index 0000000000000000000000000000000000000000..2afc9473b9a1aee2b423b9be6b5cf957713ca6b1 --- /dev/null +++ b/recipe_ecvs_scorecards_seasonal.yml @@ -0,0 +1,115 @@ +Description: + Author: Nuria Perez-Zanon + Info: Cerise phase 0 assessment for ECVs + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: K} + #- {name: tasmin, freq: monthly_mean, units: K} + #- {name: tasmax, freq: monthly_mean, units: K} + #- {name: tos, freq: monthly_mean, units: K} + #- {name: sfcWind, freq: monthly_mean, units: ms-1} + #- {name: tdps, freq: monthly_mean, units: K} + #- {name: psl, freq: monthly_mean, units: hPa} + #- {name: prlr, freq: monthly_mean, units: mm, flux: yes} + Datasets: + System: + - {name: Meteo-France-System8} + - {name: CMCC-SPS3.5} + - {name: UKMO-System602} + - {name: ECMWF-i2o2} + Multimodel: + execute: false # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: "EU", latmin: -90, latmax: 90, lonmin: 0, lonmax: 360} + Regrid: + method: conservative #bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: yes + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: no + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
+    save: 'all' +  Indicators: +    index: no +  Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + projection: Robinson + significance: both + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: + 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: + 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 'all' + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + inf_to_na: yes + table_label: NULL + fileout_label: NULL + legend_width: 690 + columns_width: 1.5 + col1_width: 4 + col2_width: NULL + calculate_diff: FALSE + mask: no sea + ncores: 24 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: cerise + output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs + code_dir: /ec/res4/scratch/cyce/cerise/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /ec/res4/scratch/cyce/cerise/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a000 # replace with your EXPID + hpc_user: cyce # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 128 + platform: ATHOS-hpc + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/recipe_tas_scorecards_seasonal.yml b/recipe_tas_scorecards_seasonal.yml new file mode 100644 index 0000000000000000000000000000000000000000..84be865a180b38d9da0762491f671563b4e8bb1c --- /dev/null +++ b/recipe_tas_scorecards_seasonal.yml @@ -0,0 +1,94 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tos + freq: monthly_mean + units: K + flux: no + Datasets: + System: + name: CMCC-SPS3.5 #CMCC-SPS3.5 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -10 + latmax: 10 + lonmin: 180 + lonmax: 250 + Regrid: + method: bilinear # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds.
+    save: none +  Indicators: +    index: no +  Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: cerise + output_dir: /perm/cyce/phase0/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/nperez/git3/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986 Binary files /dev/null and b/rsz_rsz_bsc_logo.png differ diff --git a/tools/add_logo.R b/tools/add_logo.R index 42fb87c50b1bf1a0c20409cee43cc1703e8aeb3c..e223c6a91436fe47de6050a6fc295fee750d4b72 100644 --- a/tools/add_logo.R +++ b/tools/add_logo.R @@ -1,13 +1,8 @@ add_logo <- function(recipe, logo) { # recipe: SUNSET recipe # logo: URL to the logo - system <- list.files(paste0(recipe$Run$output_dir, "/plots/")) - variable <- recipe$Analysis$Variable$name - files <- lapply(variable, function(x) { - f <- list.files(paste0(recipe$Run$output_dir, "/plots/", - system, "/", x)) - full_path <- paste0(recipe$Run$output_dir, "/plots/", - system, "/", x,"/", f)})[[1]] + files <- list.files(paste0(recipe$Run$output_dir, "/plots/"), + pattern = '\\.png$', full.names = TRUE, recursive = TRUE) dim(files) <- c(file = length(files)) Apply(list(files), target_dims = NULL, function(x) { system(paste("composite -gravity southeast -geometry +10+10",
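The rewritten add_logo no longer reconstructs paths per system and variable; it simply collects every PNG under plots/ recursively and stamps the logo with ImageMagick. A sketch of the same idea outside the recipe machinery (paths are hypothetical; assumes ImageMagick's composite CLI is installed and on PATH):

```r
# Standalone sketch of the logo-stamping loop; 'plots_dir' and 'logo.png'
# are hypothetical stand-ins for the recipe's output directory and logo.
plots_dir <- "output/plots"
files <- list.files(plots_dir, pattern = "\\.png$",
                    full.names = TRUE, recursive = TRUE)
for (f in files) {
  # Overlay the logo in the bottom-right corner, 10 px from each edge,
  # writing the result over the original file:
  system(paste("composite -gravity southeast -geometry +10+10",
               "logo.png", f, f))
}
```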
read_yaml("conf/archive_decadal.yml")[[recipe$Run$filesystem]] + } else { + archive <- read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] + } + if (length(archive$System[[all_recipes[[reci]]$Analysis$Dataset$System$name]]$sdate) > 0) { + available_sdates <- unlist(lapply( + archive$System[[all_recipes[[reci]]$Analysis$Dataset$System$name]]$sdates, + function(x){x[[1]]})) + if (all_recipes[[reci]]$Analysis$Time$sdate %in% available_sdates) { + # check: ftime + ind <- which(all_recipes[[reci]]$Analysis$Time$sdate == + available_sdates) + available_ftime_max <- + archive$System[[all_recipes[[reci]]$Analysis$Dataset$System$name]]$sdates[[ind]][[3]] + if (all_recipes[[reci]]$Analysis$Time$ftime_max > available_ftime_max) { + all_recipes[[reci]]$Analysis$Time$ftime_max <- available_ftime_max + } + write_yaml(all_recipes[[reci]], + paste0(recipe_dir, "atomic_recipe_", recipe_name, ".yml")) + } + } else { + write_yaml(all_recipes[[reci]], + paste0(recipe_dir, "atomic_recipe_", recipe_name, ".yml")) + } } - # Print information for user info(recipe$Run$logger, paste("The main recipe has been divided into", length(chunk_to_recipe),