From 163f2e9d3b2b58ed4dc49e17a5df2fc6dcaa1812 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Tue, 10 Jan 2023 19:02:28 +0100 Subject: [PATCH 1/7] first version --- R/ROCSS.R | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 R/ROCSS.R diff --git a/R/ROCSS.R b/R/ROCSS.R new file mode 100644 index 0000000..a849ad4 --- /dev/null +++ b/R/ROCSS.R @@ -0,0 +1,47 @@ + +library(s2dv) +library(easyVerification) + +# exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +# ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +# obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) +# ROCSS(exp, obs) +# ROCSS(exp, obs, ref) + +ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), time_dim = 'sdate', member_dim = 'member', na.rm = FALSE, ncores = NULL){ + + n_cats <- 1 + length(prob_thresholds) + + ## Reordering dimensions (for EnsRoca) + exp <- Reorder(data = exp, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,member_dim)))], time_dim, member_dim)) + obs <- Reorder(data = obs, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,member_dim)))], time_dim)) + if (!is.null(ref)){ + ref <- Reorder(data = ref, order = c(names(dim(ref))[-(which(names(dim(ref)) %in% c(time_dim,member_dim)))], time_dim, member_dim)) + } + + ## ROCS for the forecast + rocs_exp <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = exp, obs = obs, + tdim = which(names(dim(exp))==time_dim), + ensdim = which(names(dim(exp))==member_dim), + prob = prob_thresholds, + na.rm = na.rm, ncpus = ncores) + ## ROCSS + rocss <- list() + if (is.null(ref)){ ## Random forecast as reference forecast + for (cat in paste0('cat',1:n_cats)){ + rocss[[cat]] <- 2 * rocs_exp[[cat]] - 1 + } + } else { + ## ROCS for the reference forecast + rocs_ref <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = ref, obs = obs, + tdim = which(names(dim(ref))==time_dim), + ensdim = which(names(dim(ref))==member_dim), + prob = prob_thresholds, + na.rm = na.rm, ncpus = ncores) + for (cat in paste0('cat',1:n_cats)){ + rocss[[cat]] <- (rocs_exp[[cat]] - rocs_ref[[cat]]) / (1 - rocs_ref[[cat]]) + } + } + + return(rocss) +} -- GitLab From 7c4b39534db303b1263e9dbbbbcf96ec616cb5d5 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 11 Jan 2023 11:14:32 +0100 Subject: [PATCH 2/7] added checks --- R/ROCSS.R | 131 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 14 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index a849ad4..982f610 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -2,40 +2,143 @@ library(s2dv) library(easyVerification) -# exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) -# ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) -# obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) -# ROCSS(exp, obs) -# ROCSS(exp, obs, ref) +exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) +ROCSS(exp, obs) +ROCSS(exp, obs, ref) -ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), time_dim = 'sdate', member_dim = 'member', na.rm = FALSE, ncores = NULL){ +ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), na.rm = FALSE, + time_dim = 'sdate', memb_dim = 'member', ncores = NULL){ - n_cats <- 1 + length(prob_thresholds) + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## dat_dim + # if (!is.null(dat_dim)) { + # if (!is.character(dat_dim) | length(dat_dim) > 1) { + # stop("Parameter 'dat_dim' must be a character string.") + # } + # if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + # stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + # " Set it as NULL if there is no dataset dimension.") + # } + # } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + # if (!is.null(dat_dim)) { + # name_exp <- name_exp[-which(name_exp == dat_dim)] + # name_obs <- name_obs[-which(name_obs == dat_dim)] + # } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim'")) # and 'dat_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + # if (!is.null(dat_dim)) { + # if (dat_dim %in% name_ref) { + # if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + # stop(paste0("If parameter 'ref' has dataset dimension, it must be", + # " equal to dataset dimension of 'exp'.")) + # } + # name_ref <- name_ref[-which(name_ref == dat_dim)] + # } + # } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim'", # and 'dat_dim'", + " if there is only one reference dataset.")) + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## na.rm + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### ## Reordering dimensions (for EnsRoca) - exp <- Reorder(data = exp, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,member_dim)))], time_dim, member_dim)) - obs <- Reorder(data = obs, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,member_dim)))], time_dim)) + exp <- Reorder(data = exp, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,memb_dim)))], time_dim, memb_dim)) + obs <- Reorder(data = obs, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,memb_dim)))], time_dim)) if (!is.null(ref)){ - ref <- Reorder(data = ref, order = c(names(dim(ref))[-(which(names(dim(ref)) %in% c(time_dim,member_dim)))], time_dim, member_dim)) + ref <- Reorder(data = ref, order = c(names(dim(ref))[-(which(names(dim(ref)) %in% c(time_dim,memb_dim)))], time_dim, memb_dim)) } - ## ROCS for the forecast + ## ROCS (exp) rocs_exp <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = exp, obs = obs, tdim = which(names(dim(exp))==time_dim), - ensdim = which(names(dim(exp))==member_dim), + ensdim = which(names(dim(exp))==memb_dim), prob = prob_thresholds, na.rm = na.rm, ncpus = ncores) ## ROCSS + n_cats <- 1 + length(prob_thresholds) rocss <- list() if (is.null(ref)){ ## Random forecast as reference forecast for (cat in paste0('cat',1:n_cats)){ rocss[[cat]] <- 2 * rocs_exp[[cat]] - 1 } } else { - ## ROCS for the reference forecast + ## ROCS (ref) rocs_ref <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = ref, obs = obs, tdim = which(names(dim(ref))==time_dim), - ensdim = which(names(dim(ref))==member_dim), + ensdim = which(names(dim(ref))==memb_dim), prob = prob_thresholds, na.rm = na.rm, ncpus = ncores) for (cat in paste0('cat',1:n_cats)){ -- GitLab From bd448d859b56fdaa5a8fabd92f3a0f7029a34fdc Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 11 Jan 2023 15:04:26 +0100 Subject: [PATCH 3/7] added documentation --- R/ROCSS.R | 60 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index 982f610..df7e22e 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -1,14 +1,52 @@ - -library(s2dv) -library(easyVerification) - -exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) -ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) -obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) -ROCSS(exp, obs) -ROCSS(exp, obs, ref) - -ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3,2/3), na.rm = FALSE, +#'Compute the Relative Operating Characteristic Skill Score +#' +#'The Relative Operating Characteristic Skill Score (ROCSS; Kharin and Zwiers, +#'2003) is based on the ROC curve, which gives information about the hit rates +#'against the false-alarm rates for a particular category or event. The ROC curve +#'can be summarized with the area under the ROC curve, known as the ROC score, +#'to provide a skill value for each category. The ROCSS ranges between minus +#'infinite and 1. A positive ROCSS value indicates that the forecast has higher +#'skill than the reference forecasts, meaning the contrary otherwise. +#'@param exp A named numerical array of the forecast with at least time and +#' member dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time and member dimension. The dimensions must be the same as 'exp' +#' except 'memb_dim'. If 'ref' is NULL, the random forecast is used as reference +#' forecast. The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param na.rm A logical value indicating whether to remove NA values. The +#' default value is FALSE. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'#'A list with numerical arrays of ROCSS with the same dimensions as 'exp' except +#''time_dim' and 'memb_dim' dimensions. The number of elements of the list corresponds +#'to the number of probabilistic categories, i.e., 1+length(prob_thresholds) +#' +#'@references +#'Kharin, V. V. and Zwiers, F. W. (2003): https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +#'ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +#'obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) +#'ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast +#'ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import easyVerification +#'@export +ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), na.rm = FALSE, time_dim = 'sdate', memb_dim = 'member', ncores = NULL){ # Check inputs -- GitLab From 212a4fc25d6813e1a06ed6ceeb9e0f053ef87952 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 12 Jan 2023 15:32:30 +0100 Subject: [PATCH 4/7] replaced veryApply with multiApply --- R/ROCSS.R | 83 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index df7e22e..45e9a9a 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -19,8 +19,9 @@ #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. -#'@param na.rm A logical value indicating whether to remove NA values. The -#' default value is FALSE. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. #'@param time_dim A character string indicating the name of the time dimension. #' The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member dimension @@ -44,9 +45,10 @@ #'ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast #'ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #' -#'@import easyVerification +#'@import multiApply +#'@importFrom easyVerification convert2prob EnsRoca #'@export -ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), na.rm = FALSE, +ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, time_dim = 'sdate', memb_dim = 'member', ncores = NULL){ # Check inputs @@ -138,9 +140,17 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), na.rm = F any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") } - ## na.rm - if (!na.rm %in% c(TRUE,FALSE)) { - stop("Parameter 'na.rm' must be TRUE or FALSE") + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } } ## ncores if (!is.null(ncores)) { @@ -152,19 +162,50 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), na.rm = F ############################### - ## Reordering dimensions (for EnsRoca) - exp <- Reorder(data = exp, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,memb_dim)))], time_dim, memb_dim)) - obs <- Reorder(data = obs, order = c(names(dim(exp))[-(which(names(dim(exp)) %in% c(time_dim,memb_dim)))], time_dim)) - if (!is.null(ref)){ + ## Compute ROCSS + if (!is.null(ref)){ ## reference forecast is provided + ref <- Reorder(data = ref, order = c(names(dim(ref))[-(which(names(dim(ref)) %in% c(time_dim,memb_dim)))], time_dim, memb_dim)) + + res <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim, + ref = c(time_dim, memb_dim)), + fun = .ROCSS, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, + time_dim = time_dim, + ncores = ncores) + + } else { ## Random forecast as reference forecast + + res <- multiApply::Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = time_dim), + fun = .ROCSS, + ref = ref, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, + time_dim = time_dim, + ncores = ncores) } + return(res) +} + +.ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, time_dim = 'sdate'){ + + # exp: [sdate, memb] + # obs: [sdate] + # ref: [sdate, memb] + + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) + ## ROCS (exp) - rocs_exp <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = exp, obs = obs, - tdim = which(names(dim(exp))==time_dim), - ensdim = which(names(dim(exp))==memb_dim), - prob = prob_thresholds, - na.rm = na.rm, ncpus = ncores) + rocs_exp <- EnsRoca(ens = Reorder(exp_probs, c(time_dim,'bin')), + obs = Reorder(obs_probs, c(time_dim,'bin'))) + ## ROCSS n_cats <- 1 + length(prob_thresholds) rocss <- list() @@ -172,13 +213,11 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), na.rm = F for (cat in paste0('cat',1:n_cats)){ rocss[[cat]] <- 2 * rocs_exp[[cat]] - 1 } - } else { + } else { ## Reference forecast is provided ## ROCS (ref) - rocs_ref <- easyVerification::veriApply(verifun = 'EnsRoca', fcst = ref, obs = obs, - tdim = which(names(dim(ref))==time_dim), - ensdim = which(names(dim(ref))==memb_dim), - prob = prob_thresholds, - na.rm = na.rm, ncpus = ncores) + ref_probs <- .get_probs(data = ref, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) + rocs_ref <- EnsRoca(ens = Reorder(ref_probs, c(time_dim,'bin')), + obs = Reorder(obs_probs, c(time_dim,'bin'))) for (cat in paste0('cat',1:n_cats)){ rocss[[cat]] <- (rocs_exp[[cat]] - rocs_ref[[cat]]) / (1 - rocs_ref[[cat]]) } -- GitLab From 90ba3e7721eebd121841da95598fd9a6b2c35f3a Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 22 Feb 2023 17:23:21 +0100 Subject: [PATCH 5/7] included cross.val in ROCSS; bin_dim name was missing for cross.val=FALSE in .get_probs --- R/ROCSS.R | 19 ++++++++++++++----- R/RPS.R | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/R/ROCSS.R b/R/ROCSS.R index 45e9a9a..493534c 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -27,6 +27,9 @@ #'@param memb_dim A character string indicating the name of the member dimension #' to compute the probabilities of the forecast and the reference forecast. The #' default value is 'member'. +#'@param cross.val A logical indicating whether to compute the thresholds between +#' probabilistic categories in cross-validation. +#' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -49,7 +52,7 @@ #'@importFrom easyVerification convert2prob EnsRoca #'@export ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - time_dim = 'sdate', memb_dim = 'member', ncores = NULL){ + time_dim = 'sdate', memb_dim = 'member', cross.val = FALSE, ncores = NULL){ # Check inputs ## exp, obs, and ref (1) @@ -152,6 +155,10 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") } } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -175,6 +182,7 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, time_dim = time_dim, + cross.val = cross.val, ncores = ncores) } else { ## Random forecast as reference forecast @@ -187,20 +195,21 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, time_dim = time_dim, + cross.val = cross.val, ncores = ncores) } return(res) } -.ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, time_dim = 'sdate'){ +.ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, time_dim = 'sdate', cross.val = FALSE){ # exp: [sdate, memb] # obs: [sdate] # ref: [sdate, memb] - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) ## ROCS (exp) rocs_exp <- EnsRoca(ens = Reorder(exp_probs, c(time_dim,'bin')), @@ -215,7 +224,7 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f } } else { ## Reference forecast is provided ## ROCS (ref) - ref_probs <- .get_probs(data = ref, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds) + ref_probs <- .get_probs(data = ref, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) rocs_ref <- EnsRoca(ens = Reorder(ref_probs, c(time_dim,'bin')), obs = Reorder(obs_probs, c(time_dim,'bin'))) for (cat in paste0('cat',1:n_cats)){ diff --git a/R/RPS.R b/R/RPS.R index 6a89d6d..48bf1bf 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -40,7 +40,7 @@ #' have more than 45 members if consistency between the weighted and unweighted #' methodologies is desired. #'@param cross.val A logical indicating whether to compute the thresholds between -#' probabilistics categories in cross-validation. +#' probabilistic categories in cross-validation. #' The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. @@ -302,7 +302,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL cumulative_weights <- sorted_arrays$cumulative_weights quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y } - quantiles <- array(rep(quantiles, dim(data)[1]),dim = c(length(quantiles), dim(data)[1])) + quantiles <- array(rep(quantiles, dim(data)[1]),dim = c(bin = length(quantiles), dim(data)[1])) } # quantiles: [bin-1, sdate] -- GitLab From fabbe39bb734f66c4ea4c0f7de1b9ea576d63e21 Mon Sep 17 00:00:00 2001 From: aho Date: Fri, 3 Mar 2023 15:16:38 +0100 Subject: [PATCH 6/7] Move .get_probs() to Utils; change ROCSS output to an array; add 'dat_dim' in ROCSS. --- NAMESPACE | 2 + R/ROCSS.R | 243 +++++++++++++++++++++++------------- R/RPS.R | 85 ------------- R/Utils.R | 90 ++++++++++++- man/ROCSS.Rd | 89 +++++++++++++ man/RPS.Rd | 2 +- tests/testthat/test-ROCSS.R | 229 +++++++++++++++++++++++++++++++++ 7 files changed, 563 insertions(+), 177 deletions(-) create mode 100644 man/ROCSS.Rd create mode 100644 tests/testthat/test-ROCSS.R diff --git a/NAMESPACE b/NAMESPACE index d440ec0..6224a15 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,6 +59,7 @@ export(ProjectField) export(REOF) export(RMS) export(RMSSS) +export(ROCSS) export(RPS) export(RPSS) export(RandomWalkTest) @@ -100,6 +101,7 @@ importFrom(SpecsVerification,enscrps_cpp) importFrom(abind,abind) importFrom(abind,adrop) importFrom(easyNCDF,ArrayToNc) +importFrom(easyVerification,EnsRoca) importFrom(easyVerification,convert2prob) importFrom(grDevices,bmp) importFrom(grDevices,col2rgb) diff --git a/R/ROCSS.R b/R/ROCSS.R index 493534c..7831a88 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -2,44 +2,53 @@ #' #'The Relative Operating Characteristic Skill Score (ROCSS; Kharin and Zwiers, #'2003) is based on the ROC curve, which gives information about the hit rates -#'against the false-alarm rates for a particular category or event. The ROC curve -#'can be summarized with the area under the ROC curve, known as the ROC score, -#'to provide a skill value for each category. The ROCSS ranges between minus -#'infinite and 1. A positive ROCSS value indicates that the forecast has higher -#'skill than the reference forecasts, meaning the contrary otherwise. +#'against the false-alarm rates for a particular category or event. The ROC +#'curve can be summarized with the area under the ROC curve, known as the ROC +#'score, to provide a skill value for each category. The ROCSS ranges between +#'minus infinite and 1. A positive ROCSS value indicates that the forecast has +#'higher skill than the reference forecasts, meaning the contrary otherwise. #'@param exp A named numerical array of the forecast with at least time and #' member dimension. #'@param obs A named numerical array of the observation with at least time #' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and -#' 'dat_dim'. +#' 'dat_dim'. #'@param ref A named numerical array of the reference forecast data with at #' least time and member dimension. The dimensions must be the same as 'exp' -#' except 'memb_dim'. If 'ref' is NULL, the random forecast is used as reference -#' forecast. The default value is NULL. +#' except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +#' it should not have dataset dimension. If there is corresponding reference +#' for each experiement, the dataset dimension must have the same length as in +#' 'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. #'@param indices_for_clim A vector of the indices to be taken along 'time_dim' #' for computing the thresholds between the probabilistic categories. If NULL, #' the whole period is used. The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. -#'@param cross.val A logical indicating whether to compute the thresholds between -#' probabilistic categories in cross-validation. -#' The default value is FALSE. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation. The default value is +#' FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return -#'#'A list with numerical arrays of ROCSS with the same dimensions as 'exp' except -#''time_dim' and 'memb_dim' dimensions. The number of elements of the list corresponds -#'to the number of probabilistic categories, i.e., 1+length(prob_thresholds) +#'A numerical array of ROCSS with the same dimensions as 'exp' excluding +#''time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is +#'each category. The length if 'cat' dimension corresponds to the number of +#'probabilistic categories, i.e., 1 + length(prob_thresholds). If there are +#'multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. #' #'@references -#'Kharin, V. V. and Zwiers, F. W. (2003): https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 +#'Kharin, V. V. and Zwiers, F. W. (2003): +#' https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 #' #'@examples #'exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) @@ -49,10 +58,11 @@ #'ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast #' #'@import multiApply -#'@importFrom easyVerification convert2prob EnsRoca +#'@importFrom easyVerification EnsRoca #'@export -ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - time_dim = 'sdate', memb_dim = 'member', cross.val = FALSE, ncores = NULL){ +ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + cross.val = FALSE, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -94,15 +104,15 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f stop("Parameter 'memb_dim' is not found in 'ref' dimension.") } ## dat_dim - # if (!is.null(dat_dim)) { - # if (!is.character(dat_dim) | length(dat_dim) > 1) { - # stop("Parameter 'dat_dim' must be a character string.") - # } - # if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - # stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", - # " Set it as NULL if there is no dataset dimension.") - # } - # } + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } ## exp, obs, and ref (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) @@ -110,31 +120,31 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f if (memb_dim %in% name_obs) { name_obs <- name_obs[-which(name_obs == memb_dim)] } - # if (!is.null(dat_dim)) { - # name_exp <- name_exp[-which(name_exp == dat_dim)] - # name_obs <- name_obs[-which(name_obs == dat_dim)] - # } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim'")) # and 'dat_dim'.")) + "all dimensions except 'memb_dim' and 'dat_dim'.")) } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) name_ref <- name_ref[-which(name_ref == memb_dim)] - # if (!is.null(dat_dim)) { - # if (dat_dim %in% name_ref) { - # if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - # stop(paste0("If parameter 'ref' has dataset dimension, it must be", - # " equal to dataset dimension of 'exp'.")) - # } - # name_ref <- name_ref[-which(name_ref == dat_dim)] - # } - # } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop(paste0("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.")) + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim'", # and 'dat_dim'", + "all dimensions except 'memb_dim' and 'dat_dim'", " if there is only one reference dataset.")) } } @@ -169,68 +179,123 @@ ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_f ############################### - ## Compute ROCSS - if (!is.null(ref)){ ## reference forecast is provided - - ref <- Reorder(data = ref, order = c(names(dim(ref))[-(which(names(dim(ref)) %in% c(time_dim,memb_dim)))], time_dim, memb_dim)) - - res <- multiApply::Apply(data = list(exp = exp, obs = obs, ref = ref), - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim, - ref = c(time_dim, memb_dim)), + # Compute ROCSS + ## output_dims + if (is.null(dat_dim)) { + output_dims <- 'cat' + } else { + output_dims <- c('nexp', 'nobs', 'cat') + } + ## target_dims + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + ## If ref doesn't have & dat_dim is not NULL + if (!is.null(ref) && !is.null(dat_dim) &&!dat_dim %in% names(dim(ref))) { + target_dims_ref <- c(time_dim, memb_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } + + if (!is.null(ref)) { ## reference forecast is provided + res <- Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = target_dims_obs, + ref = target_dims_ref), + output_dims = output_dims, fun = .ROCSS, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, + time_dim = time_dim, dat_dim = dat_dim, cross.val = cross.val, - ncores = ncores) + ncores = ncores)$output1 } else { ## Random forecast as reference forecast - - res <- multiApply::Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = time_dim), + res <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim, dat_dim), + obs = target_dims_obs), + output_dims = output_dims, fun = .ROCSS, ref = ref, prob_thresholds = prob_thresholds, indices_for_clim = indices_for_clim, - time_dim = time_dim, + time_dim = time_dim, dat_dim = dat_dim, cross.val = cross.val, - ncores = ncores) + ncores = ncores)$output1 } return(res) } -.ROCSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, time_dim = 'sdate', cross.val = FALSE){ +.ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, cross.val = FALSE) { - # exp: [sdate, memb] - # obs: [sdate] - # ref: [sdate, memb] + # exp: [sdate, memb, (dat)] + # obs: [sdate, (dat)] + # ref: [sdate, memb, (dat)] or NULL - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) - - ## ROCS (exp) - rocs_exp <- EnsRoca(ens = Reorder(exp_probs, c(time_dim,'bin')), - obs = Reorder(obs_probs, c(time_dim,'bin'))) - - ## ROCSS - n_cats <- 1 + length(prob_thresholds) - rocss <- list() - if (is.null(ref)){ ## Random forecast as reference forecast - for (cat in paste0('cat',1:n_cats)){ - rocss[[cat]] <- 2 * rocs_exp[[cat]] - 1 + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dat_dim <- 'dat' + dim(exp) <- c(dim(exp), dat = 1) + dim(obs) <- c(dim(obs), dat = 1) + if (!is.null(ref)) { + dim(ref) <- c(dim(ref), dat = 1) } - } else { ## Reference forecast is provided - ## ROCS (ref) - ref_probs <- .get_probs(data = ref, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) - rocs_ref <- EnsRoca(ens = Reorder(ref_probs, c(time_dim,'bin')), - obs = Reorder(obs_probs, c(time_dim,'bin'))) - for (cat in paste0('cat',1:n_cats)){ - rocss[[cat]] <- (rocs_exp[[cat]] - rocs_ref[[cat]]) / (1 - rocs_ref[[cat]]) + remove_dat_dim <- TRUE + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + if (!is.null(ref) && !dat_dim %in% names(dim(ref))) { # make ref have the same dat dim as exp + ref <- array(ref, dim = dim(exp)) } + remove_dat_dim <- FALSE } - + + ncats <- 1 + length(prob_thresholds) + rocs_exp <- array(dim = c(nexp = nexp, nobs = nobs, cat = ncats)) + if (!is.null(ref)) rocs_ref <- array(dim = dim(rocs_exp)) + + for (exp_i in 1:nexp) { + for (obs_i in 1:nobs) { + + # Input dim for .get_probs + ## if exp: [sdate, memb] + ## if obs: [sdate, (memb)] + exp_probs <- .get_probs(ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + obs_probs <- .get_probs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + ## exp_probs and obs_probs: [bin, sdate] + + ## ROCS (exp) + rocs_exp[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(exp_probs, c(time_dim, 'bin')), + obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) + + if (!is.null(ref)) { + ref_probs <- .get_probs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), + indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, cross.val = cross.val) + rocs_ref[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(ref_probs, c(time_dim, 'bin')), + obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) + } + } + } + + ## ROCSS + if (is.null(ref)) { ## Random forecast as reference forecast + rocss <- 2 * rocs_exp - 1 + } else { ## Reference forecast is provided + rocss <- (rocs_exp - rocs_ref) / (1 - rocs_ref) + } + if (remove_dat_dim) { + rocss <- array(rocss, dim = dim(rocss)['cat']) + } + return(rocss) } diff --git a/R/RPS.R b/R/RPS.R index 48bf1bf..32d88a4 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -269,88 +269,3 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NUL return(rps) } -.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL, cross.val = FALSE) { - # if exp: [sdate, memb] - # if obs: [sdate, (memb)] - - # Add dim [memb = 1] to obs if it doesn't have memb_dim - if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) - - # Absolute thresholds - if (cross.val) { - quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) - for (i in 1:dim(data)[1]) { - if (is.null(weights)) { - quantiles[,i] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i)], ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i)], ], - weights[indices_for_quantiles[which(indices_for_quantiles != i)], ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles[,i] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y - } - } - } else { - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y - } - quantiles <- array(rep(quantiles, dim(data)[1]),dim = c(bin = length(quantiles), dim(data)[1])) - } - - # quantiles: [bin-1, sdate] - # Probabilities - probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] - for (i_time in 1:dim(data)[1]) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) - } else { - if (is.null(weights)) { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], - threshold = quantiles[,i_time])) - } else { - sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - # find any quantiles that are outside the data range - integrated_probs <- array(dim = dim(quantiles)) - for (i_quant in 1:dim(quantiles)[1]) { - # for thresholds falling under the distribution - if (quantiles[i_quant, i_time] < min(sorted_data)) { - integrated_probs[i_quant, i_time] <- 0 - # for thresholds falling over the distribution - } else if (max(sorted_data) < quantiles[i_quant, i_time]) { - integrated_probs[i_quant, i_time] <- 1 - } else { - integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, quantiles[i_quant, i_time], - "linear")$y - } - } - probs[, i_time] <- append(integrated_probs[,i_time], 1) - append(0, integrated_probs[,i_time]) - if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { - stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) - } - } - } - } - return(probs) -} - -.sorted_distributions <- function(data_vector, weights_vector) { - weights_vector <- as.vector(weights_vector) - data_vector <- as.vector(data_vector) - weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 - sorter <- order(data_vector) - sorted_weights <- weights_vector[sorter] - cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights - cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 - cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) -} diff --git a/R/Utils.R b/R/Utils.R index c2c17eb..8770af9 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,7 +1,7 @@ #'@importFrom abind abind -#'@import plyr +#'@import plyr ncdf4 #'@importFrom grDevices png jpeg pdf svg bmp tiff -#'@import ncdf4 +#'@importFrom easyVerification convert2prob ## Function to tell if a regexpr() match is a complete match to a specified name .IsFullMatch <- function(x, name) { @@ -1809,3 +1809,89 @@ GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE } +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL, cross.val = FALSE) { + # if exp: [sdate, memb] + # if obs: [sdate, (memb)] + + # Add dim [memb = 1] to obs if it doesn't have memb_dim + if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) + + # Absolute thresholds + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i in 1:dim(data)[1]) { + if (is.null(weights)) { + quantiles[,i] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i)], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i)], ], + weights[indices_for_quantiles[which(indices_for_quantiles != i)], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[,i] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + } + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]),dim = c(bin = length(quantiles), dim(data)[1])) + } + + # quantiles: [bin-1, sdate] + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[,i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in 1:dim(quantiles)[1]) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, quantiles[i_quant, i_time], + "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[,i_time], 1) - append(0, integrated_probs[,i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) +} + diff --git a/man/ROCSS.Rd b/man/ROCSS.Rd new file mode 100644 index 0000000..1f49517 --- /dev/null +++ b/man/ROCSS.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ROCSS.R +\name{ROCSS} +\alias{ROCSS} +\title{Compute the Relative Operating Characteristic Skill Score} +\usage{ +ROCSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + memb_dim = "member", + dat_dim = NULL, + prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, + cross.val = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time and +member dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +'dat_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time and member dimension. The dimensions must be the same as 'exp' +except 'memb_dim' and 'dat_dim'. If there is only one reference dataset, +it should not have dataset dimension. If there is corresponding reference +for each experiement, the dataset dimension must have the same length as in +'exp'. If 'ref' is NULL, the random forecast is used as reference forecast. +The default value is NULL.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast and the reference forecast. The +default value is 'member'.} + +\item{dat_dim}{A character string indicating the name of dataset dimension. +The length of this dimension can be different between 'exp' and 'obs'. +The default value is NULL.} + +\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to +1) between the categories. The default value is c(1/3, 2/3), which +corresponds to tercile equiprobable categories.} + +\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' +for computing the thresholds between the probabilistic categories. If NULL, +the whole period is used. The default value is NULL.} + +\item{cross.val}{A logical indicating whether to compute the thresholds +between probabilistic categories in cross-validation. The default value is +FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of ROCSS with the same dimensions as 'exp' excluding +'time_dim' and 'memb_dim' dimensions and including 'cat' dimension, which is +each category. The length if 'cat' dimension corresponds to the number of +probabilistic categories, i.e., 1 + length(prob_thresholds). If there are +multiple datasets, two additional dimensions 'nexp' and 'nobs' are added. +} +\description{ +The Relative Operating Characteristic Skill Score (ROCSS; Kharin and Zwiers, +2003) is based on the ROC curve, which gives information about the hit rates +against the false-alarm rates for a particular category or event. The ROC +curve can be summarized with the area under the ROC curve, known as the ROC +score, to provide a skill value for each category. The ROCSS ranges between +minus infinite and 1. A positive ROCSS value indicates that the forecast has +higher skill than the reference forecasts, meaning the contrary otherwise. +} +\examples{ +exp <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +ref <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60, member = 10)) +obs <- array(rnorm(1000), dim = c(lon = 3, lat = 2, sdate = 60)) +ROCSS(exp = exp, obs = obs) ## random forecast as reference forecast +ROCSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast + +} +\references{ +Kharin, V. V. and Zwiers, F. W. (2003): + https://doi.org/10.1175/1520-0442(2003)016%3C4145:OTRSOP%3E2.0.CO;2 +} diff --git a/man/RPS.Rd b/man/RPS.Rd index 16b7a90..813c12f 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -56,7 +56,7 @@ have more than 45 members if consistency between the weighted and unweighted methodologies is desired.} \item{cross.val}{A logical indicating whether to compute the thresholds between -probabilistics categories in cross-validation. +probabilistic categories in cross-validation. The default value is FALSE.} \item{ncores}{An integer indicating the number of cores to use for parallel diff --git a/tests/testthat/test-ROCSS.R b/tests/testthat/test-ROCSS.R new file mode 100644 index 0000000..a95d0ba --- /dev/null +++ b/tests/testthat/test-ROCSS.R @@ -0,0 +1,229 @@ +context("s2dv::ROCSS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(30), dim = c(member = 3, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +ref2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + + +# dat3 +set.seed(1) +exp3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(2) +obs3 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) +set.seed(3) +ref3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(3) +ref3_2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + ROCSS(c()), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + ROCSS(exp1, c()), + "Parameter 'obs' must be a numeric array." + ) + + # time_dim + expect_error( + ROCSS(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + ROCSS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + ROCSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))), + "Parameter 'time_dim' is not found in 'ref' dimension." + ) + # memb_dim + expect_error( + ROCSS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + ROCSS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + ROCSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))), + "Parameter 'memb_dim' is not found in 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + ROCSS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions except 'memb_dim' and 'dat_dim'." + ) + expect_error( + ROCSS(exp3, obs3, array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 4)), dat_dim = 'dataset'), + "If parameter 'ref' has dataset dimension, it must be equal to dataset dimension of 'exp'." + ) + expect_error( + ROCSS(exp1, obs1, ref2), + "Parameter 'exp' and 'ref' must have the same length of all dimensions except 'memb_dim' and 'dat_dim' if there is only one reference dataset." + ) + # prob_thresholds + expect_error( + ROCSS(exp1, obs1, ref1, prob_thresholds = 1), + "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." + ) + # indices_for_clim + expect_error( + ROCSS(exp1, obs1, ref1, indices_for_clim = array(1:6, dim = c(2, 3))), + "Parameter 'indices_for_clim' must be NULL or a numeric vector." + ) + expect_error( + ROCSS(exp1, obs1, indices_for_clim = 3:11), + "Parameter 'indices_for_clim' should be the indices of 'time_dim'." + ) + # cross.val + expect_error( + ROCSS(exp1, obs1, cross.val = 1), + "Parameter 'cross.val' must be either TRUE or FALSE." + ) + + # ncores + expect_error( + ROCSS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( + dim(ROCSS(exp1, obs1)), + c(cat = 3, lat = 2) +) +expect_equal( + dim(ROCSS(exp1, obs1, ref1)), + c(cat = 3, lat = 2) +) +expect_equal( +c(ROCSS(exp1, obs1)[1, ]), +c(0.3333333, 0.0000000), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp1, obs1)[2, ]), +c(0.0000000, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp1, obs1)[3, ]), +c(0.5238095, 0.3809524), +tolerance = 0.0001 +) +# with ref +expect_equal( +c(ROCSS(exp1, obs1, ref1)[1, ]), +c(0.53333333, 0.08695652), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp1, obs1, ref1)[2, ]), +c(0.4545455, 0.6190476), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp1, obs1, ref1)[3, ]), +c(0.5238095, 0.5357143), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( + dim(ROCSS(exp2, obs2, prob_thresholds = c(0.25, 0.5, 0.75))), + c(cat = 4) +) +expect_equal( + dim(ROCSS(exp2, obs2, ref2, prob_thresholds = c(0.25, 0.5, 0.75))), + c(cat = 4) +) +# without ref +expect_equal( +c(ROCSS(exp2, obs2)), +c(0.3333333, 0, 0.5238095), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp2, obs2, prob_thresholds = c(0.25, 0.5, 0.75))), +c(0.1875000, -0.0952381, 0.0000000, -0.7500000), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp2, obs2, indices_for_clim = 2:6)), +c(0.56, 0.00, 0.25), +tolerance = 0.0001 +) +expect_equal( +c(ROCSS(exp2, obs2, ref2, indices_for_clim = 2:6)), +c(0.6333333, 0.0000000, 0.2500000), +tolerance = 0.0001 +) + + +}) + +############################################## +test_that("4. Output checks: dat3", { + +expect_equal( + dim(ROCSS(exp3, obs3, dat_dim = 'dataset')), + c(nexp = 3, nobs = 2, cat = 3) +) +expect_equal( + dim(ROCSS(exp3, obs3, ref3, dat_dim = 'dataset')), + c(nexp = 3, nobs = 2, cat = 3) +) +expect_equal( + dim(ROCSS(exp3, obs3, ref3_2, dat_dim = 'dataset')), + c(nexp = 3, nobs = 2, cat = 3) +) + +expect_equal( + c(ROCSS(exp3, obs3, ref3, dat_dim = 'dataset')[1, , ]), + c(0.3, -0.25, 0.3636364, -0.8095238, -0.4285714,0), + tolerance = 0.0001 +) +expect_equal( + c(ROCSS(exp3, obs3, ref3, dat_dim = 'dataset')[2, , ]), + c(0.5652174, 0, 0, -0.4285714, -0.4, 0), + tolerance = 0.0001 +) +expect_equal( + c(ROCSS(exp3, obs3, ref3_2, dat_dim = 'dataset')[1, , ]), + c(0.3, -0.25, 0.3636364, -0.8095238, -0.4285714, 0), + tolerance = 0.0001 +) +expect_equal( + c(ROCSS(exp3, obs3, ref3_2, dat_dim = 'dataset')[2, , ]), + c(0.66666667, 0.04166667, 0.40909091, -0.42857143, -0.33333333, -0.33333333), + tolerance = 0.0001 +) + +}) -- GitLab From b86795a0c240135efdccd26b4abb137863d566a7 Mon Sep 17 00:00:00 2001 From: aho Date: Mon, 6 Mar 2023 10:22:48 +0100 Subject: [PATCH 7/7] Change WeightedMean inputs --- R/AMV.R | 8 ++++---- R/GMST.R | 4 ++-- R/GSAT.R | 4 ++-- R/SPOD.R | 8 ++++---- R/TPI.R | 12 ++++++------ 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/AMV.R b/R/AMV.R index e14897b..9167628 100644 --- a/R/AMV.R +++ b/R/AMV.R @@ -224,12 +224,12 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo mean_1 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg1, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) mean_2 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg2, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) data <- ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) diff --git a/R/GMST.R b/R/GMST.R index 85b382d..92109ea 100644 --- a/R/GMST.R +++ b/R/GMST.R @@ -273,8 +273,8 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va data <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = NULL, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) if (type == 'dcpp'){ target_dims <- c(sdate_dim, fmonth_dim) diff --git a/R/GSAT.R b/R/GSAT.R index 30975b9..0cde94a 100644 --- a/R/GSAT.R +++ b/R/GSAT.R @@ -211,8 +211,8 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l data <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = NULL, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) if (type == 'dcpp'){ target_dims <- c(sdate_dim, fmonth_dim) diff --git a/R/SPOD.R b/R/SPOD.R index 2599477..3a8ff73 100644 --- a/R/SPOD.R +++ b/R/SPOD.R @@ -223,12 +223,12 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l mean_1 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg1, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) mean_2 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg2, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) data <- ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) diff --git a/R/TPI.R b/R/TPI.R index d3f0550..167664f 100644 --- a/R/TPI.R +++ b/R/TPI.R @@ -225,16 +225,16 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo mean_1 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg1, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) mean_2 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg2, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) mean_3 <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats, region = regions$reg3, - londim = which(names(dim(data)) == lon_dim), - latdim = which(names(dim(data)) == lat_dim)) + londim = lon_dim, + latdim = lat_dim) mean_1_3 <- ClimProjDiags::CombineIndices(indices = list(mean_1, mean_3), weights = NULL, operation = 'mean') -- GitLab