From 583ebfc6a55fc93e387bd53aa5733a3fec132366 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 17:01:19 +0100 Subject: [PATCH 1/4] included the modified-Mann-Kendall --- R/Trend.R | 230 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 198 insertions(+), 32 deletions(-) diff --git a/R/Trend.R b/R/Trend.R index e10fe19..2146873 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -2,10 +2,8 @@ #' #'Compute the linear trend or any degree of polynomial regression along the #'forecast time. It returns the regression coefficients (including the intercept) -#'and the detrended array. The confidence intervals and p-value are also -#'provided if needed.\cr -#'The confidence interval relies on the student-T distribution, and the p-value -#'is calculated by ANOVA. +#'and the detrended array. The confidence intervals, significance and p-value +#'are also provided if needed.\cr #' #'@param data An numeric array including the dimension along which the trend #' is computed. @@ -18,11 +16,16 @@ #'@param alpha A numeric indicating the significance level for the statistical #' significance test. The default value is 0.05. #'@param conf A logical value indicating whether to retrieve the confidence -#' intervals or not. The default value is TRUE. +#' intervals calculated with a student-T distribution or not. The default +#' value is TRUE. #'@param pval A logical value indicating whether to compute the p-value or not. #' The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical #' significance based on 'alpha'. The default value is FALSE. +#'@param sign.test A character string indicating the significance test. The +#' options are "ANOVA" and "modified-Mann-Kendall" (which takes into account +#' the autocorrelation; described in Hamed and Rao 1998; +#' doi:10.1016/S0022-1694(97)00125-X). The default value is "t-test". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -52,7 +55,7 @@ #' \code{conf = TRUE}. #'} #'\item{$p.val}{ -#' A numeric array of p-value calculated by anova(). The first dimension +#' A numeric array of p-value. The first dimension #' 'stats' is 1, followed by the same dimensions as parameter 'data' except #' the 'time_dim' dimension. Only present if \code{pval = TRUE}. #'} @@ -74,9 +77,10 @@ #'@import multiApply #'@importFrom stats anova #'@export -Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0.05, - conf = TRUE, pval = TRUE, sign = FALSE, ncores = NULL) { - +Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, + alpha = 0.05, conf = TRUE, pval = TRUE, sign = FALSE, + sign.test = 'ANOVA', ncores = NULL) { + # Check inputs ## data if (is.null(data)) { @@ -124,6 +128,10 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 if (!is.logical(sign) | length(sign) > 1) { stop("Parameter 'sign' must be one logical value.") } + ## sign + if (!sign.test %in% c('ANOVA','modified-Mann-Kendall') | length(sign.test) > 1) { + stop("Parameter 'sign' must be one logical value.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores)) { @@ -132,16 +140,16 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 stop("Parameter 'ncores' must be a positive integer.") } } - + ############################### # Calculate Trend - + ## output_dims output_dims <- list(trend = 'stats') if (conf) output_dims <- c(output_dims, list(conf.lower = 'stats', conf.upper = 'stats')) if (pval) output_dims <- c(output_dims, list(p.val = 'stats')) if (sign) output_dims <- c(output_dims, list(sign = 'stats')) - + output_dims <- c(output_dims, list(detrended = time_dim)) output <- Apply(list(data), @@ -150,63 +158,221 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 output_dims = output_dims, interval = interval, polydeg = polydeg, alpha = alpha, conf = conf, - pval = pval, sign = sign, + pval = pval, sign = sign, sign.test = sign.test, ncores = ncores) - + return(invisible(output)) } -.Trend <- function(x, interval = 1, polydeg = 1, alpha = 0.05, - conf = TRUE, pval = TRUE, sign = FALSE) { +.Trend <- function(x, interval = 1, polydeg = 1, alpha = 0.05, conf = TRUE, + pval = TRUE, sign = FALSE, sign.test = 'ANOVA') { # x: [ftime] - + mon <- seq(x) * interval - + # remove NAs for potential poly() NApos <- 1:length(x) NApos[which(is.na(x))] <- NA x2 <- x[!is.na(NApos)] mon2 <- mon[!is.na(NApos)] - + if (length(x2) > 0) { -# lm.out <- lm(x ~ mon, na.action = na.omit) + # lm.out <- lm(x ~ mon, na.action = na.omit) lm.out <- lm(x2 ~ poly(mon2, degree = polydeg, raw = TRUE), na.action = na.omit) trend <- lm.out$coefficients #intercept, slope1, slope2,... - + if (conf) { conf.lower <- confint(lm.out, level = (1 - alpha))[, 1] conf.upper <- confint(lm.out, level = (1 - alpha))[, 2] } - + if (pval | sign) { - p.value <- as.array(stats::anova(lm.out)$'Pr(>F)'[1]) + + if (identical(sign.test,'ANOVA')){ + p.value <- as.array(stats::anova(lm.out)$'Pr(>F)'[1]) + } else if (identical(sign.test,'modified-Mann-Kendall')) { + p.value <- as.array(.mmkh_fixed(x = as.vector(x), ci = 1-alpha)[['new P-value']]) + } else {stop('Incorrect sign.test')} + if (pval) p.val <- p.value - if (sign) signif <- !is.na(p.value) & p.value <= alpha + if (sign) signif <- !is.na(p.value) & p.value <= alpha } - + detrended <- c() detrended[is.na(x) == FALSE] <- x[is.na(x) == FALSE] - lm.out$fitted.values - + } else { - + trend <- rep(NA, polydeg + 1) detrended <- rep(NA, length(x)) - + if (conf) { conf.lower <- rep(NA, polydeg + 1) conf.upper <- rep(NA, polydeg + 1) } - + if (pval) p.val <- as.array(NA) if (sign) signif <- as.array(FALSE) - + } - + output <- list(trend = trend) if (conf) output <- c(output, list(conf.lower = conf.lower, conf.upper = conf.upper)) if (pval) output <- c(output, list(p.val = p.val)) if (sign) output <- c(output, list(sign = signif)) output <- c(output, list(detrended = detrended)) - + return(output) } + +## modifiedmk::mmkh fails when evaluating "if(ro[i] > sig || ro[i] < -sig)" and ro[i] is NaN +## I have added is.finite(ro[i]) to the condition +.mmkh_fixed <-function(x, ci=0.95) { + # Initialize the test parameters + options(scipen = 999) + # Time series vector + x = x + # Modified Z statistic after variance correction by Hamed and Rao (1998) method + z = NULL + # Original Z statistic for Mann-Kendall test before variance correction + z0 = NULL + # Modified Z statistic after variance correction by Hamed and Rao (1998) method + pval = NULL + # Original p-value for Mann-Kendall test before variance correction + pval0 = NULL + # Initialize Mann-Kendall S statistic + S = 0 + # Initialize Mann-Kendall Tau + Tau = NULL + # Correction factor n/n* value + essf = NULL + # Confidance interval + ci = ci + + # To test whether the data is in vector format + + if (is.vector(x) == FALSE) { + stop("Input data must be a vector") + } + + # To test whether the data values are finite numbers and attempting to eliminate non-finite numbers + + if (any(is.finite(x) == FALSE)) { + x[-c(which(is.finite(x) == FALSE))] -> x + warning("The input vector contains non-finite numbers. An attempt was made to remove them") + } + + n <- length(x) + + #Specify minimum input vector length + if (n < 3) { + stop("Input vector must contain at least three values") + } + + # Calculating Sen's slope + rep(NA, n * (n - 1)/2) -> V + k = 0 + for (i in 1:(n-1)) { + for (j in (i+1):n) { + k = k+1 + V[k] = (x[j]-x[i])/(j-i) + } + } + median(V,na.rm=TRUE)->slp + + # Calculating trend-free Series + + t=1:length(x) + xn<-(x[1:n])-((slp)*(t)) + + + # Calculating Mann-Kendall S statistic + + for (i in 1:(n-1)) { + for (j in (i+1):n) { + S = S + sign(x[j]-x[i]) + } + } + + # Calculating autocorrelation function of the ranks of observations (ro) + + acf(rank(xn), lag.max=(n-1), plot=FALSE)$acf[-1] -> ro + + # Calculating significant autocorrelation at given confidance interval (rof) + + qnorm((1+ci)/2)/sqrt(n) -> sig + rep(NA,length(ro)) -> rof + for (i in 1:(length(ro))) { + if (is.finite(ro[i]) && (ro[i] > sig || ro[i] < -sig)) { + rof[i] <- ro[i] + } else { + rof[i] = 0 + } + } + + # Calculating 2/(n*(n-1)*(n-2)) + + 2 / (n*(n-1)*(n-2)) -> cte + + # Calculating sum(((n-i)*(n-i-1)*(n-i-2)*rof[i] + + ess=0 + for (i in 1:(n-1)) { + ess = ess + (n-i)*(n-i-1)*(n-i-2)*rof[i] + } + + # Calculating variance correction factor (n/n*) as per Hamed and Rao (1998) + + essf = 1 + ess*cte + + # Calculating Mann-Kendall Variance before correction (Var(s)) + + var.S = n*(n-1)*(2*n+5)*(1/18) + if(length(unique(x)) < n) { + unique(x) -> aux + for (i in 1:length(aux)) { + length(which(x == aux[i])) -> tie + if (tie > 1) { + var.S = var.S - tie*(tie-1)*(2*tie+5)*(1/18) + } + } + } + + # Calculating new variance Var(s)*=(Var(s))*(n/n*) as per Hamed and Rao (1998) + + VS = var.S * essf + + # Calculating Z statistic values before and after variance correction + + if (S == 0) { + z = 0 + z0 = 0 + }else + if (S > 0) { + z = (S-1)/sqrt(VS) + z0 = (S-1)/sqrt(var.S) + } else { + z = (S+1)/sqrt(VS) + z0 = (S+1)/sqrt(var.S) + } + + # Calculating p-value before and after variance correction + + pval = 2*pnorm(-abs(z)) + pval0 = 2*pnorm(-abs(z0)) + + # Calculating Kendall's Tau + + Tau = S/(.5*n*(n-1)) + + + return(c("Corrected Zc" = z, + "new P-value" = pval, + "N/N*" = essf, + "Original Z" = z0, + "old P.value" = pval0, + "Tau" = Tau, + "Sen's slope" = slp, + "old.variance" = var.S, + "new.variance"= VS)) +} -- GitLab From 075ad31658866c65521c8bce7ef8a48ea33dde03 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Wed, 29 Nov 2023 17:07:16 +0100 Subject: [PATCH 2/4] . --- R/Trend.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/Trend.R b/R/Trend.R index 2146873..ff8f31d 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -25,7 +25,7 @@ #'@param sign.test A character string indicating the significance test. The #' options are "ANOVA" and "modified-Mann-Kendall" (which takes into account #' the autocorrelation; described in Hamed and Rao 1998; -#' doi:10.1016/S0022-1694(97)00125-X). The default value is "t-test". +#' doi:10.1016/S0022-1694(97)00125-X). The default value is "ANOVA". #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -- GitLab From b25576a78b51c63ddc94aea16c2e6d5aea1dd645 Mon Sep 17 00:00:00 2001 From: Carlos Delgado Date: Thu, 30 Nov 2023 15:11:19 +0100 Subject: [PATCH 3/4] renamed sign.test to sig_method --- R/Trend.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/R/Trend.R b/R/Trend.R index ff8f31d..a1eece8 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -22,7 +22,7 @@ #' The default value is TRUE. #'@param sign A logical value indicating whether to retrieve the statistical #' significance based on 'alpha'. The default value is FALSE. -#'@param sign.test A character string indicating the significance test. The +#'@param sig_method A character string indicating the significance test. The #' options are "ANOVA" and "modified-Mann-Kendall" (which takes into account #' the autocorrelation; described in Hamed and Rao 1998; #' doi:10.1016/S0022-1694(97)00125-X). The default value is "ANOVA". @@ -79,7 +79,7 @@ #'@export Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0.05, conf = TRUE, pval = TRUE, sign = FALSE, - sign.test = 'ANOVA', ncores = NULL) { + sig_method = 'ANOVA', ncores = NULL) { # Check inputs ## data @@ -129,7 +129,7 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, stop("Parameter 'sign' must be one logical value.") } ## sign - if (!sign.test %in% c('ANOVA','modified-Mann-Kendall') | length(sign.test) > 1) { + if (!sig_method %in% c('ANOVA','modified-Mann-Kendall') | length(sig_method) > 1) { stop("Parameter 'sign' must be one logical value.") } ## ncores @@ -158,14 +158,14 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, output_dims = output_dims, interval = interval, polydeg = polydeg, alpha = alpha, conf = conf, - pval = pval, sign = sign, sign.test = sign.test, + pval = pval, sign = sign, sig_method = sig_method, ncores = ncores) return(invisible(output)) } .Trend <- function(x, interval = 1, polydeg = 1, alpha = 0.05, conf = TRUE, - pval = TRUE, sign = FALSE, sign.test = 'ANOVA') { + pval = TRUE, sign = FALSE, sig_method = 'ANOVA') { # x: [ftime] mon <- seq(x) * interval @@ -188,11 +188,11 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, if (pval | sign) { - if (identical(sign.test,'ANOVA')){ + if (identical(sig_method,'ANOVA')){ p.value <- as.array(stats::anova(lm.out)$'Pr(>F)'[1]) - } else if (identical(sign.test,'modified-Mann-Kendall')) { + } else if (identical(sig_method,'modified-Mann-Kendall')) { p.value <- as.array(.mmkh_fixed(x = as.vector(x), ci = 1-alpha)[['new P-value']]) - } else {stop('Incorrect sign.test')} + } else {stop('Incorrect sig_method')} if (pval) p.val <- p.value if (sign) signif <- !is.na(p.value) & p.value <= alpha -- GitLab From e06738456a9d07518609998e87ac4d9ee6e0e41a Mon Sep 17 00:00:00 2001 From: aho Date: Wed, 20 Dec 2023 13:19:11 +0100 Subject: [PATCH 4/4] correct update --- R/Trend.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/Trend.R b/R/Trend.R index d1f65d4..57d2d13 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -200,10 +200,6 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, if (sign) signif <- !is.na(p.value) & p.value <= alpha } - detrended <- c() - detrended[is.na(x) == FALSE] <- x[is.na(x) == FALSE] - lm.out$fitted.values - - detrended <- NULL detrended[!is.na(x)] <- x[!is.na(x)] - lm.out$fitted.values -- GitLab