diff --git a/DESCRIPTION b/DESCRIPTION index 3259032e7759a3b32aede5c5008ce522823e1d09..431d3bbfefec7bc1f9b8103f7603ed2754deedd6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ClimProjDiags Title: Set of Tools to Compute Various Climate Indices -Version: 0.1.0 +Version: 0.1.1 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8568-3071")), @@ -20,8 +20,9 @@ Imports: multiApply (>= 2.0.0), PCICt, plyr, + climdex.pcic, stats -License: LGPL-3 +License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/ClimProjDiags BugReports: https://earth.bsc.es/gitlab/es/ClimProjDiags/issues Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index 03fa0de4a2997108d8994eac872d50842c898dab..8364b61887761eac1e0b44e525030c7a28468b07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(Threshold) export(WaveDuration) export(WeightedMean) import(PCICt) +import(climdex.pcic) import(multiApply) importFrom(plyr,aaply) importFrom(stats,quantile) diff --git a/R/Climdex.R b/R/Climdex.R index e8203fb1c96d8b5f19e46b89e7b3eabc74a22161..a13b15ba283bb6455729bd2ea0bd6ce69bb6fc46 100644 --- a/R/Climdex.R +++ b/R/Climdex.R @@ -17,6 +17,7 @@ #' \item\code{$years} {A vector of the corresponding years.}} #' #'@import multiApply +#'@import climdex.pcic #'@import PCICt #'@examples #'##Example synthetic data: @@ -228,17 +229,17 @@ Climdex <- function(data, metric, threshold = NULL, base.range = NULL, dates = N .Climdex <- function(data, threshold, dates = dates, metric = metric, jdays = jdays, date.factor = NULL, base.range = NULL) { if (metric == "cdd") { - result <- .spell.length.max(daily.prec = as.vector(data), date.factor = dates, 1, "<", FALSE) + result <- spell.length.max(daily.prec = as.vector(data), date.factor = dates, 1, "<", FALSE) } else if (metric == "rx5day") { - result <- .nday.consec.prec.max(daily.prec = as.vector(data), date.factor = dates, + result <- nday.consec.prec.max(daily.prec = as.vector(data), date.factor = dates, ndays = 5, center.mean.on.last.day = FALSE) } else if (metric == "t90p" || metric == "Wx") { - result <- .percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, + result <- percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, date.factor = date.factor, threshold.outside.base = threshold, base.thresholds = threshold, base.range = base.range, op = ">", 20) } else if (metric == "t10p") { - result <- .percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, + result <- percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, date.factor = date.factor, threshold.outside.base = threshold, base.thresholds = threshold, base.range = base.range, op = "<", 20) @@ -246,277 +247,4 @@ Climdex <- function(data, metric, threshold = NULL, base.range = NULL, dates = N return(result) } -#' @title Maximum spell length -#' -#' @description -#' This function returns the longest string of days which exceed or are below -#' the given threshold. -#' -#' @details -#' This routine compares data to the threshold using the given operator, -#' generating a series of TRUE or FALSE values. It then computes the lengths of -#' sequences of TRUE values (spells) and chooses the longest spell in each -#' period (as defined by date.factor). -#' -#' The \code{spells.can.span.years} option controls whether spells must always -#' terminate at the end of a period, or whether they may continue until the -#' criteria ceases to be met or the end of the data is reached. The default for -#' fclimdex is TRUE. -#' -#' @param daily.prec Data to compute index on. -#' @param date.factor Date factor to split by. -#' @param threshold The threshold to compare to. -#' @param op The operator to use to compare data to threshold. -#' @param spells.can.span.years Whether spells can span years. -#' @return A timeseries of maximum spell lengths for each period. -#' @seealso \code{\link{climdex.cdd}}. -#' @keywords ts climate -#' @examples -#' -#' prec.dat <- c(0.1, 3.0, 4.3, 1.9, 1.3, 6.0, 0, 0, 4.0, 1) -#' phony.date.factor <- factor(rep(1:2, each=5)) -#' -#' ## With spells spanning years... -#' cwd <- spell.length.max(prec.dat, phony.date.factor, 1, ">=", TRUE) -#' -#' ## Without spells spanning years... -#' altcwd <- spell.length.max(prec.dat, phony.date.factor, 1, ">=", FALSE) -#' -#' @noRd -.spell.length.max <- function(daily.prec, date.factor, threshold, op, spells.can.span.years) { - bools <- match.fun(op)(daily.prec, threshold) - - if(spells.can.span.years) { - all.true <- .tapply.fast(bools, date.factor, all) - max.spell <- .tapply.fast(.get.series.lengths.at.ends(bools), date.factor, max) - - ## Mask out values which are in the middle of a spell with NA - na.mask <- c(1, NA)[as.integer((max.spell == 0) & all.true) + 1] - return(max.spell * na.mask) - } else { - return(.tapply.fast(bools, date.factor, function(x) { max(.get.series.lengths.at.ends(x)) })) - } -} -#' Get series length at ends -#' -#' This function takes a series of boolean values and returns a list of -#' integers of the same length corresponding to the lengths at the ends of -#' sequences of TRUE values. -#' -#' It can often be useful to know how long a series of boolean values is. This -#' function provides a method of knowing where and how long such sequences are. -#' -#' @param x Sequence of booleans. -#' @param na.value Value to replace NAs with. -#' @return A vector consisting of the lengths of sequences of TRUE values at -#' the location of the last TRUE value in the sequence, and zeroes elsewhere. -#' @keywords ts climate -#' @examples -#' -#' ## Get lengths of sequences of TRUE values in a sequence -#' series.lengths <- get.series.lengths.at.ends(c(TRUE, TRUE, TRUE, FALSE, -#' TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE)) -#' -#' -#' @noRd -.get.series.lengths.at.ends <- function(x, na.value=FALSE) { - stopifnot(is.logical(x) && is.logical(na.value)) - n <- length(x) - if(n == 1) - return(as.numeric(x)) - - res <- rep(0, n) - x[is.na(x)] <- na.value - - ## Compare series to lag-1 and lag+1 series; false added to trigger state transition from TRUE at ends of series - start <- which(x & !(c(FALSE, x[1:(n - 1)]))) - end <- which(x & !(c(x[2:n], FALSE))) - res[end] <- end - start + 1 - return(res) -} -#' @title Number of days (less than, greater than, etc) a threshold -#' -#' @description -#' Produces sums of values that exceed (or are below) the specified threshold. -#' -#' @details -#' This function takes a data series, the number of days in the running window, -#' a date factor to aggregate by, and an optional modifier parameter -#' (center.mean.on.last.day). It computes the n-day running sum of -#' precipitation and returns the maximum n-day total precipitation per unit -#' time, as defined by \code{date.factor}. -#' -#' @param daily.prec Daily timeseries of precipitation. -#' @param date.factor Factor to aggregate by. -#' @param ndays Number of days in the running window. -#' @param center.mean.on.last.day Whether to center the n-day running mean on -#' the last day of the series, instead of the middle day. -#' @return A vector consisting of the maximum n-day sum of precipitation per -#' time interval. -#' @keywords ts climate -#' @examples -#' library(PCICt) -#' -#' ## Parse the dates into PCICt. -#' tmax.dates <- as.PCICt(do.call(paste, ec.1018935.tmax[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' tmin.dates <- as.PCICt(do.call(paste, ec.1018935.tmin[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' prec.dates <- as.PCICt(do.call(paste, ec.1018935.prec[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' -#' ## Load the data in. -#' ci <- climdexInput.raw(ec.1018935.tmax$MAX_TEMP, -#' ec.1018935.tmin$MIN_TEMP, ec.1018935.prec$ONE_DAY_PRECIPITATION, -#' tmax.dates, tmin.dates, prec.dates, base.range=c(1971, 2000)) -#' -#' ## Compute rx5day on a monthly basis. -#' rx5day <- nday.consec.prec.max(ci@@data$prec, ci@@date.factors$monthly, 5) -#' -#' @noRd -.nday.consec.prec.max <- function(daily.prec, date.factor, ndays, center.mean.on.last.day=FALSE) { - if(ndays == 1) { - return(suppressWarnings(.tapply.fast(daily.prec, date.factor, max, na.rm=TRUE))) - } - ## Ends of the data will be de-emphasized (padded with zero precip data); NAs replaced with 0 - daily.prec[is.na(daily.prec)] <- 0 - prec.runsum <- .running.mean(daily.prec, ndays) - prec.runsum[is.na(prec.runsum)] <- 0 - if(center.mean.on.last.day) { - k2 = ndays %/% 2 - prec.runsum <- c(rep(0, k2), prec.runsum[1:(length(prec.runsum) - k2)]) - } - return(.tapply.fast(prec.runsum, date.factor, max) * ndays) -} -#' @title Running Mean of a Vector -#' -#' @description Calculates the running means of a vector with a shifting window -#' -#' @details Returns a new vector the same length as vec, where the ith -#' element is the mean of the bin of elements centered at the ith element -#' of the original vector. Means cannot be calculated for elements less -#' than half the width of the bin from the beginning or end of the vector; -#' the result vector has NA in those positions. -#' -#' @param vec A vector -#' @param bin The number of entries to average over for each mean -#' -#' @return a vector containing the running mean of bin elements of vec -#' -#' @example -#' \dontrun { -#' running.mean(c(1, 2, 3, 4, 5, 6), 2) -#' } -#' \dontrun { -#' running.mean(c(5, 5, 5, 5, 5), 4) -#' } -#'@noRd -.running.mean <- function(vec, bin){ - vec = as.vector(vec) - len = length(vec) - if (bin<=1) { - return (vec) - } - if (bin > len) { - bin = len - } - left.bin = bin%/%2 - - means = double(len) - - right.bin = bin - left.bin - 1 - means = c( sum(vec[1:bin]), diff(vec,bin) ) # find the first sum and the differences from it - means = cumsum(means)/bin # apply precomputed differences - means = c(rep(NA,left.bin), means, rep(NA,right.bin)) # extend to original vector length - return(means) -} -#' Lengths of strings of TRUE values -#' -#' Computes fraction of days above or below the baseline threshold for each -#' day, and averages them using the date factor passed in. -#' -#' This function computes fractions of days above or below baseline thresholds -#' for each day, then aggregates them using \code{date.factor}. It is used to -#' implement TN/TX 10/90p. -#' -#' @param temp Sequence of temperature values. -#' @param dates Sequence of associated dates. -#' @param jdays Sequence of associated days of year. -#' @param date.factor Factor to aggregate data using. -#' @param threshold.outside.base Sequence of thresholds to be used for data -#' outside the base period. -#' @param base.thresholds Data structure containing sets of thresholds to be -#' used inside the base period; see \link{climdexInput-class}. -#' @param base.range Date range (type PCICt) of the baseline period. -#' @param op Comparison operator to use. -#' @param max.missing.days Maximum number of NA values per time period. -#' @return A vector consisting of the mean fraction of days above or below the -#' supplied set of thresholds. -#' @note If date.factor is omitted, daily series will be returned. -#' @seealso \link{climdexInput-class}. -#' @keywords ts climate -#' @examples -#' library(PCICt) -#' -#' ## Parse the dates into PCICt. -#' tmax.dates <- as.PCICt(do.call(paste, ec.1018935.tmax[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' tmin.dates <- as.PCICt(do.call(paste, ec.1018935.tmin[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' prec.dates <- as.PCICt(do.call(paste, ec.1018935.prec[,c("year", -#' "jday")]), format="%Y %j", cal="gregorian") -#' -#' ## Load the data in. -#' ci <- climdexInput.raw(ec.1018935.tmax$MAX_TEMP, -#' ec.1018935.tmin$MIN_TEMP, ec.1018935.prec$ONE_DAY_PRECIPITATION, -#' tmax.dates, tmin.dates, prec.dates, base.range=c(1971, 2000)) -#' -#' ## Compute monthly tx90p. -#' tx90p <- percent.days.op.threshold(ci@@data$tmax, ci@@dates, ci@@jdays, -#' ci@@date.factors$monthly, -#' ci@@quantiles$tmax$outbase$q90, -#' ci@@quantiles$tmax$inbase$q90, -#' ci@@base.range, ">", -#' ci@@max.missing.days['monthly']) * -#' ci@@namasks$monthly$tmax -#' -#' @noRd -.percent.days.op.threshold <- function(temp, dates, jdays, date.factor, threshold.outside.base, base.thresholds, base.range, op='<', max.missing.days) { - f <- match.fun(op) - dat <- f(temp, threshold.outside.base[jdays]) - - inset <- dates >= base.range[1] & dates <= base.range[2] - ## Don't use in-base thresholds with data shorter than two years; no years to replace with. - if(sum(inset) > 0 && length(dates) >= 360 * 2) { - jdays.base <- jdays[inset] - years.base <- .get.years(dates[inset]) - - ## Get number of base years, subset temp data to base period only. - temp.base <- temp[inset] - years.base.range <- range(years.base) - byrs <- (years.base.range[2] - years.base.range[1] + 1) - - ## Linearize thresholds, then compare them to the temperatures - bdim <- dim(base.thresholds) - dim(base.thresholds) <- c(bdim[1] * bdim[2], bdim[3]) - yday.byr.indices <- jdays.base + (years.base - .get.years(base.range)[1]) * bdim[1] - f.result <- f(rep(temp.base, byrs - 1), base.thresholds[yday.byr.indices,]) - dim(f.result) <- c(length(yday.byr.indices), bdim[3]) - - ## Chop up data along the 2nd dim into a list; sum elements of the list - dat[inset] <- rowSums(f.result, na.rm=TRUE) / (byrs - 1) - } - dat[is.nan(dat)] <- NA - if(missing(date.factor)) - return(dat) - na.mask <- .get.na.mask(dat, date.factor, max.missing.days) - ## FIXME: Need to monthly-ize the NA mask calculation, which will be ugly. - ret <- .tapply.fast(dat, date.factor, mean, na.rm=TRUE) * 100 * na.mask - ret[is.nan(ret)] <- NA - return(ret) -} -## Get year -.get.years <- function(dates) { - return(as.POSIXlt(dates)$year + 1900) -} diff --git a/R/Extremes.R b/R/Extremes.R index 8fc5ca24a66425af6a9ed1396512de580c4df7d7..f3e7b16b7e187e6cb20c705bd37b1901bf25c57e 100644 --- a/R/Extremes.R +++ b/R/Extremes.R @@ -22,6 +22,7 @@ #' #'@import multiApply #'@import PCICt +#'@import climdex.pcic #'@examples #'##Example synthetic data: #'data <- 1:(2 * 3 * 372 * 1) @@ -211,7 +212,7 @@ Extremes <- function(data, threshold, op = ">", min.length = 6, spells.can.span. } .Extremes <- function(data, threshold, date.factor, jdays, op, min.length, spells.can.span.years, max.missing.days) { - result <- .threshold.exceedance.duration.index(data, date.factor, jdays, + result <- threshold.exceedance.duration.index(data, date.factor, jdays, threshold,op, min.length, spells.can.span.years, max.missing.days) } diff --git a/R/WaveDuration.R b/R/WaveDuration.R index de0d5734c8ccce4b2b551081120071f66e20f634..23c2d97e68d0753aee49fe13660c651070f6544b 100644 --- a/R/WaveDuration.R +++ b/R/WaveDuration.R @@ -18,6 +18,7 @@ #' #'@import multiApply #'@import PCICt +#'@import climdex.pcic #'@examples #'##Example synthetic data: #'data <- 1:(2 * 3 * 31 * 5) @@ -223,130 +224,11 @@ WaveDuration <- function(data, threshold, op = ">", spell.length = 6, by.seasons } .WaveDuration <- function(data, threshold, date.factor = date.factor, jdays = jdays, op = op, spell.length = spell.length) { - result <- .threshold.exceedance.duration.index(daily.temp = data, + result <- threshold.exceedance.duration.index(daily.temp = data, date.factor = date.factor, jdays = jdays, thresholds = threshold, op = op, min.length = spell.length, spells.can.span.years = TRUE, 1) return(result) } -#' @title Sum of spell lengths exceeding daily threshold -#' -#' @description -#' This function returns the number of spells of more than \code{min.length} -#' days which exceed or are below the given threshold. -#' -#' @details -#' This routine compares data to the thresholds using the given operator, -#' generating a series of TRUE or FALSE values; these values are then filtered -#' to remove any sequences of less than \code{min.length} days of TRUE values. -#' It then computes the lengths of the remaining sequences of TRUE values -#' (spells) and sums their lengths. -#' -#' The \code{spells.can.span.years} option controls whether spells must always -#' terminate at the end of a period, or whether they may continue until the -#' criteria ceases to be met or the end of the data is reached. The default for -#' fclimdex is FALSE. -#' -#' @param daily.temp Data to compute index on. -#' @param date.factor Date factor to split by. -#' @param jdays Timeseries of days of year. -#' @param thresholds The thresholds to compare to. -#' @param op The operator to use to compare data to threshold. -#' @param min.length The minimum spell length to be considered. -#' @param spells.can.span.years Whether spells can span years. -#' @param max.missing.days Maximum number of NA values per time period. -#' @return A timeseries of maximum spell lengths for each period. -#' @seealso \code{\link{climdex.wsdi}}. -#' @keywords ts climate -#' @examples -#' -#' prec.dat <- c(0.1, 3.0, 4.3, 1.9, 1.3, 6.0, 0, 0, 4.0, 1) -#' phony.date.factor <- factor(rep(1:2, each=5)) -#' -#' ## With spells spanning years... -#' alttedi <- threshold.exceedance.duration.index(prec.dat, -#' phony.date.factor, rep(1:5, 2), rep(1, 5), ">=", 2, TRUE, 1) -#' -#' ## Without spells spanning years... -#' tedi <- threshold.exceedance.duration.index(prec.dat, phony.date.factor, -#' rep(1:5, 2), rep(1, 5), ">=", 2, FALSE, 1) -#' -#' @noRd -.threshold.exceedance.duration.index <- function(daily.temp, date.factor, jdays, thresholds, op=">", min.length=6, spells.can.span.years=TRUE, max.missing.days) { - stopifnot(is.numeric(c(daily.temp, thresholds, min.length)), is.factor(date.factor), - is.function(match.fun(op)), - min.length > 0) - f <- match.fun(op) - na.mask <- .get.na.mask(is.na(daily.temp + thresholds[jdays]), date.factor, max.missing.days) - - if(spells.can.span.years) { - periods <- .select.blocks.gt.length(f(daily.temp, thresholds[jdays]), min.length - 1) - return(.tapply.fast(periods, date.factor, sum) * na.mask) - } else { - ## fclimdex behaviour... - return(.tapply.fast(1:length(daily.temp), date.factor, function(idx) { sum(.select.blocks.gt.length(f(daily.temp[idx], thresholds[jdays[idx]]), min.length - 1)) } ) * na.mask) - } -} -## Get NA mask given threshold and split factor -.get.na.mask <- function(x, f, threshold) { - return(c(1, NA)[1 + as.numeric(.tapply.fast(is.na(x), f, function(y) { return(sum(y) > threshold) } ))]) -} -#' Select blocks of TRUE values of sufficient length. -#' -#' Produces a sequence of booleans of the same length as input, with sequences -#' of TRUE values shorter than n replaced with FALSE. -#' -#' This function takes a series of booleans and returns a sequence of booleans -#' of equal length, with all sequences of TRUE of length \code{n} or shorter -#' replaced with sequences of FALSE. NA values are replaced with -#' \code{na.value}. -#' -#' @param d Sequence of booleans. -#' @param n Longest sequence of TRUE to replace with FALSE. -#' @param na.value Values to replace NAs with. -#' @return A vector of booleans, with the length \code{n} or less sequences of -#' TRUE replaced with FALSE. -#' @keywords ts climate -#' @examples -#' -#' ## Return only the first sequence of TRUE... second sequence will be FALSE. -#' foo <- select.blocks.gt.length(c(rep(TRUE, 4), FALSE, rep(TRUE, 3)), 3) -#' -#' @noRd -.select.blocks.gt.length <- function(d, n, na.value=FALSE) { - stopifnot(is.logical(d), is.numeric(n)) - - if(n < 1) - return(d) - - if(n >= length(d)) - return(rep(FALSE, length(d))) - - d[is.na(d)] <- na.value - - d2 <- Reduce(function(x, y) { return(c(rep(FALSE, y), d[1:(length(d) - y)]) & x) }, 1:n, d) - return(Reduce(function(x, y) { return(c(d2[(y + 1):length(d2)], rep(FALSE, y)) | x) }, 1:n, d2)) -} -## Lower overhead version of tapply -.tapply.fast <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) { - FUN <- if (!is.null(FUN)) - match.fun(FUN) - - if(!is.factor(INDEX)) - stop("INDEX must be a factor.") - - if (length(INDEX) != length(X)) - stop("arguments must have same length") - - if (is.null(FUN)) - return(INDEX) - - namelist <- levels(INDEX) - ans <- lapply(split(X, INDEX), FUN, ...) - - ans <- unlist(ans, recursive = FALSE) - names(ans) <- levels(INDEX) - return(ans) -}