diff --git a/DESCRIPTION b/DESCRIPTION index d7691f7166a63a8c3f07280ad393e0da61963ffe..0699598dd1c5b68462fe88bc685decf6a3118831 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,6 @@ Depends: R (>= 3.2.0) Imports: multiApply (>= 2.0.0), - climdex.pcic, PCICt, plyr, stats diff --git a/NAMESPACE b/NAMESPACE index 8c0c72b49711d1083ace038843337edfa80dfba2..e844fcba507d50c74a4134ac8da2afb5cc73769f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export(Threshold) export(WaveDuration) export(WeightedMean) import(PCICt) -import(climdex.pcic) import(multiApply) importFrom(plyr,aaply) importFrom(stats,quantile) diff --git a/R/Climdex.R b/R/Climdex.R index e182979054ea086f42ef6e0e8ebf41166fe09b86..261815c87202730a27012c9d64bc924017db340b 100644 --- a/R/Climdex.R +++ b/R/Climdex.R @@ -16,7 +16,6 @@ #' \item\code{$result} {An array with the same dimensions as the input array, except for the temporal dimension which is renamed to 'year', moved to the first dimension position and reduce to annual resolution.} #' \item\code{$years} {A vector of the corresponding years.}} #' -#'@import climdex.pcic #'@import multiApply #'@import PCICt #'@examples @@ -226,20 +225,295 @@ Climdex <- function(data, metric, threshold = NULL, base.range = NULL, dates = N .Climdex <- function(data, threshold, dates = dates, metric = metric, jdays = jdays, date.factor = NULL, base.range = NULL) { if (metric == "cdd") { - result <- spell.length.max(daily.prec = as.vector(data), date.factor = dates, 1, "<", FALSE) + result <- .spell.length.max(daily.prec = as.vector(data), date.factor = dates, 1, "<", FALSE) } else if (metric == "rx5day") { - result <- nday.consec.prec.max(daily.prec = as.vector(data), date.factor = dates, + result <- .nday.consec.prec.max(daily.prec = as.vector(data), date.factor = dates, ndays = 5, center.mean.on.last.day = FALSE) } else if (metric == "t90p" || metric == "Wx") { - result <- percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, + result <- .percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, date.factor = date.factor, threshold.outside.base = threshold, base.thresholds = threshold, base.range = base.range, op = ">", 20) } else if (metric == "t10p") { - result <- percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, + result <- .percent.days.op.threshold(temp = data, dates = dates, jdays = jdays, date.factor = date.factor, threshold.outside.base = threshold, base.thresholds = threshold, base.range = base.range, op = "<", 20) } return(result) } + +#' @title Maximum spell length +#' +#' @description +#' This function returns the longest string of days which exceed or are below +#' the given threshold. +#' +#' @details +#' This routine compares data to the threshold using the given operator, +#' generating a series of TRUE or FALSE values. It then computes the lengths of +#' sequences of TRUE values (spells) and chooses the longest spell in each +#' period (as defined by date.factor). +#' +#' The \code{spells.can.span.years} option controls whether spells must always +#' terminate at the end of a period, or whether they may continue until the +#' criteria ceases to be met or the end of the data is reached. The default for +#' fclimdex is TRUE. +#' +#' @param daily.prec Data to compute index on. +#' @param date.factor Date factor to split by. +#' @param threshold The threshold to compare to. +#' @param op The operator to use to compare data to threshold. +#' @param spells.can.span.years Whether spells can span years. +#' @return A timeseries of maximum spell lengths for each period. +#' @seealso \code{\link{climdex.cdd}}. +#' @keywords ts climate +#' @examples +#' +#' prec.dat <- c(0.1, 3.0, 4.3, 1.9, 1.3, 6.0, 0, 0, 4.0, 1) +#' phony.date.factor <- factor(rep(1:2, each=5)) +#' +#' ## With spells spanning years... +#' cwd <- spell.length.max(prec.dat, phony.date.factor, 1, ">=", TRUE) +#' +#' ## Without spells spanning years... +#' altcwd <- spell.length.max(prec.dat, phony.date.factor, 1, ">=", FALSE) +#' +#' @noRd +.spell.length.max <- function(daily.prec, date.factor, threshold, op, spells.can.span.years) { + bools <- match.fun(op)(daily.prec, threshold) + + if(spells.can.span.years) { + all.true <- .tapply.fast(bools, date.factor, all) + max.spell <- .tapply.fast(get.series.lengths.at.ends(bools), date.factor, max) + + ## Mask out values which are in the middle of a spell with NA + na.mask <- c(1, NA)[as.integer((max.spell == 0) & all.true) + 1] + return(max.spell * na.mask) + } else { + return(.tapply.fast(bools, date.factor, function(x) { max(.get.series.lengths.at.ends(x)) })) + } +} +#' Get series length at ends +#' +#' This function takes a series of boolean values and returns a list of +#' integers of the same length corresponding to the lengths at the ends of +#' sequences of TRUE values. +#' +#' It can often be useful to know how long a series of boolean values is. This +#' function provides a method of knowing where and how long such sequences are. +#' +#' @param x Sequence of booleans. +#' @param na.value Value to replace NAs with. +#' @return A vector consisting of the lengths of sequences of TRUE values at +#' the location of the last TRUE value in the sequence, and zeroes elsewhere. +#' @keywords ts climate +#' @examples +#' +#' ## Get lengths of sequences of TRUE values in a sequence +#' series.lengths <- get.series.lengths.at.ends(c(TRUE, TRUE, TRUE, FALSE, +#' TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE)) +#' +#' +#' @noRd +.get.series.lengths.at.ends <- function(x, na.value=FALSE) { + stopifnot(is.logical(x) && is.logical(na.value)) + n <- length(x) + if(n == 1) + return(as.numeric(x)) + + res <- rep(0, n) + x[is.na(x)] <- na.value + + ## Compare series to lag-1 and lag+1 series; false added to trigger state transition from TRUE at ends of series + start <- which(x & !(c(FALSE, x[1:(n - 1)]))) + end <- which(x & !(c(x[2:n], FALSE))) + res[end] <- end - start + 1 + return(res) +} +#' @title Number of days (less than, greater than, etc) a threshold +#' +#' @description +#' Produces sums of values that exceed (or are below) the specified threshold. +#' +#' @details +#' This function takes a data series, the number of days in the running window, +#' a date factor to aggregate by, and an optional modifier parameter +#' (center.mean.on.last.day). It computes the n-day running sum of +#' precipitation and returns the maximum n-day total precipitation per unit +#' time, as defined by \code{date.factor}. +#' +#' @param daily.prec Daily timeseries of precipitation. +#' @param date.factor Factor to aggregate by. +#' @param ndays Number of days in the running window. +#' @param center.mean.on.last.day Whether to center the n-day running mean on +#' the last day of the series, instead of the middle day. +#' @return A vector consisting of the maximum n-day sum of precipitation per +#' time interval. +#' @keywords ts climate +#' @examples +#' library(PCICt) +#' +#' ## Parse the dates into PCICt. +#' tmax.dates <- as.PCICt(do.call(paste, ec.1018935.tmax[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' tmin.dates <- as.PCICt(do.call(paste, ec.1018935.tmin[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' prec.dates <- as.PCICt(do.call(paste, ec.1018935.prec[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' +#' ## Load the data in. +#' ci <- climdexInput.raw(ec.1018935.tmax$MAX_TEMP, +#' ec.1018935.tmin$MIN_TEMP, ec.1018935.prec$ONE_DAY_PRECIPITATION, +#' tmax.dates, tmin.dates, prec.dates, base.range=c(1971, 2000)) +#' +#' ## Compute rx5day on a monthly basis. +#' rx5day <- nday.consec.prec.max(ci@@data$prec, ci@@date.factors$monthly, 5) +#' +#' @noRd +.nday.consec.prec.max <- function(daily.prec, date.factor, ndays, center.mean.on.last.day=FALSE) { + if(ndays == 1) { + return(suppressWarnings(tapply.fast(daily.prec, date.factor, max, na.rm=TRUE))) + } + ## Ends of the data will be de-emphasized (padded with zero precip data); NAs replaced with 0 + daily.prec[is.na(daily.prec)] <- 0 + prec.runsum <- .running.mean(daily.prec, ndays) + prec.runsum[is.na(prec.runsum)] <- 0 + if(center.mean.on.last.day) { + k2 = ndays %/% 2 + prec.runsum <- c(rep(0, k2), prec.runsum[1:(length(prec.runsum) - k2)]) + } + return(.tapply.fast(prec.runsum, date.factor, max) * ndays) +} +#' @title Running Mean of a Vector +#' +#' @description Calculates the running means of a vector with a shifting window +#' +#' @details Returns a new vector the same length as vec, where the ith +#' element is the mean of the bin of elements centered at the ith element +#' of the original vector. Means cannot be calculated for elements less +#' than half the width of the bin from the beginning or end of the vector; +#' the result vector has NA in those positions. +#' +#' @param vec A vector +#' @param bin The number of entries to average over for each mean +#' +#' @return a vector containing the running mean of bin elements of vec +#' +#' @example +#' \dontrun { +#' running.mean(c(1, 2, 3, 4, 5, 6), 2) +#' } +#' \dontrun { +#' running.mean(c(5, 5, 5, 5, 5), 4) +#' } +#'@noRd +.running.mean <- function(vec, bin){ + vec = as.vector(vec) + len = length(vec) + if (bin<=1) { + return (vec) + } + if (bin > len) { + bin = len + } + left.bin = bin%/%2 + + means = double(len) + + right.bin = bin - left.bin - 1 + means = c( sum(vec[1:bin]), diff(vec,bin) ) # find the first sum and the differences from it + means = cumsum(means)/bin # apply precomputed differences + means = c(rep(NA,left.bin), means, rep(NA,right.bin)) # extend to original vector length + return(means) +} +#' Lengths of strings of TRUE values +#' +#' Computes fraction of days above or below the baseline threshold for each +#' day, and averages them using the date factor passed in. +#' +#' This function computes fractions of days above or below baseline thresholds +#' for each day, then aggregates them using \code{date.factor}. It is used to +#' implement TN/TX 10/90p. +#' +#' @param temp Sequence of temperature values. +#' @param dates Sequence of associated dates. +#' @param jdays Sequence of associated days of year. +#' @param date.factor Factor to aggregate data using. +#' @param threshold.outside.base Sequence of thresholds to be used for data +#' outside the base period. +#' @param base.thresholds Data structure containing sets of thresholds to be +#' used inside the base period; see \link{climdexInput-class}. +#' @param base.range Date range (type PCICt) of the baseline period. +#' @param op Comparison operator to use. +#' @param max.missing.days Maximum number of NA values per time period. +#' @return A vector consisting of the mean fraction of days above or below the +#' supplied set of thresholds. +#' @note If date.factor is omitted, daily series will be returned. +#' @seealso \link{climdexInput-class}. +#' @keywords ts climate +#' @examples +#' library(PCICt) +#' +#' ## Parse the dates into PCICt. +#' tmax.dates <- as.PCICt(do.call(paste, ec.1018935.tmax[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' tmin.dates <- as.PCICt(do.call(paste, ec.1018935.tmin[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' prec.dates <- as.PCICt(do.call(paste, ec.1018935.prec[,c("year", +#' "jday")]), format="%Y %j", cal="gregorian") +#' +#' ## Load the data in. +#' ci <- climdexInput.raw(ec.1018935.tmax$MAX_TEMP, +#' ec.1018935.tmin$MIN_TEMP, ec.1018935.prec$ONE_DAY_PRECIPITATION, +#' tmax.dates, tmin.dates, prec.dates, base.range=c(1971, 2000)) +#' +#' ## Compute monthly tx90p. +#' tx90p <- percent.days.op.threshold(ci@@data$tmax, ci@@dates, ci@@jdays, +#' ci@@date.factors$monthly, +#' ci@@quantiles$tmax$outbase$q90, +#' ci@@quantiles$tmax$inbase$q90, +#' ci@@base.range, ">", +#' ci@@max.missing.days['monthly']) * +#' ci@@namasks$monthly$tmax +#' +#' @noRd +.percent.days.op.threshold <- function(temp, dates, jdays, date.factor, threshold.outside.base, base.thresholds, base.range, op='<', max.missing.days) { + f <- match.fun(op) + dat <- f(temp, threshold.outside.base[jdays]) + + inset <- dates >= base.range[1] & dates <= base.range[2] + ## Don't use in-base thresholds with data shorter than two years; no years to replace with. + if(sum(inset) > 0 && length(dates) >= 360 * 2) { + jdays.base <- jdays[inset] + years.base <- .get.years(dates[inset]) + + ## Get number of base years, subset temp data to base period only. + temp.base <- temp[inset] + years.base.range <- range(years.base) + byrs <- (years.base.range[2] - years.base.range[1] + 1) + + ## Linearize thresholds, then compare them to the temperatures + bdim <- dim(base.thresholds) + dim(base.thresholds) <- c(bdim[1] * bdim[2], bdim[3]) + yday.byr.indices <- jdays.base + (years.base - .get.years(base.range)[1]) * bdim[1] + f.result <- f(rep(temp.base, byrs - 1), base.thresholds[yday.byr.indices,]) + dim(f.result) <- c(length(yday.byr.indices), bdim[3]) + + ## Chop up data along the 2nd dim into a list; sum elements of the list + dat[inset] <- rowSums(f.result, na.rm=TRUE) / (byrs - 1) + } + dat[is.nan(dat)] <- NA + if(missing(date.factor)) + return(dat) + na.mask <- .get.na.mask(dat, date.factor, max.missing.days) + ## FIXME: Need to monthly-ize the NA mask calculation, which will be ugly. + ret <- .tapply.fast(dat, date.factor, mean, na.rm=TRUE) * 100 * na.mask + ret[is.nan(ret)] <- NA + return(ret) +} +## Get year +.get.years <- function(dates) { + return(as.POSIXlt(dates)$year + 1900) +} + diff --git a/R/Extremes.R b/R/Extremes.R index 46ecbaf80d1621060a64bf6659f9bd4fb4af93f2..30dd5698cd89eed9dd3bcfd445c98a4ec358229b 100644 --- a/R/Extremes.R +++ b/R/Extremes.R @@ -21,7 +21,6 @@ #'@details This routine compares data to the thresholds using the given operator, generating a series of TRUE or FALSE values; these values are then filtered to remove any sequences of less than \code{min.length} days of TRUE values. It then computes the lengths of the remaining sequences of TRUE values (spells) and sums their lengths. The \code{spells.can.spa .years} option controls whether spells must always terminate at the end of a period, or whether they may continue until the criteria ceases to be met or the end of the data is reached. The default for fclimdex is FALSE. #' #'@import multiApply -#'@import climdex.pcic #'@import PCICt #'@examples #'##Example synthetic data: @@ -212,7 +211,7 @@ Extremes <- function(data, threshold, op = ">", min.length = 6, spells.can.span. } .Extremes <- function(data, threshold, date.factor, jdays, op, min.length, spells.can.span.years, max.missing.days) { - result <- threshold.exceedance.duration.index(data, date.factor, jdays, + result <- .threshold.exceedance.duration.index(data, date.factor, jdays, threshold,op, min.length, spells.can.span.years, max.missing.days) } diff --git a/R/WaveDuration.R b/R/WaveDuration.R index 441fb3a41e6c6b3c7238a574fa9410de88ba7334..de0d5734c8ccce4b2b551081120071f66e20f634 100644 --- a/R/WaveDuration.R +++ b/R/WaveDuration.R @@ -16,7 +16,6 @@ #' \item\code{$result}{An array with the same dimensions as the input \code{data}, but with the time dimension reduce from daily to monthly or seasonal resolution depending on the selected resolution in \code{by.season}.} #' \item\code{$years}{A vector of the years and season/months corresponding to the resolution selected in \code{by.season} and temporal length of the input \code{data}}} #' -#'@import climdex.pcic #'@import multiApply #'@import PCICt #'@examples @@ -224,7 +223,7 @@ WaveDuration <- function(data, threshold, op = ">", spell.length = 6, by.seasons } .WaveDuration <- function(data, threshold, date.factor = date.factor, jdays = jdays, op = op, spell.length = spell.length) { - result <- threshold.exceedance.duration.index(daily.temp = data, + result <- .threshold.exceedance.duration.index(daily.temp = data, date.factor = date.factor, jdays = jdays, thresholds = threshold, op = op, min.length = spell.length, @@ -274,24 +273,24 @@ WaveDuration <- function(data, threshold, op = ">", spell.length = 6, by.seasons #' rep(1:5, 2), rep(1, 5), ">=", 2, FALSE, 1) #' #' @noRd -threshold.exceedance.duration.index <- function(daily.temp, date.factor, jdays, thresholds, op=">", min.length=6, spells.can.span.years=TRUE, max.missing.days) { +.threshold.exceedance.duration.index <- function(daily.temp, date.factor, jdays, thresholds, op=">", min.length=6, spells.can.span.years=TRUE, max.missing.days) { stopifnot(is.numeric(c(daily.temp, thresholds, min.length)), is.factor(date.factor), is.function(match.fun(op)), min.length > 0) f <- match.fun(op) - na.mask <- get.na.mask(is.na(daily.temp + thresholds[jdays]), date.factor, max.missing.days) + na.mask <- .get.na.mask(is.na(daily.temp + thresholds[jdays]), date.factor, max.missing.days) if(spells.can.span.years) { - periods <- select.blocks.gt.length(f(daily.temp, thresholds[jdays]), min.length - 1) - return(tapply.fast(periods, date.factor, sum) * na.mask) + periods <- .select.blocks.gt.length(f(daily.temp, thresholds[jdays]), min.length - 1) + return(.tapply.fast(periods, date.factor, sum) * na.mask) } else { ## fclimdex behaviour... - return(tapply.fast(1:length(daily.temp), date.factor, function(idx) { sum(select.blocks.gt.length(f(daily.temp[idx], thresholds[jdays[idx]]), min.length - 1)) } ) * na.mask) + return(.tapply.fast(1:length(daily.temp), date.factor, function(idx) { sum(.select.blocks.gt.length(f(daily.temp[idx], thresholds[jdays[idx]]), min.length - 1)) } ) * na.mask) } } ## Get NA mask given threshold and split factor -get.na.mask <- function(x, f, threshold) { - return(c(1, NA)[1 + as.numeric(tapply.fast(is.na(x), f, function(y) { return(sum(y) > threshold) } ))]) +.get.na.mask <- function(x, f, threshold) { + return(c(1, NA)[1 + as.numeric(.tapply.fast(is.na(x), f, function(y) { return(sum(y) > threshold) } ))]) } #' Select blocks of TRUE values of sufficient length. #' @@ -315,7 +314,7 @@ get.na.mask <- function(x, f, threshold) { #' foo <- select.blocks.gt.length(c(rep(TRUE, 4), FALSE, rep(TRUE, 3)), 3) #' #' @noRd -select.blocks.gt.length <- function(d, n, na.value=FALSE) { +.select.blocks.gt.length <- function(d, n, na.value=FALSE) { stopifnot(is.logical(d), is.numeric(n)) if(n < 1) @@ -330,7 +329,7 @@ select.blocks.gt.length <- function(d, n, na.value=FALSE) { return(Reduce(function(x, y) { return(c(d2[(y + 1):length(d2)], rep(FALSE, y)) | x) }, 1:n, d2)) } ## Lower overhead version of tapply -tapply.fast <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) { +.tapply.fast <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) { FUN <- if (!is.null(FUN)) match.fun(FUN) @@ -350,40 +349,4 @@ tapply.fast <- function (X, INDEX, FUN = NULL, ..., simplify = TRUE) { names(ans) <- levels(INDEX) return(ans) } -#' Select blocks of TRUE values of sufficient length. -#' -#' Produces a sequence of booleans of the same length as input, with sequences -#' of TRUE values shorter than n replaced with FALSE. -#' -#' This function takes a series of booleans and returns a sequence of booleans -#' of equal length, with all sequences of TRUE of length \code{n} or shorter -#' replaced with sequences of FALSE. NA values are replaced with -#' \code{na.value}. -#' -#' @param d Sequence of booleans. -#' @param n Longest sequence of TRUE to replace with FALSE. -#' @param na.value Values to replace NAs with. -#' @return A vector of booleans, with the length \code{n} or less sequences of -#' TRUE replaced with FALSE. -#' @keywords ts climate -#' @examples -#' -#' ## Return only the first sequence of TRUE... second sequence will be FALSE. -#' foo <- select.blocks.gt.length(c(rep(TRUE, 4), FALSE, rep(TRUE, 3)), 3) -#' -#' @noRd -select.blocks.gt.length <- function(d, n, na.value=FALSE) { - stopifnot(is.logical(d), is.numeric(n)) - - if(n < 1) - return(d) - - if(n >= length(d)) - return(rep(FALSE, length(d))) - - d[is.na(d)] <- na.value - - d2 <- Reduce(function(x, y) { return(c(rep(FALSE, y), d[1:(length(d) - y)]) & x) }, 1:n, d) - return(Reduce(function(x, y) { return(c(d2[(y + 1):length(d2)], rep(FALSE, y)) | x) }, 1:n, d2)) -}