diff --git a/NAMESPACE b/NAMESPACE index 87937e2c0d37d651790ac99889457791bc8858d7..7b77d4e948cb2c02dce11359dc0dacba22659792 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(ConfigShowSimilarEntries) export(ConfigShowTable) export(Corr) export(Eno) +export(Filter) export(GMST) export(GSAT) export(Histo2Hindcast) @@ -47,6 +48,7 @@ export(Reorder) export(SPOD) export(Season) export(Smoothing) +export(Spectrum) export(TPI) export(ToyModel) export(Trend) @@ -104,5 +106,6 @@ importFrom(stats,quantile) importFrom(stats,rnorm) importFrom(stats,sd) importFrom(stats,setNames) +importFrom(stats,spectrum) importFrom(stats,ts) importFrom(stats,window) diff --git a/R/Filter.R b/R/Filter.R new file mode 100644 index 0000000000000000000000000000000000000000..c4e76bf20faba914f7b287c2b0b6d6b902df8ec0 --- /dev/null +++ b/R/Filter.R @@ -0,0 +1,121 @@ +#'Filter frequency peaks from an array +#' +#'Filter out the selected frequency from a time series. The filtering is +#'performed by dichotomy, seeking for a frequency around the parameter 'freq' +#'and the phase that maximizes the signal to subtract from the time series. +#'The maximization of the signal to subtract relies on a minimization of the +#'mean square differences between the time series ('data') and the cosine of +#'the specified frequency and phase. +#' +#'@param data A numeric vector or array of the data to be filtered. +#' If it's a vector, it should be a time series. If it's an array, +#' the dimensions must have at least 'time_dim'. +#'@param freq A number of the frequency to filter. +#'@param time_dim A character string indicating the dimension along which to +#' compute the filtering. The default value is 'ftime'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numeric vector or array of the filtered data with the dimensions +#' the same as 'data'. +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'ensmod <- MeanDims(sampleData$mod, 2) +#'spectrum <- Spectrum(ensmod) +#' +#'for (jsdate in 1:dim(spectrum)['sdate']) { +#' for (jlen in 1:dim(spectrum)['ftime']) { +#' if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { +#' ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) +#' } +#' } +#'} +#' \donttest{ +#'PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) +#' } +#' +#'@import multiApply +#'@importFrom stats lm +#'@export +Filter <- function(data, freq, time_dim = 'ftime', ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## freq + if (is.null(freq)) { + stop("Parameter 'freq' cannot be NULL.") + } + if (!is.numeric(freq) | length(freq) != 1) { + stop("Parameter 'freq' must be a number.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Filter + output <- Apply(list(data), + target_dims = time_dim, + fun = .Filter, + freq = freq, + output_dims = time_dim, + ncores = ncores)$output1 + + return(output) +} + +.Filter <- function(data, freq) { + # data: [ftime] + + fac1 <- 1 + fac2 <- 1 + ndat <- length(data) + ndat2 <- length(which(!is.na(data))) + maxi <- 0 + endphase <- 0 + + for (jfreq in seq(freq - 0.5 / ndat2, freq + 0.5 / ndat2, 0.1 / (ndat2 * fac1))) { + for (phase in seq(0, pi, (pi / (10 * fac2)))) { + xtest <- cos(phase + c(1:ndat) * jfreq * 2 * pi) + test <- lm(data[is.na(data) == FALSE] ~ xtest[ + is.na(data) == FALSE])$fitted.values + if (sum(test ^ 2) > maxi) { + endphase <- phase + endfreq <- jfreq + } + maxi <- max(sum(test ^ 2), maxi) + } + } + xend <- cos(endphase + c(1:ndat) * endfreq * 2 * pi) + data[is.na(data) == FALSE] <- data[is.na(data) == FALSE] - lm( + data[is.na(data) == FALSE] ~ xend[is.na(data) == FALSE] + )$fitted.values + + return(invisible(data)) +} diff --git a/R/Spectrum.R b/R/Spectrum.R new file mode 100644 index 0000000000000000000000000000000000000000..2cbb16793d42fb398bc45ca7173fcd0f09283567 --- /dev/null +++ b/R/Spectrum.R @@ -0,0 +1,129 @@ +#'Estimate frequency spectrum +#' +#'Estimate the frequency spectrum of the data array together with a +#'user-specified confidence level. The output is provided as an array with +#'dimensions c(number of frequencies, stats = 3, other margin dimensions of +#'data). The 'stats' dimension contains the frequencies at which the spectral +#'density is estimated, the estimates of the spectral density, and the +#'significance level.\cr +#'The spectrum estimation relies on an R built-in function \code{spectrum()} +#'and the confidence interval is estimated by the Monte-Carlo method. +#' +#'@param data A vector or numeric array of which the frequency spectrum is +#' required. If it's a vector, it should be a time series. If it's an array, +#' the dimensions must have at least 'time_dim'. The data is assumed to be +#' evenly spaced in time. +#'@param time_dim A character string indicating the dimension along which to +#' compute the frequency spectrum. The default value is 'ftime'. +#'@param conf.lev A numeric indicating the confidence level for the Monte-Carlo +#' significance test. The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numeric array of the frequency spectrum with dimensions +#' c( = number of frequencies, stats = 3, the rest of the +#' dimensions of 'data'). The 'stats' dimension contains the frequency values, +#' the spectral density, and the confidence interval. +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'ensmod <- MeanDims(sampleData$mod, 2) +#'spectrum <- Spectrum(ensmod) +#' +#'for (jsdate in 1:dim(spectrum)['sdate']) { +#' for (jlen in 1:dim(spectrum)['ftime']) { +#' if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { +#' ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) +#' } +#' } +#'} +#' \donttest{ +#'PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) +#' } +#' +#'@import multiApply +#'@importFrom stats spectrum cor rnorm sd quantile +#'@export +Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Spectrum + + output <- Apply(list(data), + target_dims = time_dim, + fun = .Spectrum, + output_dims = c(time_dim, 'stats'), + conf.lev = conf.lev, + ncores = ncores)$output1 + + return(output) +} + +.Spectrum <- function(data, conf.lev = 0.95) { + # data: [time] + + data <- data[is.na(data) == FALSE] + ndat <- length(data) + + if (ndat >= 3) { + tmp <- spectrum(data, plot = FALSE) + output <- array(dim = c(length(tmp$spec), 3)) + output[, 1] <- tmp$freq + output[, 2] <- tmp$spec + ntir <- 100 + store <- array(dim = c(ntir, length(tmp$spec))) + for (jt in 1:ntir) { + toto <- mean(data) + alpha1 <- cor(data[2:ndat], data[1:(ndat - 1)]) + for (ind in 2:ndat) { + b <- rnorm(1, mean(data) * (1 - alpha1), sd(data) * sqrt(1 - + alpha1 ^ 2)) + toto <- c(toto, toto[ind - 1] * alpha1 + b) + } + toto2 <- spectrum(toto, plot = FALSE) + store[jt, ] <- toto2$spec + } + for (jx in 1:length(tmp$spec)) { + output[jx, 3] <- quantile(store[, jx], conf.lev) + } + } else { + output <- NA + } + + return(invisible(output)) +} diff --git a/man/Filter.Rd b/man/Filter.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f98fe0acbcff6bf4b9e1bd6f716fc7227930006f --- /dev/null +++ b/man/Filter.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Filter.R +\name{Filter} +\alias{Filter} +\title{Filter frequency peaks from an array} +\usage{ +Filter(data, freq, time_dim = "ftime", ncores = NULL) +} +\arguments{ +\item{data}{A numeric vector or array of the data to be filtered. +If it's a vector, it should be a time series. If it's an array, +the dimensions must have at least 'time_dim'.} + +\item{freq}{A number of the frequency to filter.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the filtering. The default value is 'ftime'.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric vector or array of the filtered data with the dimensions + the same as 'data'. +} +\description{ +Filter out the selected frequency from a time series. The filtering is +performed by dichotomy, seeking for a frequency around the parameter 'freq' +and the phase that maximizes the signal to subtract from the time series. +The maximization of the signal to subtract relies on a minimization of the +mean square differences between the time series ('data') and the cosine of +the specified frequency and phase. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +ensmod <- MeanDims(sampleData$mod, 2) +spectrum <- Spectrum(ensmod) + +for (jsdate in 1:dim(spectrum)['sdate']) { + for (jlen in 1:dim(spectrum)['ftime']) { + if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { + ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) + } + } +} + \donttest{ +PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) + } + +} diff --git a/man/Spectrum.Rd b/man/Spectrum.Rd new file mode 100644 index 0000000000000000000000000000000000000000..84b39c0cf44cc4165c25b354680ec226b8bd668d --- /dev/null +++ b/man/Spectrum.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectrum.R +\name{Spectrum} +\alias{Spectrum} +\title{Estimate frequency spectrum} +\usage{ +Spectrum(data, time_dim = "ftime", conf.lev = 0.95, ncores = NULL) +} +\arguments{ +\item{data}{A vector or numeric array of which the frequency spectrum is +required. If it's a vector, it should be a time series. If it's an array, +the dimensions must have at least 'time_dim'. The data is assumed to be +evenly spaced in time.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the frequency spectrum. The default value is 'ftime'.} + +\item{conf.lev}{A numeric indicating the confidence level for the Monte-Carlo +significance test. The default value is 0.95.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric array of the frequency spectrum with dimensions + c( = number of frequencies, stats = 3, the rest of the + dimensions of 'data'). The 'stats' dimension contains the frequency values, + the spectral density, and the confidence interval. +} +\description{ +Estimate the frequency spectrum of the data array together with a +user-specified confidence level. The output is provided as an array with +dimensions c(number of frequencies, stats = 3, other margin dimensions of +data). The 'stats' dimension contains the frequencies at which the spectral +density is estimated, the estimates of the spectral density, and the +significance level.\cr +The spectrum estimation relies on an R built-in function \code{spectrum()} +and the confidence interval is estimated by the Monte-Carlo method. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +ensmod <- MeanDims(sampleData$mod, 2) +spectrum <- Spectrum(ensmod) + +for (jsdate in 1:dim(spectrum)['sdate']) { + for (jlen in 1:dim(spectrum)['ftime']) { + if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { + ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) + } + } +} + \donttest{ +PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) + } + +} diff --git a/tests/testthat/test-Filter.R b/tests/testthat/test-Filter.R new file mode 100644 index 0000000000000000000000000000000000000000..cf271e1aa87f86bdaa9e787ab52c0bf63ba30fd8 --- /dev/null +++ b/tests/testthat/test-Filter.R @@ -0,0 +1,101 @@ +context("s2dv::Filter tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(10), dim = c(dat = 1, ftime = 5, sdate = 2)) + freq1 <- 0.015 + # dat2 + set.seed(10) + dat2 <- c(1:10) + rnorm(10) + freq2 <- freq1 + + # dat3 + dat3 <- dat2 + dat3[2] <- NA + freq3 <- freq1 +############################################## + +test_that("1. Input checks", { + + # data + expect_error( + Filter(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Filter(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Filter(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # freq + expect_error( + Filter(dat1, c()), + "Parameter 'freq' cannot be NULL." + ) + expect_error( + Filter(dat1, c(0.1, 0.2)), + "Parameter 'freq' must be a number." + ) + # time_dim + expect_error( + Filter(array(c(1:25), dim = c(dat = 1, date = 5, sdate = 5)), freq1), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Filter(dat1, freq1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # ncores + expect_error( + Filter(dat1, freq1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Filter(dat1, freq1)), + c(ftime = 5, dat = 1, sdate = 2) + ) + expect_equal( + Filter(dat1, freq1)[, 1, 2], + c(-0.080093110, 0.141328669, -0.105230299, -0.004168101, 0.048162841), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(Filter(dat2, freq2)), + c(ftime = 10) + ) + expect_equal( + as.vector(Filter(dat2, freq2)[2:5]), + c(0.1215244, -1.0229749, -0.2053940, 0.7375181), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. Output checks: dat3", { + expect_equal( + dim(Filter(dat3, freq3)), + c(ftime = 10) + ) + expect_equal( + as.vector(Filter(dat3, freq3)[2:5]), + c(NA, -0.9414294, -0.1265448, 0.7910344), + tolerance = 0.0001 + ) + +}) diff --git a/tests/testthat/test-Spectrum.R b/tests/testthat/test-Spectrum.R new file mode 100644 index 0000000000000000000000000000000000000000..caf53d395d7a3a3c1d374fa6ab1abbd80decbbc9 --- /dev/null +++ b/tests/testthat/test-Spectrum.R @@ -0,0 +1,111 @@ +context("s2dv::Spectrum tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(10), dim = c(dat = 1, ftime = 5, sdate = 2)) + + # dat2 + set.seed(10) + dat2 <- c(1:10) + rnorm(10) + + # dat3 + dat3 <- dat2 + dat3[2] <- NA +############################################## + +test_that("1. Input checks", { + + # data + expect_error( + Spectrum(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Spectrum(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Spectrum(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # time_dim + expect_error( + Spectrum(array(c(1:25), dim = c(dat = 1, date = 5, sdate = 5))), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Spectrum(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # conf.lev + expect_error( + Spectrum(dat1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1.", + fixed = T + ) + # ncores + expect_error( + Spectrum(dat1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Spectrum(dat1)), + c(ftime = 2, stats = 3, dat = 1, sdate = 2) + ) + expect_equal( + Spectrum(dat1)[, 1, 1, 2], + c(0.2, 0.4), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat1)[, 2, 1, 2], + c(0.89583007, 0.05516983), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(Spectrum(dat2)), + c(ftime = 5, stats = 3) + ) + expect_equal( + Spectrum(dat2)[, 1], + c(0.1, 0.2, 0.3, 0.4, 0.5), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat2)[, 2], + c(0.1767994, 1.0113808, 0.3341372, 0.1807377, 1.0594528), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. Output checks: dat3", { + expect_equal( + dim(Spectrum(dat3)), + c(ftime = 4, stats = 3) + ) + expect_equal( + Spectrum(dat3)[, 1], + c(0.1111111, 0.2222222, 0.3333333, 0.4444444), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat3)[, 2], + c(0.7204816, 0.6529411, 0.2605188, 0.7009824), + tolerance = 0.0001 + ) + +})