diff --git a/NAMESPACE b/NAMESPACE index 8067b1e25c11744b1458a83760de00da86b35bf5..33ffd24817b59968a251d74e7daa78c6dabd58e1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,28 +1,30 @@ # Generated by roxygen2: do not edit by hand export(Analogs) -export(CST_Analogs) export(BEI_PDFBest) export(BEI_Weights) +export(CST_Analogs) export(CST_Anomaly) export(CST_BEI_Weighting) export(CST_BiasCorrection) export(CST_Calibration) export(CST_Load) +export(CST_MultiEOF) export(CST_MultiMetric) export(CST_MultivarRMSE) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) export(CST_SaveExp) +export(MultiEOF) export(PlotCombinedMap) export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(RFSlope) export(RainFARM) -import(ClimProjDiags) export(as.s2dv_cube) export(s2dv_cube) +import(ClimProjDiags) import(abind) import(ggplot2) import(multiApply) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R new file mode 100644 index 0000000000000000000000000000000000000000..455423416dd15a9071beece41553e242b0ede89a --- /dev/null +++ b/R/CST_MultiEOF.R @@ -0,0 +1,321 @@ +#' @rdname CST_MultiEOF +#' @title EOF analysis of multiple variables +#' +#' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +#' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +#' +#' @description This function performs EOF analysis over multiple variables, +#' accepting in input a list of CSTools objects. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. + +#' +#' @param datalist A list of objects of the class 's2dv_cube', containing the variables to be analysed. +#' Each data object in the list is expected to have an element named \code{$data} with at least two +#' spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". +#' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param lon_lim Vector with longitudinal range limits for the EOF calculation for all input variables +#' @param lat_lim Vector with latitudinal range limits for the EOF calculation for all input variables +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @import abind +#' @examples +#' \donttest{ +#' library(zeallot) +#' library(ClimProjDiags) +#' c(exp, obs) %<-% lonlat_data +#' # Create three datasets (from the members) +#' exp1 <- exp +#' exp2 <- exp +#' exp3 <- exp +#' exp1$data <- Subset(exp$data, along = 2, indices = 1 : 5) +#' exp2$data <- Subset(exp$data, along = 2, indices = 6 : 10) +#' exp3$data <- Subset(exp$data, along = 2, indices = 11 : 15) +#' +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) +#' str(cal) +#' # List of 3 +#' # $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... +#' # $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... +#' # $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... +#' dim(cal$coeff) +#' # ftime sdate eof member +#' # 3 6 2 3 +#' +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3) , minvar=0.9) +#' str(cal) +#' # $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... +#' # $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... +#' # $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... +#' +#' cal <- CST_MultiEOF(list(exp1, exp2)) +#' cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(35, 50), neof_composed=3) +#' } +#' @export + +CST_MultiEOF <- function(datalist, + neof_max = 40, neof_composed = 5, minvar = 0.6, + lon_lim = NULL, lat_lim = NULL) { + + if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { + stop("Elements of the list in parameter 'datalist' must be of the class ", + "'s2dv_cube', as output by CSTools::CST_Load.") + } + + # Check if all dims equal + adims=lapply(lapply(datalist, function(x) x$data), dim) + if( !all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == + (length(adims)-1))) { + stop("Input data fields must all have the same dimensions.") + } + + #print("Pasting data...") + exp <- abind(lapply(datalist, '[[', 'data'), along = 0) + dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) + #print("...done") + + if (any(is.na(exp))) { + stop("Input data contain NA values.") + } + + result <- MultiEOF(exp, datalist[[1]]$lon, datalist[[1]]$lat, + datalist[[1]]$Dates$start, minvar = minvar, + neof_max = neof_max, neof_composed = neof_composed, + lon_lim = lon_lim, lat_lim = lat_lim) + + return(result) +} + +#' @rdname MultiEOF +#' @title EOF analysis of multiple variables starting from an array (reduced version) +#' +#' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +#' @author Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +#' +#' @description This function performs EOF analysis over multiple variables, accepting in input an array with a dimension \code{"var"} for each variable to analyse. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. +#' +#' @param data A multidimensional array with dimension \code{"var"}, +#' containing the variables to be analysed. The other diemnsions follow the same structure as the +#' \code{"exp"} element of a 's2dv_cube' object. +#' @param lon Vector of longitudes. +#' @param lat Vector of latitudes. +#' @param time Vector or matrix of dates in POSIXct format. +#' @param lon_dim String with dimension name of longitudinal coordinate +#' @param lat_dim String with dimension name of latitudinal coordinate +#' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param lon_lim Vector with longitudinal range limits for the calculation for all input variables +#' @param lat_lim Vector with latitudinal range limits for the calculation for all input variables +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @import multiApply +#' @export + +MultiEOF <- function(data, lon, lat, time, + lon_dim = "lon", lat_dim = "lat", + neof_max = 40, neof_composed = 5, minvar = 0.6, + lon_lim = NULL, lat_lim = NULL) { + # Check/detect time_dim + + # reorder and group ftime and sdate together at the end in that order + cdim0 <- dim(data) + imaskt <- names(cdim0) %in% "ftime" + imasks <- names(cdim0) %in% "sdate" + data <- .aperm2(data, c(which(!(imasks | imaskt)), + which(imaskt), which(imasks))) + cdim <- dim(data) + ind <- 1:length(which(!(imaskt | imasks))) + # compact (multiply) time_dim dimensions + dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) + + # Repeatedly apply .multi.eofs + result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), + .multi.eofs, lon, lat, time, neof_max = neof_max, + neof_composed = neof_composed, minvar = minvar, + xlim = lon_lim, ylim = lat_lim) + + # Expand back samples to compacted dims + dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) + # Recover first lon and first lat list + dd=dim(result$lon)[1]; m=matrix(1, nrow=dd, ncol=length(dim(result$lon))); + m[1:dd]=1:dd; result$lon = result$lon[m] + dd=dim(result$lat)[1]; m=matrix(1, nrow=dd, ncol=length(dim(result$lat))); + m[1:dd]=1:dd; result$lat = result$lat[m] + + return(result) +} + + +#' Atomic MultiEOF +#' @param field_arr_raw an array of dimension: (n_field, lon, lat, time). +#' where n_field is the number of variables over which to calculate +#' the multi_eofs. +#' @param neof_composed Number of composed eofs to return in output +#' @param minvar Minimum variance fraction to be explained in first decomposition +#' @param neof_max Maximum number of single eofs considered in the first decomposition +#' @param xlim Vector with longitudinal range limits for the calculation +#' @param ylim Vector with latitudinal range limits for the calculation +#' @return A list with elements \code{$coeff} (an array of time-varying principal component coefficients), +#' \code{$variance} (a matrix of explained variances), +#' \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +#' @noRd + +.multi.eofs <- function(field_arr_raw, lon, lat, time, + neof_max = 40, neof_composed = 5, minvar = 0.6, + xlim = NULL, ylim = NULL) { + + if (exists(".lm.fit")) { + lin.fit <- .lm.fit + } else { + lin.fit <- lm.fit + } + + n_field <- dim(field_arr_raw)[1] + + etime <- .power.date(time) + #print("Calculating anomalies...") + field_arr <- array(dim = dim(field_arr_raw)) + for (k in seq(1, n_field, 1)) { + field_arr[k, , , ] <- .daily.anom.mean( + lon, lat, field_arr_raw[k, , , ], etime + ) + } + + # area weighting, based on the root of cosine + #print("Area Weighting...") + ww <- .area.weight(lon, lat, root = T) + + for (k in seq(1, n_field, 1)) { + field_orig <- field_arr[k, , , ] + # calculate the area weight + field <- sweep(field_orig, c(1, 2), ww, "*") + + idx <- .selbox(lon, lat, xlim, ylim) + slon <- lon[idx$ilon] + slat <- lat[idx$ilat] + field <- field[idx$ilon, idx$ilat, ] + + # transform 3D field in a matrix + field <- array(field, dim = c(dim(field)[1] * dim(field)[2], dim(field)[3])) + + # calling SVD + SVD <- svd(field, nu = neof_max, nv = neof_max) + + # extracting EOFs (loading pattern), expansions coefficient + # and variance explained + pattern <- array(SVD$u, dim = c(dim(field)[1], dim(field)[2], neof_max)) + coefficient <- SVD$v + variance <- (SVD$d[1:neof_max]) ^ 2 / sum((SVD$d) ^ 2) + #print("Accumulated variance:") + #print(cumsum(variance)) + reqPC <- which(cumsum(variance) > minvar)[1] + #print("Number of EOFs needed for var:") + variance <- variance[1:reqPC] + coefficient <- coefficient[, 1:reqPC] + if (reqPC == 1) { + coefficient <- replicate(1, coefficient) + } + coefficient <- apply(coefficient, c(2), .standardize) + regression <- array(NA, dim = c(length(lon), length(lat), neof_max)) + for (i in 1:reqPC) { + regression[, , i] <- apply(field_orig, c(1, 2), + function(x) lin.fit(as.matrix(coefficient[, i], + ncol = 1), x)$coefficients + ) + } + + assign( + paste0("pc", k), list(coeff = coefficient, variance = variance, + wcoeff = sweep(coefficient, c(2), variance, "*"), + regression = regression) + ) + } + + newpc <- NULL + for (k in seq(1, n_field, 1)) { + newpc <- cbind(newpc, get(paste0("pc", k))$wcoeff) + } + newpc <- t(newpc) + + #print("Calculating composed EOFs") + SVD <- svd(newpc, nu = neof_composed, nv = neof_composed) + # extracting EOFs, expansions coefficient and variance explained + coefficient <- SVD$v + variance <- (SVD$d[1:(neof_composed)]) ^ 2 / sum( (SVD$d) ^ 2) + coefficient <- apply(coefficient, c(2), .standardize) + + # linear regressions on anomalies + regression <- array(dim = c(n_field, length(lon), + length(lat), neof_composed)) + for (k in seq(1, n_field, 1)) { + #print("Linear Regressions (it can take a while)... ") + for (i in 1:neof_composed) { + regression[k, , , i] <- apply( + field_arr[k, , , ], c(1, 2), + function(x) lin.fit( + as.matrix(coefficient[, i], + ncol = 1), + x)$coefficients + ) + } + } + + #print("Finalize...") + names(dim(coefficient)) <- c("time", "eof") + variance <- array(variance) + names(dim(variance)) <- "eof" + names(dim(regression)) <- c("var", "lon", "lat", "eof") + + out <- list(coeff = coefficient, variance = variance, eof_pattern = regression, lon = slon, lat = slat) + + return(out) +} + + +# new function to create simple list with date values - Oct-18 +# it needs a date or PCICt object, and returns also the season subdivision +.power.date <- function(datas, verbose = FALSE) { + + # create a "season" for continuous time, used by persistance tracking + startpoints <- c(0, which(diff(datas) > 1)) + deltapoints <- diff(c(startpoints, length(datas))) + seas <- inverse.rle(list(lengths = deltapoints, + values = seq(1, length(startpoints)))) + + etime <- list( + day = as.numeric(format(datas, "%d")), + month = as.numeric(format(datas, "%m")), + year = as.numeric(format(datas, "%Y")), data = datas, season = seas + ) + + .printv("Time Array Built", verbose) + .printv(paste("Length:", length(seas)), verbose) + .printv(paste("From", datas[1], "to", datas[length(seas)]), verbose) + return(etime) +} + + +# function for daily anomalies, use array predeclaration and rowMeans (40 times faster!) +.daily.anom.mean <- function(ics, ipsilon, field, etime) { + condition <- paste(etime$day, etime$month) + daily <- array(NA, dim = c(length(ics), length(ipsilon), + length(unique(condition)))) + anom <- field * NA + + for (t in unique(condition)) { + if (sum(t == condition) == 1) { + print(paste0("Cannot compute a mean with only one value: ", + "using climatological mean")) + anom[, , which(t == condition)] <- rowMeans(field, dims = 2) + } else { + daily[, , which(t == unique(condition))] <- rowMeans(field[, , t == condition], dims = 2) + anom[, , which(t == condition)] <- sweep(field[, , which(t == condition)], + c(1, 2), + daily[, , which(t == unique(condition))], "-") + } + } + return(anom) +} diff --git a/R/zzz.R b/R/zzz.R index 1414e9490cd4f221d9d707bf3ff922f178369ba5..c1b8664fc4e4fd72a83be2ef35666ce70bf3bcd5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -17,3 +17,63 @@ x } +# verbose-only printing function +.printv <- function(value, verbosity = TRUE) { + if (verbosity) { + print(value) + } +} + +# normalize a time series +.standardize <- function(timeseries) { + out <- (timeseries - mean(timeseries, na.rm = T)) / sd(timeseries, na.rm = T) + return(out) +} + +.selbox <- function(lon, lat, xlim = NULL, ylim = NULL) { + if (!is.null(xlim)) { + # This transforms c(-20, -10) to c(340, 350) but c(-20, 10) is unchanged + # Bring them all to the same units in the 0:360 range + xlim1 <- xlim[1] %% 360 + xlim2 <- xlim[2] %% 360 + lonm <- lon %% 360 + if (lonm[1] > tail(lonm, 1)) { + lonm <- lon + } + if (xlim1 > xlim2) { + # If box crosses 0 + ilonsel <- (lonm >= xlim1) | (lonm <= xlim2) + } else { + ilonsel <- (lonm >= xlim1) & (lonm <= xlim2) + } + if (!any(ilonsel)) { + stop("No intersection between longitude bounds and data domain.") + } + } else { + ilonsel <- 1:length(lon) + } + if (!is.null(ylim)) { + ilatsel <- (lat >= ylim[1]) & (lat <= ylim[2]) + } else { + ilatsel <- 1:length(lat) + } + return(list(ilon = ilonsel, ilat = ilatsel)) +} + +# produce a 2d matrix of area weights +.area.weight <- function(ics, ipsilon, root = T) { + field <- array(NA, dim = c(length(ics), length(ipsilon))) + if (root == T) { + for (j in 1:length(ipsilon)) { + field[, j] <- sqrt(cos(pi / 180 * ipsilon[j])) + } + } + + if (root == F) { + for (j in 1:length(ipsilon)) { + field[, j] <- cos(pi / 180 * ipsilon[j]) + } + } + + return(field) +} diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..fb5847515de2b370481c2b31ec8eb90abef17f92 --- /dev/null +++ b/man/CST_MultiEOF.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_MultiEOF.R +\name{CST_MultiEOF} +\alias{CST_MultiEOF} +\title{EOF analysis of multiple variables} +\usage{ +CST_MultiEOF(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, + lon_lim = NULL, lat_lim = NULL) +} +\arguments{ +\item{datalist}{A list of objects of the class 's2dv_cube', containing the variables to be analysed. +Each data object in the list is expected to have an element named \code{$data} with at least two +spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate".} + +\item{neof_max}{Maximum number of single eofs considered in the first decomposition} + +\item{neof_composed}{Number of composed eofs to return in output} + +\item{minvar}{Minimum variance fraction to be explained in first decomposition} + +\item{lon_lim}{Vector with longitudinal range limits for the EOF calculation for all input variables} + +\item{lat_lim}{Vector with latitudinal range limits for the EOF calculation for all input variables} +} +\value{ +A list with elements \code{$coeff} (an array of time-varying principal component coefficients), + \code{$variance} (a matrix of explained variances), + \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +} +\description{ +This function performs EOF analysis over multiple variables, +accepting in input a list of CSTools objects. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. +} +\examples{ +\donttest{ +library(zeallot) +library(ClimProjDiags) +c(exp, obs) \%<-\% lonlat_data +# Create three datasets (from the members) +exp1 <- exp +exp2 <- exp +exp3 <- exp +exp1$data <- Subset(exp$data, along = 2, indices = 1 : 5) +exp2$data <- Subset(exp$data, along = 2, indices = 6 : 10) +exp3$data <- Subset(exp$data, along = 2, indices = 11 : 15) + +cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2) +str(cal) +# List of 3 +# $ coeff : num [1:3, 1:6, 1:2, 1:5] -0.312 -0.588 0.724 1.202 1.181 ... +# $ variance : num [1:2, 1:5] 0.413 0.239 0.352 0.27 0.389 ... +# $ eof_pattern: num [1:3, 1:53, 1:22, 1:2, 1:5] -1.47 -0.446 -0.656 -1.534 -0.464 ... +dim(cal$coeff) +# ftime sdate eof member +# 3 6 2 3 + +cal <- CST_MultiEOF(list(exp1, exp2, exp3) , minvar=0.9) +str(cal) +# $ coeff : num [1:3, 1:6, 1:5, 1:5] 0.338 0.603 -0.736 -1.191 -1.198 ... +# $ variance : num [1:5, 1:5] 0.3903 0.2264 0.1861 0.1032 0.0379 ... +# $ eof_pattern: num [1:3, 1:53, 1:22, 1:5, 1:5] 1.477 0.454 0.651 1.541 0.47 ... + +cal <- CST_MultiEOF(list(exp1, exp2)) +cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(35, 50), neof_composed=3) +} +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} + +Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +} + diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1e822fc40c27ab72fd67339f9ec1d7b9fa9751ba --- /dev/null +++ b/man/MultiEOF.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_MultiEOF.R +\name{MultiEOF} +\alias{MultiEOF} +\title{EOF analysis of multiple variables starting from an array (reduced version)} +\usage{ +MultiEOF(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", + neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, + lat_lim = NULL) +} +\arguments{ +\item{data}{A multidimensional array with dimension \code{"var"}, +containing the variables to be analysed. The other diemnsions follow the same structure as the +\code{"exp"} element of a 's2dv_cube' object.} + +\item{lon}{Vector of longitudes.} + +\item{lat}{Vector of latitudes.} + +\item{time}{Vector or matrix of dates in POSIXct format.} + +\item{lon_dim}{String with dimension name of longitudinal coordinate} + +\item{lat_dim}{String with dimension name of latitudinal coordinate} + +\item{neof_max}{Maximum number of single eofs considered in the first decomposition} + +\item{neof_composed}{Number of composed eofs to return in output} + +\item{minvar}{Minimum variance fraction to be explained in first decomposition} + +\item{lon_lim}{Vector with longitudinal range limits for the calculation for all input variables} + +\item{lat_lim}{Vector with latitudinal range limits for the calculation for all input variables} +} +\value{ +A list with elements \code{$coeff} (an array of time-varying principal component coefficients), + \code{$variance} (a matrix of explained variances), + \code{eof_pattern} (a matrix of EOF patterns obtained by regression for each variable). +} +\description{ +This function performs EOF analysis over multiple variables, accepting in input an array with a dimension \code{"var"} for each variable to analyse. Based on Singular Value Decomposition. For each field the EOFs are computed and the corresponding PCs are standardized (unit variance, zero mean); the minimum number of principal components needed to reach the user-defined variance is retained. The function weights the input data for the latitude cosine square root. +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} + +Paolo Davini - ISAC-CNR, \email{p.davini@isac.cnr.it} +} + diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R new file mode 100644 index 0000000000000000000000000000000000000000..d9c46ab1ec06169593c39e72e20c83b4fa503ca6 --- /dev/null +++ b/tests/testthat/test-CST_MultiEOF.R @@ -0,0 +1,77 @@ +context("Generic tests") +test_that("Sanity checks and simple use case", { + library(abind) + # Generate simple synthetic data + seq <- 1 : (2 * 3 * 4 * 5 * 6 * 8) + seq3 <- 1 : (2 * 3 * 4 * 4 * 6 * 8) + mod1 <- sin( 0.7 + seq )^2 + cos( seq ^ 2 * 1.22 ) + dim(mod1) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + mod2 <- sin( seq * 2 ) ^ 3 + cos( seq ^ 2 ) + dim(mod2) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + mod3 <- cos( 0.5 + seq3 ) + sin ( seq3 ^ 2 * 0.2 ) + dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 4, lat = 6, lon = 8) + lon <- seq(0, 35, 5) + lat <- seq(0, 25, 5) + exp1 <- list(data = mod1, lat = lat, lon = lon) + exp2 <- list(data = mod2, lat = lat, lon = lon) + exp3 <- list(data = mod3, lat = lat, lon = lon) + attr(exp1, 'class') <- 's2dv_cube' + attr(exp2, 'class') <- 's2dv_cube' + attr(exp3, 'class') <- 's2dv_cube' + d=as.POSIXct(c("2017/01/01", "2017/01/02", "2017/01/03", "2017/01/04", "2017/01/05", + "2018/01/01", "2018/01/02", "2018/01/03", "2018/01/04", "2018/01/05", + "2019/01/01", "2019/01/02", "2019/01/03", "2019/01/04", "2019/01/05", + "2020/01/01", "2020/01/02", "2020/01/03", "2020/01/04", "2020/01/05")) + + exp1$Dates$start=d + exp2$Dates$start=d + exp3$Dates$start=d + + expect_error( + CST_MultiEOF(datalist = 1), + "Elements of the list in parameter 'datalist' must be of the class 's2dv_cube', as output by CSTools::CST_Load." + ) + expect_error( + CST_MultiEOF(list(exp1, exp3)), + "Input data fields must all have the same dimensions." + ) + mod3 <- cos( 0.5 + seq ) + sin( seq ^ 2 * 0.2 ) + dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) + exp3$data <- mod3 + + expect_error( + CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(-250, -245), lat_lim=c(10, 25)), + "No intersection between longitude bounds and data domain.") + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_composed=2) + expect_equal(length(cal), 5) + dimexp=dim(exp1$data) + expect_equal(dim(cal$coeff), c(dimexp["ftime"], dimexp["sdate"], + eof=2, dimexp["dataset"], dimexp["member"])) + expect_equal(dim(cal$variance), c(eof=2, dimexp["dataset"], dimexp["member"])) + expect_equal(dim(cal$eof_pattern), c(var=3, dimexp["lon"], dimexp["lat"], + eof=2, dimexp["dataset"], + dimexp["member"])) + expect_equal(cal$variance[1, 1, 1], 0.2909419, tolerance = .00001) + expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.5414261, tolerance = .00001) + expect_equal(cal$eof_pattern[1, 2, 2, 2, 1, 1], 0.3932484, tolerance = .00001) + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max=5, neof_composed=2, minvar=0.2) + expect_equal(cal$coeff[2, 1, 1, 1, 1], -0.6117927, tolerance = .00001) + + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(5, 30), lat_lim=c(10, 25)) + expect_equal(cal$coeff[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001) + expect_equivalent(cal$lon, seq(5, 30, 5)) + expect_equivalent(cal$lat, seq(10, 25, 5)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(350, 15), lat_lim=c(10, 25)) + expect_equivalent(cal$lon, seq(0, 15, 5)) + expect_equivalent(cal$lat, seq(10, 25, 5)) + cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim=c(-355, -345)) + expect_equivalent(cal$lon, seq(5, 15, 5)) + + exp3$data[1, 1, 1, 1, 1, 1]=NaN + expect_error( + CST_MultiEOF(list(exp1, exp3), neof_max=8, neof_composed=2), + "Input data contain NA values." + ) +})