From 8ac499ffb613bd3f923515f2bc88f64b85b609e2 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 12 Sep 2023 17:27:04 +0200 Subject: [PATCH 1/6] Develop accepting spatial NAs in MultiEOF --- R/CST_MultiEOF.R | 166 ++++++++++++++++++++--------- man/CST_MultiEOF.Rd | 20 +++- man/MultiEOF.Rd | 19 +++- tests/testthat/test-CST_MultiEOF.R | 7 +- 4 files changed, 157 insertions(+), 55 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index bd218423..26336152 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -26,6 +26,14 @@ #' for all input variables. #'@param lat_lim Vector with latitudinal range limits for the EOF calculation #' for all input variables. +#'@param lon_dim String with dimension name of longitudinal coordinate. +#'@param lat_dim String with dimension name of latitudinal coordinate. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. +#'@param var_dim A character string indicating the name of the variable +#' dimension. By default, it is set to 'var'. #'@return A list with elements \code{$coeff} (an array of time-varying principal #'component coefficients), \code{$variance} (a matrix of explained variances), #'\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each @@ -56,32 +64,24 @@ #'cal <- CST_MultiEOF(datalist = list(exp1, exp2), neof_composed = 2) #'@export CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, - minvar = 0.6, lon_lim = NULL, lat_lim = NULL) { - # Check s2dv_cube + minvar = 0.6, lon_lim = NULL, lat_lim = NULL, + lon_dim = "lon", lat_dim = "lat", time_dim = 'ftime', + sdate_dim = 'sdate', var_dim = 'var') { + # Check 's2dv_cube' if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { - stop("Elements of the list in parameter 'datalist' must be of the class ", - "'s2dv_cube', as output by CSTools::CST_Load.") - } - - # Check if all dims equal - adims = lapply(lapply(datalist, function(x) x$data), dim) - if(!all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == - (length(adims)-1))) { - stop("Input data fields must all have the same dimensions.") + stop("Elements of the list in parameter 'datalist' must be of the ", + "class 's2dv_cube'.") } - - exp <- abind(lapply(datalist, '[[', 'data'), along = 0) - dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) - - if (any(is.na(exp))) { - stop("Input data contain NA values.") - } - - # Check coordinates if (!all(c('data', 'coords', 'attrs') %in% names(datalist[[1]]))) { stop("Parameter 'datalist' must have 'data', 'coords' and 'attrs' elements ", "within the 's2dv_cube' structure.") } + # Dates + dates <- datalist[[1]]$attrs$Dates + if (is.null(dates)) { + stop("Element 'Dates' is not found in 'attrs' list of the first array.") + } + # Check coordinates if (!any(names(datalist[[1]]$coords) %in% .KnownLonNames()) | !any(names(datalist[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names do not match any of the names accepted by the ", @@ -89,24 +89,27 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, "'nav_lat'. Longitudes accepted names: 'lon', 'lons', 'longitude', 'x',", " 'i', 'nav_lon'.") } - # Check dimensions - if (!any(names(dim(datalist[[1]]$data)) %in% .KnownLonNames()) | - !any(names(dim(datalist[[1]]$data)) %in% .KnownLatNames())) { - stop("Spatial dimension names do not match any of the names accepted by ", - "the package.") + + # Check if all dims equal + adims = lapply(lapply(datalist, function(x) x$data), dim) + if (!all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == + (length(adims)-1))) { + stop("Input data fields must all have the same dimensions.") } + exp <- abind(lapply(datalist, '[[', 'data'), along = 0) + dim(exp) <- c(length(datalist), dim(datalist[[1]]$data)) + names(dim(exp)) <- c(var_dim, names(dim(datalist[[1]]$data))) + lon <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLonNames())]] lat <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLatNames())]] - lon_name <- names(dim(datalist[[1]]$data))[[which(names(dim(datalist[[1]]$data)) %in% .KnownLonNames())]] - lat_name <- names(dim(datalist[[1]]$data))[[which(names(dim(datalist[[1]]$data)) %in% .KnownLatNames())]] - result <- MultiEOF(exp, lon = as.vector(datalist[[1]]$coords[[lon]]), lat = as.vector(datalist[[1]]$coords[[lat]]), - lon_dim = lon_name, lat_dim = lat_name, - time = datalist[[1]]$attrs$Dates, minvar = minvar, + lon_dim = lon_dim, lat_dim = lat_dim, time_dim = time_dim, + sdate_dim = sdate_dim, var_dim = var_dim, + dates = dates, minvar = minvar, neof_max = neof_max, neof_composed = neof_composed, lon_lim = lon_lim, lat_lim = lat_lim) @@ -135,9 +138,16 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, #' 'nav_lon'. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. -#'@param time Vector or matrix of dates in POSIXct format. +#'@param dates Vector or matrix of dates in POSIXct format. +#'@param time Deprecated. #'@param lon_dim String with dimension name of longitudinal coordinate. #'@param lat_dim String with dimension name of latitudinal coordinate. +#'@param time_dim A character string indicating the name of the temporal +#' dimension. By default, it is set to 'time'. +#'@param sdate_dim A character string indicating the name of the start date +#' dimension. By default, it is set to 'sdate'. +#'@param var_dim A character string indicating the name of the variable +#' dimension. By default, it is set to 'var'. #'@param neof_composed Number of composed eofs to return in output. #'@param minvar Minimum variance fraction to be explained in first decomposition. #'@param neof_max Maximum number of single eofs considered in the first @@ -162,8 +172,9 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, #'cal <- MultiEOF(data = exp, lon = lon, lat = lat, time = Dates) #'@import multiApply #'@export -MultiEOF <- function(data, lon, lat, time, +MultiEOF <- function(data, lon, lat, dates, time = NULL, lon_dim = "lon", lat_dim = "lat", + time_dim = 'ftime', sdate_dim = 'sdate', var_dim = 'var', neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, lat_lim = NULL) { @@ -173,27 +184,54 @@ MultiEOF <- function(data, lon, lat, time, stop("Spatial coordinate names do not match any of the names accepted by ", "the package.") } + # dates + if (!is.null(time)) { + warning("The parameter 'time' is deprecated, use 'dates' instead.") + dates <- time + } + # time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + # sdate_dim + if (!is.character(sdate_dim) | length(sdate_dim) != 1) { + stop("Parameter 'sdate_dim' must be a character string.") + } + if (!sdate_dim %in% names(dim(data))) { + stop("Parameter 'sdate_dim' is not found in 'data' dimension.") + } + # var_dim + if (!is.character(var_dim) | length(var_dim) != 1) { + stop("Parameter 'var_dim' must be a character string.") + } + if (!var_dim %in% names(dim(data))) { + stop("Parameter 'var_dim' is not found in 'data' dimension.") + } # Reorder and group ftime and sdate together at the end in that order - cdim0 <- dim(data) - imaskt <- names(cdim0) %in% "ftime" - imasks <- names(cdim0) %in% "sdate" + imaskt <- names(dim(data)) %in% time_dim + imasks <- names(dim(data)) %in% sdate_dim data <- .aperm2(data, c(which(!(imasks | imaskt)), which(imaskt), which(imasks))) - cdim <- dim(data) + dims <- dim(data) ind <- 1:length(which(!(imaskt | imasks))) # compact (multiply) time_dim dimensions - dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) + dim(data) <- c(dims[ind], samples = prod(dims[-ind])) # Repeatedly apply .multi.eofs - result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), - .multi.eofs, lon, lat, time, neof_max = neof_max, + result <- Apply(data = data, + target_dims = c(var_dim, lon_dim, lat_dim, "samples"), + fun = .multi.eofs, lon = lon, lat = lat, dates = dates, + neof_max = neof_max, neof_composed = neof_composed, minvar = minvar, xlim = lon_lim, ylim = lat_lim, lon_dim = lon_dim, lat_dim = lat_dim) # Expand back samples to compacted dims - dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) + dim(result$coeff) <- c(dims[-ind], dim(result$coeff)[-1]) # Recover first lon and first lat list dd = dim(result[[lon_dim]])[1]; m = matrix(1, nrow = dd, ncol = length(dim(result[[lon_dim]]))); m[1:dd] = 1:dd; result[[lon_dim]] = result[[lon_dim]][m] @@ -222,7 +260,7 @@ MultiEOF <- function(data, lon, lat, time, #'variable). #'@noRd -.multi.eofs <- function(field_arr_raw, lon, lat, time, neof_max = 40, +.multi.eofs <- function(field_arr_raw, lon, lat, dates, neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = NULL, ylim = NULL, lon_dim = "lon", lat_dim = "lat") { @@ -231,9 +269,14 @@ MultiEOF <- function(data, lon, lat, time, } else { lin.fit <- lm.fit } - + + # Dimensions n_field <- dim(field_arr_raw)[1] - etime <- .power.date(time) + n_lon <- dim(field_arr_raw)[2] + n_lat <- dim(field_arr_raw)[3] + nt <- dim(field_arr_raw)[4] + + etime <- .power.date(dates) field_arr <- array(dim = dim(field_arr_raw)) for (k in seq(1, n_field, 1)) { @@ -243,8 +286,33 @@ MultiEOF <- function(data, lon, lat, time, # area weighting, based on the root of cosine ww <- .area.weight(lon, lat, root = T) + # create a mask + mask_arr <- array(dim = c(n_lon, n_lat, n_field)) + for (k in seq(1, n_field, 1)) { field_orig <- field_arr[k, , , ] + + # Check if all the time steps at one grid point are NA-consistent + # The grid point should have all NAs or no NA along time dim. + if (anyNA(field_orig)) { + field_latlon <- array(field_orig, dim = c(n_lon*n_lat, nt)) # [lon*lat, time] + na_ind <- which(is.na(field_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[,1]))) { + stop("Detected certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + # Build the mask + mask <- field_orig[, , 1] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(n_lon, n_lat) + mask_arr[,,k] <- mask + + # Replace mask of NAs with 0s for EOF analysis. + field_arr[k, , , ][!is.finite(field_orig)] <- 0 + field_orig[!is.finite(field_orig)] <- 0 + # calculate the area weight field <- sweep(field_orig, c(1, 2), ww, "*") idx <- .selbox(lon, lat, xlim, ylim) @@ -274,7 +342,7 @@ MultiEOF <- function(data, lon, lat, time, for (i in 1:reqPC) { regression[, , i] <- apply(field_orig, c(1, 2), function(x) lin.fit(as.matrix(coefficient[, i], - ncol = 1), x)$coefficients) + ncol = 1), x)$coefficients)*mask } assign(paste0("pc", k), list(coeff = coefficient, variance = variance, wcoeff = sweep(coefficient, c(2), variance, "*"), @@ -300,19 +368,19 @@ MultiEOF <- function(data, lon, lat, time, for (i in 1:neof_composed) { regression[k, , , i] <- apply(field_arr[k, , , ], c(1, 2), function(x) lin.fit(as.matrix(coefficient[, i], - ncol = 1), x)$coefficients) + ncol = 1), x)$coefficients)*mask_arr[,,k] } } - names(dim(coefficient)) <- c("time", "eof") + names(dim(coefficient)) <- c("dates", "eof") variance <- array(variance) names(dim(variance)) <- "eof" - names(dim(regression)) <- c("var", "lon", "lat", "eof") + names(dim(regression)) <- c(names(dim(field_arr_raw))[1:3], "eof") out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) - out[[lon_dim]] <- slon - out[[lat_dim]] <- slat + out[[names(n_lon)]] <- slon + out[[names(n_lat)]] <- slat return(out) } diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 11f8877f..21f74c2b 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -10,7 +10,12 @@ CST_MultiEOF( neof_composed = 5, minvar = 0.6, lon_lim = NULL, - lat_lim = NULL + lat_lim = NULL, + lon_dim = "lon", + lat_dim = "lat", + time_dim = "ftime", + sdate_dim = "sdate", + var_dim = "var" ) } \arguments{ @@ -34,6 +39,19 @@ for all input variables.} \item{lat_lim}{Vector with latitudinal range limits for the EOF calculation for all input variables.} + +\item{lon_dim}{String with dimension name of longitudinal coordinate.} + +\item{lat_dim}{String with dimension name of latitudinal coordinate.} + +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'.} + +\item{var_dim}{A character string indicating the name of the variable +dimension. By default, it is set to 'var'.} } \value{ A list with elements \code{$coeff} (an array of time-varying principal diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index 04963e1a..08721a40 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -9,9 +9,13 @@ MultiEOF( data, lon, lat, - time, + dates, + time = NULL, lon_dim = "lon", lat_dim = "lat", + time_dim = "ftime", + sdate_dim = "sdate", + var_dim = "var", neof_max = 40, neof_composed = 5, minvar = 0.6, @@ -31,12 +35,23 @@ Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', \item{lat}{Vector of latitudes.} -\item{time}{Vector or matrix of dates in POSIXct format.} +\item{dates}{Vector or matrix of dates in POSIXct format.} + +\item{time}{Deprecated.} \item{lon_dim}{String with dimension name of longitudinal coordinate.} \item{lat_dim}{String with dimension name of latitudinal coordinate.} +\item{time_dim}{A character string indicating the name of the temporal +dimension. By default, it is set to 'time'.} + +\item{sdate_dim}{A character string indicating the name of the start date +dimension. By default, it is set to 'sdate'.} + +\item{var_dim}{A character string indicating the name of the variable +dimension. By default, it is set to 'var'.} + \item{neof_max}{Maximum number of single eofs considered in the first decomposition.} diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index 66a63525..70346206 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -46,8 +46,8 @@ dat03$coords <- list(long = seq(1:4), lati = seq(1:4)) test_that("1. Input checks", { expect_error( CST_MultiEOF(datalist = 1), - paste0("Elements of the list in parameter 'datalist' must be of the class ", - "'s2dv_cube', as output by CSTools::CST_Load.") + paste0("Elements of the list in parameter 'datalist' must be of the class.", + "'s2dv_cube'.") ) # Check if all dims equal expect_error( @@ -152,6 +152,7 @@ test_that("2. Output checks", { exp3$data[1, 1, 1, 1, 1, 1] = NaN expect_error( CST_MultiEOF(list(exp1, exp3), neof_max = 8, neof_composed=2), - "Input data contain NA values." + paste0("Detected certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") ) }) -- GitLab From 44fddc691f3f3e40ebbbfa7705f3af23b372de2b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Tue, 12 Sep 2023 17:32:23 +0200 Subject: [PATCH 2/6] Improve documentation for lon_dim, lat_dim params --- R/CST_MultiEOF.R | 12 ++++++++---- man/CST_MultiEOF.Rd | 6 ++++-- man/MultiEOF.Rd | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 26336152..18e868b7 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -26,8 +26,10 @@ #' for all input variables. #'@param lat_lim Vector with latitudinal range limits for the EOF calculation #' for all input variables. -#'@param lon_dim String with dimension name of longitudinal coordinate. -#'@param lat_dim String with dimension name of latitudinal coordinate. +#'@param lon_dim A character string indicating the name of the longitudinal +#' dimension. By default, it is set to 'lon'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default, it is set to 'lat'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param sdate_dim A character string indicating the name of the start date @@ -140,8 +142,10 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, #'@param lat Vector of latitudes. #'@param dates Vector or matrix of dates in POSIXct format. #'@param time Deprecated. -#'@param lon_dim String with dimension name of longitudinal coordinate. -#'@param lat_dim String with dimension name of latitudinal coordinate. +#'@param lon_dim A character string indicating the name of the longitudinal +#' dimension. By default, it is set to 'lon'. +#'@param lat_dim A character string indicating the name of the latitudinal +#' dimension. By default, it is set to 'lat'. #'@param time_dim A character string indicating the name of the temporal #' dimension. By default, it is set to 'time'. #'@param sdate_dim A character string indicating the name of the start date diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 21f74c2b..bc88390c 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -40,9 +40,11 @@ for all input variables.} \item{lat_lim}{Vector with latitudinal range limits for the EOF calculation for all input variables.} -\item{lon_dim}{String with dimension name of longitudinal coordinate.} +\item{lon_dim}{A character string indicating the name of the longitudinal +dimension. By default, it is set to 'lon'.} -\item{lat_dim}{String with dimension name of latitudinal coordinate.} +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default, it is set to 'lat'.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'time'.} diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index 08721a40..d4fe8954 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -39,9 +39,11 @@ Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', \item{time}{Deprecated.} -\item{lon_dim}{String with dimension name of longitudinal coordinate.} +\item{lon_dim}{A character string indicating the name of the longitudinal +dimension. By default, it is set to 'lon'.} -\item{lat_dim}{String with dimension name of latitudinal coordinate.} +\item{lat_dim}{A character string indicating the name of the latitudinal +dimension. By default, it is set to 'lat'.} \item{time_dim}{A character string indicating the name of the temporal dimension. By default, it is set to 'time'.} -- GitLab From 44a09aa9a368bc72c636e41dc62620fa21cd2648 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Wed, 13 Sep 2023 12:54:28 +0200 Subject: [PATCH 3/6] Develop 's2dv_cube' output in CST_ and improve initial checks and documentation --- R/CST_MultiEOF.R | 227 +++++++++++++++++++++++------ man/CST_MultiEOF.Rd | 73 +++++++--- man/MultiEOF.Rd | 42 +++++- tests/testthat/test-CST_MultiEOF.R | 136 ++++++++++++++--- 4 files changed, 381 insertions(+), 97 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 18e868b7..62f6b991 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -17,15 +17,9 @@ #' "lon" and "lat", a dimension "ftime" and a dimension "sdate". Latitudinal #' dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. #' Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -#' 'nav_lon'. -#'@param neof_composed Number of composed eofs to return in output. -#'@param minvar Minimum variance fraction to be explained in first decomposition. -#'@param neof_max Maximum number of single eofs considered in the first -#' decomposition. -#'@param lon_lim Vector with longitudinal range limits for the EOF calculation -#' for all input variables. -#'@param lat_lim Vector with latitudinal range limits for the EOF calculation -#' for all input variables. +#' 'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That +#' is, if one grid point has NAs for each variable, all the time steps at this +#' point should be NAs. #'@param lon_dim A character string indicating the name of the longitudinal #' dimension. By default, it is set to 'lon'. #'@param lat_dim A character string indicating the name of the latitudinal @@ -36,11 +30,42 @@ #' dimension. By default, it is set to 'sdate'. #'@param var_dim A character string indicating the name of the variable #' dimension. By default, it is set to 'var'. -#'@return A list with elements \code{$coeff} (an array of time-varying principal -#'component coefficients), \code{$variance} (a matrix of explained variances), -#'\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each -#'variable). -#'@import abind +#'@param neof_max Maximum number of single eofs considered in the first +#' decomposition. +#'@param neof_composed Number of composed eofs to return in output. +#'@param minvar Minimum variance fraction to be explained in first decomposition. +#'@param lon_lim Vector with longitudinal range limits for the EOF calculation +#' for all input variables. +#'@param lat_lim Vector with latitudinal range limits for the EOF calculation +#' for all input variables. +#'@return +#'A list containing: +#'\item{coeff}{ +#' An 's2dv_cube' with the data element being an array of principal components +#' with dimensions 'time_dim', 'sdate_dim', number of eof, rest of the +#' dimensions of 'data' except 'lon_dim' and 'lat_dim'. +#'} +#'\item{variance}{ +#' An 's2dv_cube' with the data element being an array of explained variances +#' with dimensions 'eof' and the rest of the dimensions of 'data' except +#' 'time_dim', 'sdate_dim', 'lon_dim' and 'lat_dim'. +#'} +#'\item{eof_pattern}{ +#' An 's2dv_cube' with the data element being an array of EOF patterns obtained +#' by regression with dimensions: 'eof' and the rest of the dimensions of +#' 'data' except 'time_dim' and 'sdate_dim'. +#'} +#'\item{mask}{ +#' An 's2dv_cube' with the data element being an array of the mask with +#' dimensions ('lon_dim', 'lat_dim', rest of the dimensions of 'data' except +#' 'time_dim'). It is made from 'data', 1 for the positions that 'data' has +#' value and NA for the positions that 'data' has NA. It is used to replace NAs +#' with 0s for EOF calculation and mask the result with NAs again after the +#' calculation. +#'} +#'\item{coordinates}{ +#' Longitudinal and latitudinal coordinates vectors. +#'} #'@examples #'seq <- 1 : (2 * 3 * 4 * 5 * 6 * 8) #'mod1 <- sin( 0.7 + seq )^2 + cos( seq ^ 2 * 1.22 ) @@ -64,11 +89,12 @@ #'exp2$attrs$Dates = d #' #'cal <- CST_MultiEOF(datalist = list(exp1, exp2), neof_composed = 2) +#'@import abind #'@export -CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, - minvar = 0.6, lon_lim = NULL, lat_lim = NULL, - lon_dim = "lon", lat_dim = "lat", time_dim = 'ftime', - sdate_dim = 'sdate', var_dim = 'var') { +CST_MultiEOF <- function(datalist, lon_dim = "lon", lat_dim = "lat", + time_dim = 'ftime', sdate_dim = 'sdate', + var_dim = 'var', neof_max = 40, neof_composed = 5, + minvar = 0.6, lon_lim = NULL, lat_lim = NULL) { # Check 's2dv_cube' if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { stop("Elements of the list in parameter 'datalist' must be of the ", @@ -83,7 +109,7 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, if (is.null(dates)) { stop("Element 'Dates' is not found in 'attrs' list of the first array.") } - # Check coordinates + # coordinates if (!any(names(datalist[[1]]$coords) %in% .KnownLonNames()) | !any(names(datalist[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names do not match any of the names accepted by the ", @@ -103,19 +129,35 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, dim(exp) <- c(length(datalist), dim(datalist[[1]]$data)) names(dim(exp)) <- c(var_dim, names(dim(datalist[[1]]$data))) - lon <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLonNames())]] - lat <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLatNames())]] + lon_name <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLonNames())]] + lat_name <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLatNames())]] + lon <- as.vector(datalist[[1]]$coords[[lon_name]]) + lat <- as.vector(datalist[[1]]$coords[[lat_name]]) - result <- MultiEOF(exp, - lon = as.vector(datalist[[1]]$coords[[lon]]), - lat = as.vector(datalist[[1]]$coords[[lat]]), + result <- MultiEOF(exp, lon = lon, lat = lat, lon_dim = lon_dim, lat_dim = lat_dim, time_dim = time_dim, sdate_dim = sdate_dim, var_dim = var_dim, dates = dates, minvar = minvar, neof_max = neof_max, neof_composed = neof_composed, lon_lim = lon_lim, lat_lim = lat_lim) - - return(result) + names_res <- names(result[1:4]) + res <- lapply(seq_along(result)[1:4], function(i) { + coords = list(lon, lat) + names(coords) <- c(lon_dim, lat_dim) + dates <- dates + varName <- names(result)[[i]] + metadata <- lapply(datalist, function(x) x$attrs$Variable$metadata) + metadata <- unlist(metadata, recursive=FALSE) + metadata <- metadata[unique(names(metadata))] + suppressWarnings( + cube <- s2dv_cube(data = result[[i]], coords = coords, varName = varName, Dates = dates, + source_files = unlist(sapply(datalist, function(x) x$attrs$source_files)), + metadata = metadata, when = Sys.time()) + ) + return(cube) + }) + names(res) <- names_res + return(c(res, result[5:6])) } #'@rdname MultiEOF #'@title EOF analysis of multiple variables starting from an array (reduced @@ -137,11 +179,14 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, #' as the \code{"exp"} element of a 's2dv_cube' object. Latitudinal #' dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. #' Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -#' 'nav_lon'. +#' 'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That +#' is, if one grid point has NAs for each variable, all the time steps at this +#' point should be NAs. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. #'@param dates Vector or matrix of dates in POSIXct format. -#'@param time Deprecated. +#'@param time Deprecated parameter, it has been substituted by 'dates'. It will +#' be removed in the next release. #'@param lon_dim A character string indicating the name of the longitudinal #' dimension. By default, it is set to 'lon'. #'@param lat_dim A character string indicating the name of the latitudinal @@ -152,18 +197,43 @@ CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, #' dimension. By default, it is set to 'sdate'. #'@param var_dim A character string indicating the name of the variable #' dimension. By default, it is set to 'var'. -#'@param neof_composed Number of composed eofs to return in output. -#'@param minvar Minimum variance fraction to be explained in first decomposition. #'@param neof_max Maximum number of single eofs considered in the first #' decomposition. +#'@param neof_composed Number of composed eofs to return in output. +#'@param minvar Minimum variance fraction to be explained in first decomposition. #'@param lon_lim Vector with longitudinal range limits for the calculation for #' all input variables. #'@param lat_lim Vector with latitudinal range limits for the calculation for #' all input variables. -#'@return A list with elements \code{$coeff} (an array of time-varying principal -#'component coefficients), \code{$variance} (a matrix of explained variances), -#'\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each -#'variable). +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#'@return +#'A list containing: +#'\item{coeff}{ +#' An array of principal components with dimensions 'time_dim', 'sdate_dim', +#' number of eof, rest of the dimensions of 'data' except 'lon_dim' and +#' 'lat_dim'. +#'} +#'\item{variance}{ +#' An array of explained variances with dimensions 'eof' and the rest of the +#' dimensions of 'data' except 'time_dim', 'sdate_dim', 'lon_dim' and +#' 'lat_dim'. +#'} +#'\item{eof_pattern}{ +#' An array of EOF patterns obtained by regression with dimensions: 'eof' and +#' the rest of the dimensions of 'data' except 'time_dim' and 'sdate_dim'. +#'} +#'\item{mask}{ +#' An array of the mask with dimensions ('lon_dim', 'lat_dim', rest of the +#' dimensions of 'data' except 'time_dim'). It is made from 'data', 1 for the +#' positions that 'data' has value and NA for the positions that 'data' has NA. +#' It is used to replace NAs with 0s for EOF calculation and mask the result +#' with NAs again after the calculation. +#'} +#'\item{coordinates}{ +#' Longitudinal and latitudinal coordinates vectors. +#'} +#' #'@examples #'exp <- array(runif(1280)*280, dim = c(dataset = 2, member = 2, sdate = 3, #' ftime = 3, lat = 4, lon = 4, var = 1)) @@ -180,19 +250,53 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, lon_dim = "lon", lat_dim = "lat", time_dim = 'ftime', sdate_dim = 'sdate', var_dim = 'var', neof_max = 40, neof_composed = 5, minvar = 0.6, - lon_lim = NULL, lat_lim = NULL) { - - # Know spatial coordinates names - if (!any(lon_dim %in% .KnownLonNames()) | - !any(lat_dim %in% .KnownLatNames())) { - stop("Spatial coordinate names do not match any of the names accepted by ", - "the package.") + lon_lim = NULL, lat_lim = NULL, ncores = NULL) { + # Check inputs + # data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") } # dates if (!is.null(time)) { warning("The parameter 'time' is deprecated, use 'dates' instead.") dates <- time } + # lon_dim + if (!is.character(lon_dim) | length(lon_dim) != 1) { + stop("Parameter 'lon_dim' must be a character string.") + } + if (!lon_dim %in% names(dim(data))) { + stop("Parameter 'lon_dim' is not found in 'data' dimension.") + } + # lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") + } + if (!lat_dim %in% names(dim(data))) { + stop("Parameter 'lat_dim' is not found in 'data' dimension.") + } + # lon + if (!is.numeric(lon) | length(lon) != dim(data)[lon_dim]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'data'.")) + } + if (any(lon > 360 | lon < -360)) { + warning("Some 'lon' is out of the range [-360, 360].") + } + # lat + if (!is.numeric(lat) | length(lat) != dim(data)[lat_dim]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'data'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } # time_dim if (!is.character(time_dim) | length(time_dim) != 1) { stop("Parameter 'time_dim' must be a character string.") @@ -214,6 +318,37 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, if (!var_dim %in% names(dim(data))) { stop("Parameter 'var_dim' is not found in 'data' dimension.") } + # neof_max + if (!is.numeric(neof_max)) { + stop("Parameter 'neof_max' must be a positive integer.") + } + # neof_composed + if (!is.numeric(neof_composed)) { + stop("Parameter 'neof_composed' must be a positive integer.") + } + # minvar + if (!is.numeric(minvar)) { + stop("Parameter 'minvar' must be a positive number between 0 and 1.") + } + # lon_lim + if (!is.null(lon_lim)) { + if (!is.numeric(lon_lim)) { + stop("Parameter 'lon_lim' must be numeric.") + } + } + # lat_lim + if (!is.null(lat_lim)) { + if (!is.numeric(lat_lim)) { + stop("Parameter 'lat_lim' must be numeric.") + } + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } # Reorder and group ftime and sdate together at the end in that order imaskt <- names(dim(data)) %in% time_dim @@ -229,10 +364,9 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, result <- Apply(data = data, target_dims = c(var_dim, lon_dim, lat_dim, "samples"), fun = .multi.eofs, lon = lon, lat = lat, dates = dates, - neof_max = neof_max, - neof_composed = neof_composed, minvar = minvar, - xlim = lon_lim, ylim = lat_lim, - lon_dim = lon_dim, lat_dim = lat_dim) + neof_max = neof_max, neof_composed = neof_composed, + minvar = minvar, xlim = lon_lim, ylim = lat_lim, + lon_dim = lon_dim, lat_dim = lat_dim, ncores = ncores) # Expand back samples to compacted dims dim(result$coeff) <- c(dims[-ind], dim(result$coeff)[-1]) @@ -381,7 +515,8 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, names(dim(variance)) <- "eof" names(dim(regression)) <- c(names(dim(field_arr_raw))[1:3], "eof") - out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) + out <- list(coeff = coefficient, variance = variance, eof_pattern = regression, + mask = mask_arr) out[[names(n_lon)]] <- slon out[[names(n_lat)]] <- slat diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index bc88390c..265eabd4 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -6,16 +6,16 @@ \usage{ CST_MultiEOF( datalist, - neof_max = 40, - neof_composed = 5, - minvar = 0.6, - lon_lim = NULL, - lat_lim = NULL, lon_dim = "lon", lat_dim = "lat", time_dim = "ftime", sdate_dim = "sdate", - var_dim = "var" + var_dim = "var", + neof_max = 40, + neof_composed = 5, + minvar = 0.6, + lon_lim = NULL, + lat_lim = NULL ) } \arguments{ @@ -25,20 +25,9 @@ an element named \code{$data} with at least two spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". Latitudinal dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -'nav_lon'.} - -\item{neof_max}{Maximum number of single eofs considered in the first -decomposition.} - -\item{neof_composed}{Number of composed eofs to return in output.} - -\item{minvar}{Minimum variance fraction to be explained in first decomposition.} - -\item{lon_lim}{Vector with longitudinal range limits for the EOF calculation -for all input variables.} - -\item{lat_lim}{Vector with latitudinal range limits for the EOF calculation -for all input variables.} +'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That +is, if one grid point has NAs for each variable, all the time steps at this +point should be NAs.} \item{lon_dim}{A character string indicating the name of the longitudinal dimension. By default, it is set to 'lon'.} @@ -54,12 +43,48 @@ dimension. By default, it is set to 'sdate'.} \item{var_dim}{A character string indicating the name of the variable dimension. By default, it is set to 'var'.} + +\item{neof_max}{Maximum number of single eofs considered in the first +decomposition.} + +\item{neof_composed}{Number of composed eofs to return in output.} + +\item{minvar}{Minimum variance fraction to be explained in first decomposition.} + +\item{lon_lim}{Vector with longitudinal range limits for the EOF calculation +for all input variables.} + +\item{lat_lim}{Vector with latitudinal range limits for the EOF calculation +for all input variables.} } \value{ -A list with elements \code{$coeff} (an array of time-varying principal -component coefficients), \code{$variance} (a matrix of explained variances), -\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each -variable). +A list containing: +\item{coeff}{ + An 's2dv_cube' with the data element being an array of principal components + with dimensions 'time_dim', 'sdate_dim', number of eof, rest of the + dimensions of 'data' except 'lon_dim' and 'lat_dim'. +} +\item{variance}{ + An 's2dv_cube' with the data element being an array of explained variances + with dimensions 'eof' and the rest of the dimensions of 'data' except + 'time_dim', 'sdate_dim', 'lon_dim' and 'lat_dim'. +} +\item{eof_pattern}{ + An 's2dv_cube' with the data element being an array of EOF patterns obtained + by regression with dimensions: 'eof' and the rest of the dimensions of + 'data' except 'time_dim' and 'sdate_dim'. +} +\item{mask}{ + An 's2dv_cube' with the data element being an array of the mask with + dimensions ('lon_dim', 'lat_dim', rest of the dimensions of 'data' except + 'time_dim'). It is made from 'data', 1 for the positions that 'data' has + value and NA for the positions that 'data' has NA. It is used to replace NAs + with 0s for EOF calculation and mask the result with NAs again after the + calculation. +} +\item{coordinates}{ + Longitudinal and latitudinal coordinates vectors. +} } \description{ This function performs EOF analysis over multiple variables, diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index d4fe8954..dbefda9d 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -20,7 +20,8 @@ MultiEOF( neof_composed = 5, minvar = 0.6, lon_lim = NULL, - lat_lim = NULL + lat_lim = NULL, + ncores = NULL ) } \arguments{ @@ -29,7 +30,9 @@ the variables to be analysed. The other diemnsions follow the same structure as the \code{"exp"} element of a 's2dv_cube' object. Latitudinal dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -'nav_lon'.} +'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That +is, if one grid point has NAs for each variable, all the time steps at this +point should be NAs.} \item{lon}{Vector of longitudes.} @@ -37,7 +40,8 @@ Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', \item{dates}{Vector or matrix of dates in POSIXct format.} -\item{time}{Deprecated.} +\item{time}{Deprecated parameter, it has been substituted by 'dates'. It will +be removed in the next release.} \item{lon_dim}{A character string indicating the name of the longitudinal dimension. By default, it is set to 'lon'.} @@ -66,12 +70,36 @@ all input variables.} \item{lat_lim}{Vector with latitudinal range limits for the calculation for all input variables.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -A list with elements \code{$coeff} (an array of time-varying principal -component coefficients), \code{$variance} (a matrix of explained variances), -\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each -variable). +A list containing: +\item{coeff}{ + An array of principal components with dimensions 'time_dim', 'sdate_dim', + number of eof, rest of the dimensions of 'data' except 'lon_dim' and + 'lat_dim'. +} +\item{variance}{ + An array of explained variances with dimensions 'eof' and the rest of the + dimensions of 'data' except 'time_dim', 'sdate_dim', 'lon_dim' and + 'lat_dim'. +} +\item{eof_pattern}{ + An array of EOF patterns obtained by regression with dimensions: 'eof' and + the rest of the dimensions of 'data' except 'time_dim' and 'sdate_dim'. +} +\item{mask}{ + An array of the mask with dimensions ('lon_dim', 'lat_dim', rest of the + dimensions of 'data' except 'time_dim'). It is made from 'data', 1 for the + positions that 'data' has value and NA for the positions that 'data' has NA. + It is used to replace NAs with 0s for EOF calculation and mask the result + with NAs again after the calculation. +} +\item{coordinates}{ + Longitudinal and latitudinal coordinates vectors. +} } \description{ This function performs EOF analysis over multiple variables, diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index 70346206..8c29b7f1 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -32,6 +32,9 @@ mod3 <- cos( 0.5 + seq ) + sin( seq ^ 2 * 0.2 ) dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) exp3$data <- mod3 +dat3 <- mod3 +dim(dat3) <- c(var = 1, dim(mod3)) + # dat0 dat0 <- exp1 dat01 <- exp2 @@ -39,9 +42,21 @@ dat0$coords <- NULL dat01$coords <- NULL dat02 <- dat0 dat03 <- dat01 +dat04 <- exp1 +dat04$attrs$Dates <- NULL dat02$coords <- list(long = seq(1:4), lati = seq(1:4)) dat03$coords <- list(long = seq(1:4), lati = seq(1:4)) +# dat4 +exp4 <- array(runif(1280)*280, dim = c(dataset = 2, member = 2, sdates = 3, + time = 3, latitude = 4, longitude = 4, vars = 1)) +lon4 <- seq(0, 3) +lat4 <- seq(47, 44) +dates4 <- c("2000-11-01", "2000-12-01", "2001-01-01", "2001-11-01", + "2001-12-01", "2002-01-01", "2002-11-01", "2002-12-01", "2003-01-01") +Dates4 <- as.POSIXct(dates4, format = "%Y-%m-%d") +dim(Dates4) <- c(ftime = 3, sdate = 3) + ############################################## test_that("1. Input checks", { expect_error( @@ -49,17 +64,17 @@ test_that("1. Input checks", { paste0("Elements of the list in parameter 'datalist' must be of the class.", "'s2dv_cube'.") ) - # Check if all dims equal - expect_error( - CST_MultiEOF(list(exp1, exp03)), - "Input data fields must all have the same dimensions." - ) - # Know spatial coordinates names expect_error( CST_MultiEOF(list(dat0, dat01)), paste0("Parameter 'datalist' must have 'data', 'coords' and 'attrs' elements ", "within the 's2dv_cube' structure.") ) + # Dates + expect_error( + CST_MultiEOF(list(dat04)), + "Element 'Dates' is not found in 'attrs' list of the first array." + ) + # coordinates expect_error( CST_MultiEOF(list(dat02, dat03)), paste0("Spatial coordinate names do not match any of the names accepted by ", @@ -67,16 +82,69 @@ test_that("1. Input checks", { " 'y', 'j', 'nav_lat'. Longitudes accepted names: 'lon', 'lons',", " 'longitude', 'x', 'i', 'nav_lon'.") ) + # Check if all dims equal + expect_error( + CST_MultiEOF(list(exp1, exp03)), + "Input data fields must all have the same dimensions." + ) + expect_error( + CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(-250, -245), lat_lim = c(10, 25)), + "No intersection between longitude bounds and data domain." + ) +}) + +############################################## + +test_that("2. Input checks MultiEOF", { + # time + expect_warning( + MultiEOF(data = dat3, lon = lon, lat = lat, time = d), + "The parameter 'time' is deprecated, use 'dates' instead." + ) + expect_error( + MultiEOF(data = 1, lon = lon, lat = lat, time = d), + "Parameter 'data' must have dimension names." + ) + # lon_dim + expect_error( + MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, lon_dim = 'lons'), + "Parameter 'lon_dim' is not found in 'data' dimension." + ) + # lat_dim + expect_error( + MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, lat_dim = 1), + "Parameter 'lat_dim' must be a character string." + ) + # lon expect_error( MultiEOF(data = array(rnorm(96), dim = c(var = 2, lonss = 8, latss = 6)), lon = seq(1:7), lat = seq(1:5), lon_dim = 'lonss', lat_dim = 'latss'), - paste0("Spatial coordinate names do not match any of the names accepted by ", - "the package.") + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'data'.") ) + # lat expect_error( - CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(-250, -245), lat_lim = c(10, 25)), - "No intersection between longitude bounds and data domain.") -}) + MultiEOF(data = array(rnorm(96), dim = c(var = 2, lonss = 8, latss = 6)), + lon = seq(1:8), lat = seq(1:5), lon_dim = 'lonss', lat_dim = 'latss'), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'data'.") + ) + # time_dim + expect_error( + MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, time_dim = 'lons'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + # sdate_dim + expect_error( + MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, sdate_dim = 1), + "Parameter 'sdate_dim' must be a character string." + ) + # var_dim + expect_error( + MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, var_dim = 'vars'), + "Parameter 'var_dim' is not found in 'data' dimension." + ) +}) ############################################## @@ -84,47 +152,47 @@ test_that("2. Output checks", { cal <- CST_MultiEOF(datalist = list(exp1, exp2, exp3), neof_composed=2) expect_equal( length(cal), - 5 + 6 ) dimexp = dim(exp1$data) expect_equal( - dim(cal$coeff), + dim(cal$coeff$data), c(dimexp["ftime"], dimexp["sdate"], eof=2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - dim(cal$variance), + dim(cal$variance$data), c(eof = 2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - dim(cal$eof_pattern), + dim(cal$eof_pattern$data), c(var = 3, dimexp["lon"], dimexp["lat"], eof = 2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - cal$variance[1, 1, 1], + cal$variance$data[1, 1, 1], 0.2909419, tolerance = .00001 ) expect_equal( - cal$coeff[2, 1, 1, 1, 1], + cal$coeff$data[2, 1, 1, 1, 1], 0.5414261, tolerance = .00001 ) expect_equal( - cal$eof_pattern[1, 2, 2, 2, 1, 1], + cal$eof_pattern$data[1, 2, 2, 2, 1, 1], 0.3932484, tolerance = .00001 ) cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max = 5, neof_composed = 2, minvar = 0.2) expect_equal( - cal$coeff[2, 1, 1, 1, 1], + cal$coeff$data[2, 1, 1, 1, 1], -0.6117927, tolerance = .00001 ) cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(5, 30), lat_lim = c(10, 25)) expect_equal( - cal$coeff[2, 1, 1, 1, 1], + cal$coeff$data[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001 ) @@ -156,3 +224,31 @@ test_that("2. Output checks", { "dimension. If the grid point is NA, it should have NA at all time step.") ) }) + +############################################## + +test_that("3. Output checks II", { + # time_dim, sdate_dim, var_dim, lon_dim, lat_dim + cal <- MultiEOF(data = exp4, lon = lon4, lat = lat4, dates = Dates4, + time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', + lon_dim = 'longitude', lat_dim = 'latitude') + expect_equal( + dim(cal[[3]]), + c(vars = 1, longitude = 4, latitude = 4, eof = 5, dataset = 2, member = 2) + ) + # NA + exp4_1 <- exp4 + exp4_1[1,2,1,1,1:2,1,1] <- NA # random NA + expect_error( + MultiEOF(data = exp4_1, lon = lon4, lat = lat4, dates = Dates4, + time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', + lon_dim = 'longitude', lat_dim = 'latitude'), + paste0("Detected certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + ) + exp4_2 <- exp4 + exp4_2[,,,,1,1,] <- NA # spatial NA + cal <- MultiEOF(data = exp4_2, lon = lon4, lat = lat4, dates = Dates4, + time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', + lon_dim = 'longitude', lat_dim = 'latitude') +}) \ No newline at end of file -- GitLab From aeb46cb1efa4b5765cd27eb31811380b64d6a399 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 14 Sep 2023 17:20:11 +0200 Subject: [PATCH 4/6] Correct calibration warning when using multiple cores --- R/CST_Calibration.R | 66 +++++++++++++++++---------- tests/testthat/test-CST_Calibration.R | 9 ++-- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index e973c4d8..87c71fe2 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -450,9 +450,9 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (!inherits(na.rm, "logical")) { stop("Parameter 'na.rm' must be a logical value.") } - if (length(na.rm) > 1) { - na.rm <- na.rm[1] - warning("Paramter 'na.rm' has length greater than 1, and only the fist element is used.") + ## na.fill + if (!inherits(na.fill, "logical")) { + stop("Parameter 'na.fill' must be a logical value.") } ## cal.method, apply_to, alpha if (!any(cal.method %in% c('bias', 'evmos', 'mse_min', 'crps_min', 'rpc-based'))) { @@ -491,8 +491,22 @@ Calibration <- function(exp, obs, exp_cor = NULL, warning(paste0("The 'multi.model' parameter is ignored when using the ", "calibration method '", cal.method, "'.")) } + ## data sufficiently large + data.set.sufficiently.large.out <- + Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, obs = target_dims_obs), + fun = .data.set.sufficiently.large, dat_dim = dat_dim, + ncores = ncores)$output1 - warning_shown <- FALSE + if (!all(data.set.sufficiently.large.out)) { + if (na.fill) { + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with NA values.") + } else { + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with uncorrected values.") + } + } if (is.null(exp_cor)) { calibrated <- Apply(data = list(exp = exp, obs = obs), dat_dim = dat_dim, @@ -525,10 +539,25 @@ Calibration <- function(exp, obs, exp_cor = NULL, } -.data.set.sufficiently.large <- function(exp, obs) { +.data.set.sufficiently.large <- function(exp, obs, dat_dim = NULL) { amt.min.samples <- 3 - amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) - return(amt.good.pts > amt.min.samples) + if (is.null(dat_dim)) { + amt.good.pts <- sum(!is.na(obs) & !apply(exp, c(2), function(x) all(is.na(x)))) + return(amt.good.pts > amt.min.samples) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + amt.good.pts <- NULL + for (i in 1:nexp) { + for (j in 1:nobs) { + agp <- sum(!is.na(obs[, j, drop = FALSE]) & + !apply(exp[, , i, drop = FALSE], c(2), + function(x) all(is.na(x)))) + amt.good.pts <- c(amt.good.pts, agp) + } + } + return(amt.good.pts > amt.min.samples) + } } .make.eval.train.dexes <- function(eval.method, amt.points, amt.points_cor) { @@ -587,26 +616,19 @@ Calibration <- function(exp, obs, exp_cor = NULL, for (i in 1:nexp) { for (j in 1:nobs) { - if (!.data.set.sufficiently.large(exp = exp[, , i, drop = FALSE], - obs = obs[, j, drop = FALSE])) { + exp_data <- exp[, , i] + dim(exp_data) <- dim(exp)[1:2] + obs_data <- as.vector(obs[, j]) + if (!.data.set.sufficiently.large(exp = exp_data, obs = obs_data)) { if (!na.fill) { exp_subset <- exp[, , i] var.cor.fc[, , i, j] <- exp_subset - if (!warning_shown) { - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with uncorrected values.") - warning_shown <<- TRUE - } - } else if (!warning_shown) { - warning("Some forecast data could not be corrected due to data lack", - " and is replaced with NA values.") - warning_shown <<- TRUE + } else { + var.cor.fc <- as.numeric(var.cor.fc) + dim(var.cor.fc) <- c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs) } } else { # Subset data for dataset dimension - obs_data <- as.vector(obs[, j]) - exp_data <- exp[, , i] - dim(exp_data) <- dim(exp)[1:2] if (cor_dat_dim) { expcor_data <- exp_cor[, , i] dim(expcor_data) <- dim(exp_cor)[1:2] @@ -680,8 +702,6 @@ Calibration <- function(exp, obs, exp_cor = NULL, # no significant -> replacing with observed climatology var.cor.fc[, eval.dexes, i, j] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.ev)) } - } else { - stop("unknown calibration method: ", cal.method) } } } diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index 64a09e5a..da50d810 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -190,9 +190,10 @@ test_that("1. Input checks", { CST_Calibration(exp = exp, obs = obs, na.rm = 1), "Parameter 'na.rm' must be a logical value." ) - expect_warning( - CST_Calibration(exp = exp, obs = obs, na.rm = c(T,F)), - "Paramter 'na.rm' has length greater than 1, and only the fist element is used." + # na.fill + expect_error( + CST_Calibration(exp = exp, obs = obs, na.fill = 1), + "Parameter 'na.fill' must be a logical value." ) # cal.method expect_error( @@ -508,7 +509,7 @@ test_that("7. Output checks: dat4", { expect_equal( Calibration(exp = array(1:6, dim = c(member = 2, sdate = 3)), obs = array(1:3, dim = c(sdate = 3))), - array(dim = c(member = 2, sdate = 3)) + array(as.numeric(NA), dim = c(member = 2, sdate = 3)) ) ) suppressWarnings( -- GitLab From 7b05c0f546d07aa7f2ef460eb30070b09a59c17b Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 14 Sep 2023 17:31:05 +0200 Subject: [PATCH 5/6] Correct adding numeric type to logical when all result is NA --- R/CST_Calibration.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index 87c71fe2..d4b9170b 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -523,6 +523,7 @@ Calibration <- function(exp, obs, exp_cor = NULL, exp_cor = target_dims_cor), ncores = ncores, fun = .cal)$output1 } + if (!is.null(dat_dim)) { pos <- match(c(names(dim(exp))[-which(names(dim(exp)) == dat_dim)], 'nexp', 'nobs'), names(dim(calibrated))) @@ -535,6 +536,12 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (exp_cor_remove_memb) { dim(calibrated) <- dim(calibrated)[-which(names(dim(calibrated)) == memb_dim)] } + + dims <- dim(calibrated) + if (is.logical(calibrated)) { + calibrated <- array(as.numeric(calibrated), dim = dims) + } + return(calibrated) } @@ -623,9 +630,6 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (!na.fill) { exp_subset <- exp[, , i] var.cor.fc[, , i, j] <- exp_subset - } else { - var.cor.fc <- as.numeric(var.cor.fc) - dim(var.cor.fc) <- c(dim(exp_cor)[1:2], nexp = nexp, nobs = nobs) } } else { # Subset data for dataset dimension @@ -711,7 +715,6 @@ Calibration <- function(exp, obs, exp_cor = NULL, if (is.null(dat_dim)) { dim(var.cor.fc) <- dim(exp_cor)[1:2] } - return(var.cor.fc) } -- GitLab From 78b05236478d3458e95e4b8305ff09c46397c881 Mon Sep 17 00:00:00 2001 From: Eva Rifa Date: Thu, 14 Sep 2023 17:37:25 +0200 Subject: [PATCH 6/6] Redo changes from a merge request --- R/CST_MultiEOF.R | 377 +++++++---------------------- man/CST_MultiEOF.Rd | 55 +---- man/MultiEOF.Rd | 65 +---- tests/testthat/test-CST_MultiEOF.R | 143 ++--------- 4 files changed, 123 insertions(+), 517 deletions(-) diff --git a/R/CST_MultiEOF.R b/R/CST_MultiEOF.R index 62f6b991..bd218423 100644 --- a/R/CST_MultiEOF.R +++ b/R/CST_MultiEOF.R @@ -17,55 +17,20 @@ #' "lon" and "lat", a dimension "ftime" and a dimension "sdate". Latitudinal #' dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. #' Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -#' 'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That -#' is, if one grid point has NAs for each variable, all the time steps at this -#' point should be NAs. -#'@param lon_dim A character string indicating the name of the longitudinal -#' dimension. By default, it is set to 'lon'. -#'@param lat_dim A character string indicating the name of the latitudinal -#' dimension. By default, it is set to 'lat'. -#'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'time'. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. -#'@param var_dim A character string indicating the name of the variable -#' dimension. By default, it is set to 'var'. -#'@param neof_max Maximum number of single eofs considered in the first -#' decomposition. +#' 'nav_lon'. #'@param neof_composed Number of composed eofs to return in output. #'@param minvar Minimum variance fraction to be explained in first decomposition. +#'@param neof_max Maximum number of single eofs considered in the first +#' decomposition. #'@param lon_lim Vector with longitudinal range limits for the EOF calculation #' for all input variables. #'@param lat_lim Vector with latitudinal range limits for the EOF calculation #' for all input variables. -#'@return -#'A list containing: -#'\item{coeff}{ -#' An 's2dv_cube' with the data element being an array of principal components -#' with dimensions 'time_dim', 'sdate_dim', number of eof, rest of the -#' dimensions of 'data' except 'lon_dim' and 'lat_dim'. -#'} -#'\item{variance}{ -#' An 's2dv_cube' with the data element being an array of explained variances -#' with dimensions 'eof' and the rest of the dimensions of 'data' except -#' 'time_dim', 'sdate_dim', 'lon_dim' and 'lat_dim'. -#'} -#'\item{eof_pattern}{ -#' An 's2dv_cube' with the data element being an array of EOF patterns obtained -#' by regression with dimensions: 'eof' and the rest of the dimensions of -#' 'data' except 'time_dim' and 'sdate_dim'. -#'} -#'\item{mask}{ -#' An 's2dv_cube' with the data element being an array of the mask with -#' dimensions ('lon_dim', 'lat_dim', rest of the dimensions of 'data' except -#' 'time_dim'). It is made from 'data', 1 for the positions that 'data' has -#' value and NA for the positions that 'data' has NA. It is used to replace NAs -#' with 0s for EOF calculation and mask the result with NAs again after the -#' calculation. -#'} -#'\item{coordinates}{ -#' Longitudinal and latitudinal coordinates vectors. -#'} +#'@return A list with elements \code{$coeff} (an array of time-varying principal +#'component coefficients), \code{$variance} (a matrix of explained variances), +#'\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each +#'variable). +#'@import abind #'@examples #'seq <- 1 : (2 * 3 * 4 * 5 * 6 * 8) #'mod1 <- sin( 0.7 + seq )^2 + cos( seq ^ 2 * 1.22 ) @@ -89,27 +54,34 @@ #'exp2$attrs$Dates = d #' #'cal <- CST_MultiEOF(datalist = list(exp1, exp2), neof_composed = 2) -#'@import abind #'@export -CST_MultiEOF <- function(datalist, lon_dim = "lon", lat_dim = "lat", - time_dim = 'ftime', sdate_dim = 'sdate', - var_dim = 'var', neof_max = 40, neof_composed = 5, +CST_MultiEOF <- function(datalist, neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, lat_lim = NULL) { - # Check 's2dv_cube' + # Check s2dv_cube if (!(all(sapply(datalist, inherits, 's2dv_cube')))) { - stop("Elements of the list in parameter 'datalist' must be of the ", - "class 's2dv_cube'.") + stop("Elements of the list in parameter 'datalist' must be of the class ", + "'s2dv_cube', as output by CSTools::CST_Load.") + } + + # Check if all dims equal + adims = lapply(lapply(datalist, function(x) x$data), dim) + if(!all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == + (length(adims)-1))) { + stop("Input data fields must all have the same dimensions.") + } + + exp <- abind(lapply(datalist, '[[', 'data'), along = 0) + dim(exp) <- c(var = length(datalist), dim(datalist[[1]]$data)) + + if (any(is.na(exp))) { + stop("Input data contain NA values.") } + + # Check coordinates if (!all(c('data', 'coords', 'attrs') %in% names(datalist[[1]]))) { stop("Parameter 'datalist' must have 'data', 'coords' and 'attrs' elements ", "within the 's2dv_cube' structure.") } - # Dates - dates <- datalist[[1]]$attrs$Dates - if (is.null(dates)) { - stop("Element 'Dates' is not found in 'attrs' list of the first array.") - } - # coordinates if (!any(names(datalist[[1]]$coords) %in% .KnownLonNames()) | !any(names(datalist[[1]]$coords) %in% .KnownLatNames())) { stop("Spatial coordinate names do not match any of the names accepted by the ", @@ -117,47 +89,28 @@ CST_MultiEOF <- function(datalist, lon_dim = "lon", lat_dim = "lat", "'nav_lat'. Longitudes accepted names: 'lon', 'lons', 'longitude', 'x',", " 'i', 'nav_lon'.") } - - # Check if all dims equal - adims = lapply(lapply(datalist, function(x) x$data), dim) - if (!all(apply(apply(abind(adims, along = 0), 2, duplicated), 2, sum) == - (length(adims)-1))) { - stop("Input data fields must all have the same dimensions.") + # Check dimensions + if (!any(names(dim(datalist[[1]]$data)) %in% .KnownLonNames()) | + !any(names(dim(datalist[[1]]$data)) %in% .KnownLatNames())) { + stop("Spatial dimension names do not match any of the names accepted by ", + "the package.") } - exp <- abind(lapply(datalist, '[[', 'data'), along = 0) - dim(exp) <- c(length(datalist), dim(datalist[[1]]$data)) - names(dim(exp)) <- c(var_dim, names(dim(datalist[[1]]$data))) - - lon_name <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLonNames())]] - lat_name <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLatNames())]] - lon <- as.vector(datalist[[1]]$coords[[lon_name]]) - lat <- as.vector(datalist[[1]]$coords[[lat_name]]) - - result <- MultiEOF(exp, lon = lon, lat = lat, - lon_dim = lon_dim, lat_dim = lat_dim, time_dim = time_dim, - sdate_dim = sdate_dim, var_dim = var_dim, - dates = dates, minvar = minvar, + lon <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLonNames())]] + lat <- names(datalist[[1]]$coords)[[which(names(datalist[[1]]$coords) %in% .KnownLatNames())]] + + lon_name <- names(dim(datalist[[1]]$data))[[which(names(dim(datalist[[1]]$data)) %in% .KnownLonNames())]] + lat_name <- names(dim(datalist[[1]]$data))[[which(names(dim(datalist[[1]]$data)) %in% .KnownLatNames())]] + + result <- MultiEOF(exp, + lon = as.vector(datalist[[1]]$coords[[lon]]), + lat = as.vector(datalist[[1]]$coords[[lat]]), + lon_dim = lon_name, lat_dim = lat_name, + time = datalist[[1]]$attrs$Dates, minvar = minvar, neof_max = neof_max, neof_composed = neof_composed, lon_lim = lon_lim, lat_lim = lat_lim) - names_res <- names(result[1:4]) - res <- lapply(seq_along(result)[1:4], function(i) { - coords = list(lon, lat) - names(coords) <- c(lon_dim, lat_dim) - dates <- dates - varName <- names(result)[[i]] - metadata <- lapply(datalist, function(x) x$attrs$Variable$metadata) - metadata <- unlist(metadata, recursive=FALSE) - metadata <- metadata[unique(names(metadata))] - suppressWarnings( - cube <- s2dv_cube(data = result[[i]], coords = coords, varName = varName, Dates = dates, - source_files = unlist(sapply(datalist, function(x) x$attrs$source_files)), - metadata = metadata, when = Sys.time()) - ) - return(cube) - }) - names(res) <- names_res - return(c(res, result[5:6])) + + return(result) } #'@rdname MultiEOF #'@title EOF analysis of multiple variables starting from an array (reduced @@ -179,61 +132,24 @@ CST_MultiEOF <- function(datalist, lon_dim = "lon", lat_dim = "lat", #' as the \code{"exp"} element of a 's2dv_cube' object. Latitudinal #' dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. #' Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -#' 'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That -#' is, if one grid point has NAs for each variable, all the time steps at this -#' point should be NAs. +#' 'nav_lon'. #'@param lon Vector of longitudes. #'@param lat Vector of latitudes. -#'@param dates Vector or matrix of dates in POSIXct format. -#'@param time Deprecated parameter, it has been substituted by 'dates'. It will -#' be removed in the next release. -#'@param lon_dim A character string indicating the name of the longitudinal -#' dimension. By default, it is set to 'lon'. -#'@param lat_dim A character string indicating the name of the latitudinal -#' dimension. By default, it is set to 'lat'. -#'@param time_dim A character string indicating the name of the temporal -#' dimension. By default, it is set to 'time'. -#'@param sdate_dim A character string indicating the name of the start date -#' dimension. By default, it is set to 'sdate'. -#'@param var_dim A character string indicating the name of the variable -#' dimension. By default, it is set to 'var'. -#'@param neof_max Maximum number of single eofs considered in the first -#' decomposition. +#'@param time Vector or matrix of dates in POSIXct format. +#'@param lon_dim String with dimension name of longitudinal coordinate. +#'@param lat_dim String with dimension name of latitudinal coordinate. #'@param neof_composed Number of composed eofs to return in output. #'@param minvar Minimum variance fraction to be explained in first decomposition. +#'@param neof_max Maximum number of single eofs considered in the first +#' decomposition. #'@param lon_lim Vector with longitudinal range limits for the calculation for #' all input variables. #'@param lat_lim Vector with latitudinal range limits for the calculation for #' all input variables. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#'@return -#'A list containing: -#'\item{coeff}{ -#' An array of principal components with dimensions 'time_dim', 'sdate_dim', -#' number of eof, rest of the dimensions of 'data' except 'lon_dim' and -#' 'lat_dim'. -#'} -#'\item{variance}{ -#' An array of explained variances with dimensions 'eof' and the rest of the -#' dimensions of 'data' except 'time_dim', 'sdate_dim', 'lon_dim' and -#' 'lat_dim'. -#'} -#'\item{eof_pattern}{ -#' An array of EOF patterns obtained by regression with dimensions: 'eof' and -#' the rest of the dimensions of 'data' except 'time_dim' and 'sdate_dim'. -#'} -#'\item{mask}{ -#' An array of the mask with dimensions ('lon_dim', 'lat_dim', rest of the -#' dimensions of 'data' except 'time_dim'). It is made from 'data', 1 for the -#' positions that 'data' has value and NA for the positions that 'data' has NA. -#' It is used to replace NAs with 0s for EOF calculation and mask the result -#' with NAs again after the calculation. -#'} -#'\item{coordinates}{ -#' Longitudinal and latitudinal coordinates vectors. -#'} -#' +#'@return A list with elements \code{$coeff} (an array of time-varying principal +#'component coefficients), \code{$variance} (a matrix of explained variances), +#'\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each +#'variable). #'@examples #'exp <- array(runif(1280)*280, dim = c(dataset = 2, member = 2, sdate = 3, #' ftime = 3, lat = 4, lon = 4, var = 1)) @@ -246,130 +162,38 @@ CST_MultiEOF <- function(datalist, lon_dim = "lon", lat_dim = "lat", #'cal <- MultiEOF(data = exp, lon = lon, lat = lat, time = Dates) #'@import multiApply #'@export -MultiEOF <- function(data, lon, lat, dates, time = NULL, +MultiEOF <- function(data, lon, lat, time, lon_dim = "lon", lat_dim = "lat", - time_dim = 'ftime', sdate_dim = 'sdate', var_dim = 'var', neof_max = 40, neof_composed = 5, minvar = 0.6, - lon_lim = NULL, lat_lim = NULL, ncores = NULL) { - # Check inputs - # data - if (is.null(data)) { - stop("Parameter 'data' cannot be NULL.") - } - if (!is.numeric(data)) { - stop("Parameter 'data' must be a numeric array.") - } - if (any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { - stop("Parameter 'data' must have dimension names.") - } - # dates - if (!is.null(time)) { - warning("The parameter 'time' is deprecated, use 'dates' instead.") - dates <- time - } - # lon_dim - if (!is.character(lon_dim) | length(lon_dim) != 1) { - stop("Parameter 'lon_dim' must be a character string.") - } - if (!lon_dim %in% names(dim(data))) { - stop("Parameter 'lon_dim' is not found in 'data' dimension.") - } - # lat_dim - if (!is.character(lat_dim) | length(lat_dim) != 1) { - stop("Parameter 'lat_dim' must be a character string.") - } - if (!lat_dim %in% names(dim(data))) { - stop("Parameter 'lat_dim' is not found in 'data' dimension.") - } - # lon - if (!is.numeric(lon) | length(lon) != dim(data)[lon_dim]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'data'.")) - } - if (any(lon > 360 | lon < -360)) { - warning("Some 'lon' is out of the range [-360, 360].") - } - # lat - if (!is.numeric(lat) | length(lat) != dim(data)[lat_dim]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'data'.")) - } - if (any(lat > 90 | lat < -90)) { - stop("Parameter 'lat' must contain values within the range [-90, 90].") - } - # time_dim - if (!is.character(time_dim) | length(time_dim) != 1) { - stop("Parameter 'time_dim' must be a character string.") - } - if (!time_dim %in% names(dim(data))) { - stop("Parameter 'time_dim' is not found in 'data' dimension.") - } - # sdate_dim - if (!is.character(sdate_dim) | length(sdate_dim) != 1) { - stop("Parameter 'sdate_dim' must be a character string.") - } - if (!sdate_dim %in% names(dim(data))) { - stop("Parameter 'sdate_dim' is not found in 'data' dimension.") - } - # var_dim - if (!is.character(var_dim) | length(var_dim) != 1) { - stop("Parameter 'var_dim' must be a character string.") - } - if (!var_dim %in% names(dim(data))) { - stop("Parameter 'var_dim' is not found in 'data' dimension.") - } - # neof_max - if (!is.numeric(neof_max)) { - stop("Parameter 'neof_max' must be a positive integer.") - } - # neof_composed - if (!is.numeric(neof_composed)) { - stop("Parameter 'neof_composed' must be a positive integer.") - } - # minvar - if (!is.numeric(minvar)) { - stop("Parameter 'minvar' must be a positive number between 0 and 1.") - } - # lon_lim - if (!is.null(lon_lim)) { - if (!is.numeric(lon_lim)) { - stop("Parameter 'lon_lim' must be numeric.") - } - } - # lat_lim - if (!is.null(lat_lim)) { - if (!is.numeric(lat_lim)) { - stop("Parameter 'lat_lim' must be numeric.") - } - } - # ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } + lon_lim = NULL, lat_lim = NULL) { + + # Know spatial coordinates names + if (!any(lon_dim %in% .KnownLonNames()) | + !any(lat_dim %in% .KnownLatNames())) { + stop("Spatial coordinate names do not match any of the names accepted by ", + "the package.") } # Reorder and group ftime and sdate together at the end in that order - imaskt <- names(dim(data)) %in% time_dim - imasks <- names(dim(data)) %in% sdate_dim + cdim0 <- dim(data) + imaskt <- names(cdim0) %in% "ftime" + imasks <- names(cdim0) %in% "sdate" data <- .aperm2(data, c(which(!(imasks | imaskt)), which(imaskt), which(imasks))) - dims <- dim(data) + cdim <- dim(data) ind <- 1:length(which(!(imaskt | imasks))) # compact (multiply) time_dim dimensions - dim(data) <- c(dims[ind], samples = prod(dims[-ind])) + dim(data) <- c(cdim[ind], samples = prod(cdim[-ind])) # Repeatedly apply .multi.eofs - result <- Apply(data = data, - target_dims = c(var_dim, lon_dim, lat_dim, "samples"), - fun = .multi.eofs, lon = lon, lat = lat, dates = dates, - neof_max = neof_max, neof_composed = neof_composed, - minvar = minvar, xlim = lon_lim, ylim = lat_lim, - lon_dim = lon_dim, lat_dim = lat_dim, ncores = ncores) + result <- Apply(data, c("var", lon_dim, lat_dim, "samples"), + .multi.eofs, lon, lat, time, neof_max = neof_max, + neof_composed = neof_composed, minvar = minvar, + xlim = lon_lim, ylim = lat_lim, + lon_dim = lon_dim, lat_dim = lat_dim) # Expand back samples to compacted dims - dim(result$coeff) <- c(dims[-ind], dim(result$coeff)[-1]) + dim(result$coeff) <- c(cdim[-ind], dim(result$coeff)[-1]) # Recover first lon and first lat list dd = dim(result[[lon_dim]])[1]; m = matrix(1, nrow = dd, ncol = length(dim(result[[lon_dim]]))); m[1:dd] = 1:dd; result[[lon_dim]] = result[[lon_dim]][m] @@ -398,7 +222,7 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, #'variable). #'@noRd -.multi.eofs <- function(field_arr_raw, lon, lat, dates, neof_max = 40, +.multi.eofs <- function(field_arr_raw, lon, lat, time, neof_max = 40, neof_composed = 5, minvar = 0.6, xlim = NULL, ylim = NULL, lon_dim = "lon", lat_dim = "lat") { @@ -407,14 +231,9 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, } else { lin.fit <- lm.fit } - - # Dimensions - n_field <- dim(field_arr_raw)[1] - n_lon <- dim(field_arr_raw)[2] - n_lat <- dim(field_arr_raw)[3] - nt <- dim(field_arr_raw)[4] - etime <- .power.date(dates) + n_field <- dim(field_arr_raw)[1] + etime <- .power.date(time) field_arr <- array(dim = dim(field_arr_raw)) for (k in seq(1, n_field, 1)) { @@ -424,33 +243,8 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, # area weighting, based on the root of cosine ww <- .area.weight(lon, lat, root = T) - # create a mask - mask_arr <- array(dim = c(n_lon, n_lat, n_field)) - for (k in seq(1, n_field, 1)) { field_orig <- field_arr[k, , , ] - - # Check if all the time steps at one grid point are NA-consistent - # The grid point should have all NAs or no NA along time dim. - if (anyNA(field_orig)) { - field_latlon <- array(field_orig, dim = c(n_lon*n_lat, nt)) # [lon*lat, time] - na_ind <- which(is.na(field_latlon), arr.ind = T) - if (dim(na_ind)[1] != nt * length(unique(na_ind[,1]))) { - stop("Detected certain grid points have NAs but not consistent across time ", - "dimension. If the grid point is NA, it should have NA at all time step.") - } - } - # Build the mask - mask <- field_orig[, , 1] - mask[!is.finite(mask)] <- NA - mask[is.finite(mask)] <- 1 - dim(mask) <- c(n_lon, n_lat) - mask_arr[,,k] <- mask - - # Replace mask of NAs with 0s for EOF analysis. - field_arr[k, , , ][!is.finite(field_orig)] <- 0 - field_orig[!is.finite(field_orig)] <- 0 - # calculate the area weight field <- sweep(field_orig, c(1, 2), ww, "*") idx <- .selbox(lon, lat, xlim, ylim) @@ -480,7 +274,7 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, for (i in 1:reqPC) { regression[, , i] <- apply(field_orig, c(1, 2), function(x) lin.fit(as.matrix(coefficient[, i], - ncol = 1), x)$coefficients)*mask + ncol = 1), x)$coefficients) } assign(paste0("pc", k), list(coeff = coefficient, variance = variance, wcoeff = sweep(coefficient, c(2), variance, "*"), @@ -506,20 +300,19 @@ MultiEOF <- function(data, lon, lat, dates, time = NULL, for (i in 1:neof_composed) { regression[k, , , i] <- apply(field_arr[k, , , ], c(1, 2), function(x) lin.fit(as.matrix(coefficient[, i], - ncol = 1), x)$coefficients)*mask_arr[,,k] + ncol = 1), x)$coefficients) } } - names(dim(coefficient)) <- c("dates", "eof") + names(dim(coefficient)) <- c("time", "eof") variance <- array(variance) names(dim(variance)) <- "eof" - names(dim(regression)) <- c(names(dim(field_arr_raw))[1:3], "eof") + names(dim(regression)) <- c("var", "lon", "lat", "eof") - out <- list(coeff = coefficient, variance = variance, eof_pattern = regression, - mask = mask_arr) + out <- list(coeff = coefficient, variance = variance, eof_pattern = regression) - out[[names(n_lon)]] <- slon - out[[names(n_lat)]] <- slat + out[[lon_dim]] <- slon + out[[lat_dim]] <- slat return(out) } diff --git a/man/CST_MultiEOF.Rd b/man/CST_MultiEOF.Rd index 265eabd4..11f8877f 100644 --- a/man/CST_MultiEOF.Rd +++ b/man/CST_MultiEOF.Rd @@ -6,11 +6,6 @@ \usage{ CST_MultiEOF( datalist, - lon_dim = "lon", - lat_dim = "lat", - time_dim = "ftime", - sdate_dim = "sdate", - var_dim = "var", neof_max = 40, neof_composed = 5, minvar = 0.6, @@ -25,24 +20,7 @@ an element named \code{$data} with at least two spatial dimensions named "lon" and "lat", a dimension "ftime" and a dimension "sdate". Latitudinal dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That -is, if one grid point has NAs for each variable, all the time steps at this -point should be NAs.} - -\item{lon_dim}{A character string indicating the name of the longitudinal -dimension. By default, it is set to 'lon'.} - -\item{lat_dim}{A character string indicating the name of the latitudinal -dimension. By default, it is set to 'lat'.} - -\item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'time'.} - -\item{sdate_dim}{A character string indicating the name of the start date -dimension. By default, it is set to 'sdate'.} - -\item{var_dim}{A character string indicating the name of the variable -dimension. By default, it is set to 'var'.} +'nav_lon'.} \item{neof_max}{Maximum number of single eofs considered in the first decomposition.} @@ -58,33 +36,10 @@ for all input variables.} for all input variables.} } \value{ -A list containing: -\item{coeff}{ - An 's2dv_cube' with the data element being an array of principal components - with dimensions 'time_dim', 'sdate_dim', number of eof, rest of the - dimensions of 'data' except 'lon_dim' and 'lat_dim'. -} -\item{variance}{ - An 's2dv_cube' with the data element being an array of explained variances - with dimensions 'eof' and the rest of the dimensions of 'data' except - 'time_dim', 'sdate_dim', 'lon_dim' and 'lat_dim'. -} -\item{eof_pattern}{ - An 's2dv_cube' with the data element being an array of EOF patterns obtained - by regression with dimensions: 'eof' and the rest of the dimensions of - 'data' except 'time_dim' and 'sdate_dim'. -} -\item{mask}{ - An 's2dv_cube' with the data element being an array of the mask with - dimensions ('lon_dim', 'lat_dim', rest of the dimensions of 'data' except - 'time_dim'). It is made from 'data', 1 for the positions that 'data' has - value and NA for the positions that 'data' has NA. It is used to replace NAs - with 0s for EOF calculation and mask the result with NAs again after the - calculation. -} -\item{coordinates}{ - Longitudinal and latitudinal coordinates vectors. -} +A list with elements \code{$coeff} (an array of time-varying principal +component coefficients), \code{$variance} (a matrix of explained variances), +\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each +variable). } \description{ This function performs EOF analysis over multiple variables, diff --git a/man/MultiEOF.Rd b/man/MultiEOF.Rd index dbefda9d..04963e1a 100644 --- a/man/MultiEOF.Rd +++ b/man/MultiEOF.Rd @@ -9,19 +9,14 @@ MultiEOF( data, lon, lat, - dates, - time = NULL, + time, lon_dim = "lon", lat_dim = "lat", - time_dim = "ftime", - sdate_dim = "sdate", - var_dim = "var", neof_max = 40, neof_composed = 5, minvar = 0.6, lon_lim = NULL, - lat_lim = NULL, - ncores = NULL + lat_lim = NULL ) } \arguments{ @@ -30,33 +25,17 @@ the variables to be analysed. The other diemnsions follow the same structure as the \code{"exp"} element of a 's2dv_cube' object. Latitudinal dimension accepted names: 'lat', 'lats', 'latitude', 'y', 'j', 'nav_lat'. Longitudinal dimension accepted names: 'lon', 'lons','longitude', 'x', 'i', -'nav_lon'. NAs can exist but it should be consistent along 'time_dim'. That -is, if one grid point has NAs for each variable, all the time steps at this -point should be NAs.} +'nav_lon'.} \item{lon}{Vector of longitudes.} \item{lat}{Vector of latitudes.} -\item{dates}{Vector or matrix of dates in POSIXct format.} +\item{time}{Vector or matrix of dates in POSIXct format.} -\item{time}{Deprecated parameter, it has been substituted by 'dates'. It will -be removed in the next release.} +\item{lon_dim}{String with dimension name of longitudinal coordinate.} -\item{lon_dim}{A character string indicating the name of the longitudinal -dimension. By default, it is set to 'lon'.} - -\item{lat_dim}{A character string indicating the name of the latitudinal -dimension. By default, it is set to 'lat'.} - -\item{time_dim}{A character string indicating the name of the temporal -dimension. By default, it is set to 'time'.} - -\item{sdate_dim}{A character string indicating the name of the start date -dimension. By default, it is set to 'sdate'.} - -\item{var_dim}{A character string indicating the name of the variable -dimension. By default, it is set to 'var'.} +\item{lat_dim}{String with dimension name of latitudinal coordinate.} \item{neof_max}{Maximum number of single eofs considered in the first decomposition.} @@ -70,36 +49,12 @@ all input variables.} \item{lat_lim}{Vector with latitudinal range limits for the calculation for all input variables.} - -\item{ncores}{An integer indicating the number of cores to use for parallel -computation. The default value is NULL.} } \value{ -A list containing: -\item{coeff}{ - An array of principal components with dimensions 'time_dim', 'sdate_dim', - number of eof, rest of the dimensions of 'data' except 'lon_dim' and - 'lat_dim'. -} -\item{variance}{ - An array of explained variances with dimensions 'eof' and the rest of the - dimensions of 'data' except 'time_dim', 'sdate_dim', 'lon_dim' and - 'lat_dim'. -} -\item{eof_pattern}{ - An array of EOF patterns obtained by regression with dimensions: 'eof' and - the rest of the dimensions of 'data' except 'time_dim' and 'sdate_dim'. -} -\item{mask}{ - An array of the mask with dimensions ('lon_dim', 'lat_dim', rest of the - dimensions of 'data' except 'time_dim'). It is made from 'data', 1 for the - positions that 'data' has value and NA for the positions that 'data' has NA. - It is used to replace NAs with 0s for EOF calculation and mask the result - with NAs again after the calculation. -} -\item{coordinates}{ - Longitudinal and latitudinal coordinates vectors. -} +A list with elements \code{$coeff} (an array of time-varying principal +component coefficients), \code{$variance} (a matrix of explained variances), +\code{eof_pattern} (a matrix of EOF patterns obtained by regression for each +variable). } \description{ This function performs EOF analysis over multiple variables, diff --git a/tests/testthat/test-CST_MultiEOF.R b/tests/testthat/test-CST_MultiEOF.R index 8c29b7f1..66a63525 100644 --- a/tests/testthat/test-CST_MultiEOF.R +++ b/tests/testthat/test-CST_MultiEOF.R @@ -32,9 +32,6 @@ mod3 <- cos( 0.5 + seq ) + sin( seq ^ 2 * 0.2 ) dim(mod3) <- c(dataset = 2, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 8) exp3$data <- mod3 -dat3 <- mod3 -dim(dat3) <- c(var = 1, dim(mod3)) - # dat0 dat0 <- exp1 dat01 <- exp2 @@ -42,39 +39,27 @@ dat0$coords <- NULL dat01$coords <- NULL dat02 <- dat0 dat03 <- dat01 -dat04 <- exp1 -dat04$attrs$Dates <- NULL dat02$coords <- list(long = seq(1:4), lati = seq(1:4)) dat03$coords <- list(long = seq(1:4), lati = seq(1:4)) -# dat4 -exp4 <- array(runif(1280)*280, dim = c(dataset = 2, member = 2, sdates = 3, - time = 3, latitude = 4, longitude = 4, vars = 1)) -lon4 <- seq(0, 3) -lat4 <- seq(47, 44) -dates4 <- c("2000-11-01", "2000-12-01", "2001-01-01", "2001-11-01", - "2001-12-01", "2002-01-01", "2002-11-01", "2002-12-01", "2003-01-01") -Dates4 <- as.POSIXct(dates4, format = "%Y-%m-%d") -dim(Dates4) <- c(ftime = 3, sdate = 3) - ############################################## test_that("1. Input checks", { expect_error( CST_MultiEOF(datalist = 1), - paste0("Elements of the list in parameter 'datalist' must be of the class.", - "'s2dv_cube'.") + paste0("Elements of the list in parameter 'datalist' must be of the class ", + "'s2dv_cube', as output by CSTools::CST_Load.") ) + # Check if all dims equal + expect_error( + CST_MultiEOF(list(exp1, exp03)), + "Input data fields must all have the same dimensions." + ) + # Know spatial coordinates names expect_error( CST_MultiEOF(list(dat0, dat01)), paste0("Parameter 'datalist' must have 'data', 'coords' and 'attrs' elements ", "within the 's2dv_cube' structure.") ) - # Dates - expect_error( - CST_MultiEOF(list(dat04)), - "Element 'Dates' is not found in 'attrs' list of the first array." - ) - # coordinates expect_error( CST_MultiEOF(list(dat02, dat03)), paste0("Spatial coordinate names do not match any of the names accepted by ", @@ -82,69 +67,16 @@ test_that("1. Input checks", { " 'y', 'j', 'nav_lat'. Longitudes accepted names: 'lon', 'lons',", " 'longitude', 'x', 'i', 'nav_lon'.") ) - # Check if all dims equal - expect_error( - CST_MultiEOF(list(exp1, exp03)), - "Input data fields must all have the same dimensions." - ) - expect_error( - CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(-250, -245), lat_lim = c(10, 25)), - "No intersection between longitude bounds and data domain." - ) -}) - -############################################## - -test_that("2. Input checks MultiEOF", { - # time - expect_warning( - MultiEOF(data = dat3, lon = lon, lat = lat, time = d), - "The parameter 'time' is deprecated, use 'dates' instead." - ) - expect_error( - MultiEOF(data = 1, lon = lon, lat = lat, time = d), - "Parameter 'data' must have dimension names." - ) - # lon_dim - expect_error( - MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, lon_dim = 'lons'), - "Parameter 'lon_dim' is not found in 'data' dimension." - ) - # lat_dim - expect_error( - MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, lat_dim = 1), - "Parameter 'lat_dim' must be a character string." - ) - # lon expect_error( MultiEOF(data = array(rnorm(96), dim = c(var = 2, lonss = 8, latss = 6)), lon = seq(1:7), lat = seq(1:5), lon_dim = 'lonss', lat_dim = 'latss'), - paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'data'.") - ) - # lat - expect_error( - MultiEOF(data = array(rnorm(96), dim = c(var = 2, lonss = 8, latss = 6)), - lon = seq(1:8), lat = seq(1:5), lon_dim = 'lonss', lat_dim = 'latss'), - paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'data'.") - ) - # time_dim - expect_error( - MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, time_dim = 'lons'), - "Parameter 'time_dim' is not found in 'data' dimension." - ) - # sdate_dim - expect_error( - MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, sdate_dim = 1), - "Parameter 'sdate_dim' must be a character string." + paste0("Spatial coordinate names do not match any of the names accepted by ", + "the package.") ) - # var_dim expect_error( - MultiEOF(data = dat3, lon = lon, lat = lat, dates = d, var_dim = 'vars'), - "Parameter 'var_dim' is not found in 'data' dimension." - ) -}) + CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(-250, -245), lat_lim = c(10, 25)), + "No intersection between longitude bounds and data domain.") +}) ############################################## @@ -152,47 +84,47 @@ test_that("2. Output checks", { cal <- CST_MultiEOF(datalist = list(exp1, exp2, exp3), neof_composed=2) expect_equal( length(cal), - 6 + 5 ) dimexp = dim(exp1$data) expect_equal( - dim(cal$coeff$data), + dim(cal$coeff), c(dimexp["ftime"], dimexp["sdate"], eof=2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - dim(cal$variance$data), + dim(cal$variance), c(eof = 2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - dim(cal$eof_pattern$data), + dim(cal$eof_pattern), c(var = 3, dimexp["lon"], dimexp["lat"], eof = 2, dimexp["dataset"], dimexp["member"]) ) expect_equal( - cal$variance$data[1, 1, 1], + cal$variance[1, 1, 1], 0.2909419, tolerance = .00001 ) expect_equal( - cal$coeff$data[2, 1, 1, 1, 1], + cal$coeff[2, 1, 1, 1, 1], 0.5414261, tolerance = .00001 ) expect_equal( - cal$eof_pattern$data[1, 2, 2, 2, 1, 1], + cal$eof_pattern[1, 2, 2, 2, 1, 1], 0.3932484, tolerance = .00001 ) cal <- CST_MultiEOF(list(exp1, exp2, exp3), neof_max = 5, neof_composed = 2, minvar = 0.2) expect_equal( - cal$coeff$data[2, 1, 1, 1, 1], + cal$coeff[2, 1, 1, 1, 1], -0.6117927, tolerance = .00001 ) cal <- CST_MultiEOF(list(exp1, exp2, exp3), lon_lim = c(5, 30), lat_lim = c(10, 25)) expect_equal( - cal$coeff$data[2, 1, 1, 1, 1], + cal$coeff[2, 1, 1, 1, 1], 0.8539488, tolerance = .00001 ) @@ -220,35 +152,6 @@ test_that("2. Output checks", { exp3$data[1, 1, 1, 1, 1, 1] = NaN expect_error( CST_MultiEOF(list(exp1, exp3), neof_max = 8, neof_composed=2), - paste0("Detected certain grid points have NAs but not consistent across time ", - "dimension. If the grid point is NA, it should have NA at all time step.") + "Input data contain NA values." ) }) - -############################################## - -test_that("3. Output checks II", { - # time_dim, sdate_dim, var_dim, lon_dim, lat_dim - cal <- MultiEOF(data = exp4, lon = lon4, lat = lat4, dates = Dates4, - time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', - lon_dim = 'longitude', lat_dim = 'latitude') - expect_equal( - dim(cal[[3]]), - c(vars = 1, longitude = 4, latitude = 4, eof = 5, dataset = 2, member = 2) - ) - # NA - exp4_1 <- exp4 - exp4_1[1,2,1,1,1:2,1,1] <- NA # random NA - expect_error( - MultiEOF(data = exp4_1, lon = lon4, lat = lat4, dates = Dates4, - time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', - lon_dim = 'longitude', lat_dim = 'latitude'), - paste0("Detected certain grid points have NAs but not consistent across time ", - "dimension. If the grid point is NA, it should have NA at all time step.") - ) - exp4_2 <- exp4 - exp4_2[,,,,1,1,] <- NA # spatial NA - cal <- MultiEOF(data = exp4_2, lon = lon4, lat = lat4, dates = Dates4, - time_dim = 'time', sdate_dim = 'sdates', var_dim = 'vars', - lon_dim = 'longitude', lat_dim = 'latitude') -}) \ No newline at end of file -- GitLab