Newer
Older
#'Area-weighted empirical orthogonal function analysis using SVD
#'
#'Perform an area-weighted EOF analysis using single value decomposition (SVD)
#'based on a covariance matrix or a correlation matrix if parameter 'corr' is
#'set to TRUE.
#'
#'@param ano A numerical array of anomalies with named dimensions to calculate
#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs
#' could exist but it should be consistent along time_dim. That is, if one grid
#' point has NAs, all the time steps at this point should be NAs.
#'@param lat A vector of the latitudes of 'ano'.
#'@param lon A vector of the longitudes of 'ano'.
#'@param time_dim A character string indicating the name of the time dimension
#'@param space_dim A vector of two character strings. The first is the dimension
#' name of latitude of 'ano' and the second is the dimension name of longitude
#' of 'ano'. The default value is c('lat', 'lon').
#'@param neofs A positive integer of the modes to be kept. The default value is
#' 15. If time length or the product of the length of space_dim is smaller than
#' neofs, neofs will be changed to the minimum of the three values.
#'@param corr A logical value indicating whether to base on a correlation (TRUE)
#' or on a covariance matrix (FALSE). The default value is FALSE.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'A list containing:
#'\item{EOFs}{
#' An array of EOF patterns normalized to 1 (unitless) with dimensions
#' (number of modes, rest of the dimensions of 'ano' except 'time_dim').
#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed
#' field.
#'}
#'\item{PCs}{
#' An array of principal components with the units of the original field to
#' the power of 2, with dimensions (time_dim, number of modes, rest of the
#' dimensions of 'ano' except 'space_dim').
#' 'PCs' contains already the percentage of explained variance so,
#' to reconstruct the original field it's only needed to multiply 'EOFs'
#' by 'PCs'.
#' An array of the percentage (%) of variance fraction of total variance
#' explained by each mode (number of modes). The dimensions are (number of
#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim').
#' An array of the mask with dimensions (space_dim, rest of the dimensions of
#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that
#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to
aho
committed
#' replace NAs with 0s for EOF calculation and mask the result with NAs again
#' after the calculation.
aho
committed
#' An array of the area weighting with dimensions 'space_dim'. It is calculated
#' by cosine of 'lat' and used to compute the fraction of variance explained by
#' each EOFs.
#'\item{tot_var}{
#' A number or a numeric array of the total variance explained by all the modes.
#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'.
#'}
#'
#'@seealso ProjectField, NAO, PlotBoxWhisker
#'@examples
aho
committed
#'# This example computes the EOFs along forecast horizons and plots the one
#'# that explains the greatest amount of variability. The example data has low
#'# resolution so the result may not be explanatory, but it displays how to
#'# use this function.
#'\dontshow{
#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
#' c('observation'), startDates,
#' leadtimemin = 1,
#' leadtimemax = 4,
#' output = 'lonlat',
#' latmin = 27, latmax = 48,
#' lonmin = -12, lonmax = 40)
#'}
#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
#'tmp <- MeanDims(ano$exp, c('dataset', 'member'))
#'eof <- EOF(ano, sampleData$lat, sampleData$lon)
#'\dontrun{
#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat)
#'}
#'
#'@import multiApply
#'@importFrom stats sd
#'@export
EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'),
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
neofs = 15, corr = FALSE, ncores = NULL) {
# Check inputs
## ano
if (is.null(ano)) {
stop("Parameter 'ano' cannot be NULL.")
}
if (!is.numeric(ano)) {
stop("Parameter 'ano' must be a numeric array.")
}
if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) {
stop("Parameter 'ano' must have dimension names.")
}
## time_dim
if (!is.character(time_dim) | length(time_dim) > 1) {
stop("Parameter 'time_dim' must be a character string.")
}
if (!time_dim %in% names(dim(ano))) {
stop("Parameter 'time_dim' is not found in 'ano' dimension.")
}
## space_dim
if (!is.character(space_dim) | length(space_dim) != 2) {
stop("Parameter 'space_dim' must be a character vector of 2.")
}
if (any(!space_dim %in% names(dim(ano)))) {
stop("Parameter 'space_dim' is not found in 'ano' dimension.")
}
## lat
if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) {
stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
"length as the latitude dimension of 'ano'."))
}
if (any(lat > 90 | lat < -90)) {
stop("Parameter 'lat' must contain values within the range [-90, 90].")
}
## lon
if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) {
stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
"length as the longitude dimension of 'ano'."))
}
if (any(lon > 360 | lon < -360)) {
warning("Some 'lon' is out of the range [-360, 360].")
}
## neofs
if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) {
stop("Parameter 'neofs' must be a positive integer.")
}
## corr
if (!is.logical(corr) | length(corr) > 1) {
stop("Parameter 'corr' must be one logical value.")
}
## ncores
if (!is.null(ncores)) {
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
###############################
# Calculate EOF
# # Replace mask of NAs with 0s for EOF analysis.
# ano[!is.finite(ano)] <- 0
# Area weighting. Weights for EOF; needed to compute the
# fraction of variance explained by each EOFs
space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a))
wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind])
# We want the covariance matrix to be weigthed by the grid
# cell area so the anomaly field is weighted by its square
# root since the covariance matrix equals transpose(ano)
# times ano.
wght <- sqrt(wght)
# neofs is bounded
if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) {
neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)
.warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ",
"the length of time_dim, the production of the length of space_dim, ",
"and neofs."))
}
target_dims = c(time_dim, space_dim),
output_dims = list(EOFs = c('mode', space_dim),
PCs = c(time_dim, 'mode'),
var = 'mode',
fun = .EOF,
corr = corr, neofs = neofs,
wght = wght,
ncores = ncores)
return(c(res, wght = list(wght)))
}
.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) {
# ano: [time, lat, lon]
# Dimensions
nt <- dim(ano)[1]
ny <- dim(ano)[2]
nx <- dim(ano)[3]
# Check if all the time steps at one grid point are NA-consistent.
# The grid point should have all NAs or no NA along time dim.
if (any(is.na(ano))) {
ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon]
na_ind <- which(is.na(ano_latlon), arr.ind = T)
if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) {
stop("Detect certain grid points have NAs but not consistent across time ",
"dimension. If the grid point is NA, it should have NA at all time step.")
}
}
# Build the mask
mask <- ano[1, , ]
mask[!is.finite(mask)] <- NA
mask[is.finite(mask)] <- 1
dim(mask) <- c(ny, nx)
# Replace mask of NAs with 0s for EOF analysis.
ano[!is.finite(ano)] <- 0
ano <- ano * InsertDim(wght, 1, nt)
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# The use of the correlation matrix is done under the option corr.
if (corr == TRUE) {
stdv <- apply(ano, c(2, 3), sd, na.rm = T)
ano <- ano/InsertDim(stdv, 1, nt)
}
# Time/space matrix for SVD
dim(ano) <- c(nt, ny * nx)
dim.dat <- dim(ano)
# 'transpose' means the array needs to be transposed before
# calling La.svd for computational efficiency because the
# spatial dimension is larger than the time dimension. This
# goes with transposing the outputs of LA.svd also.
if (dim.dat[2] > dim.dat[1]) {
transpose <- TRUE
} else {
transpose <- FALSE
}
if (transpose) {
pca <- La.svd(t(ano))
} else {
pca <- La.svd(ano)
}
# La.svd conventions: decomposition X = U D t(V) La.svd$u
# returns U La.svd$d returns diagonal values of D La.svd$v
# returns t(V) !! The usual convention is PC=U and EOF=V.
# If La.svd is called for ano (transpose=FALSE case): EOFs:
# $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE
# case): EOFs: t($u) PCs: t($v)
if (transpose) {
pca.EOFs <- t(pca$u)
pca.PCs <- t(pca$v)
} else {
pca.EOFs <- pca$v
pca.PCs <- pca$u
}
# The numbers of transposition is limited to neofs
PC <- pca.PCs[, 1:neofs]
EOF <- pca.EOFs[1:neofs, ]
dim(EOF) <- c(neofs, ny, nx)
# To sort out crash when neofs=1.
if (neofs == 1) {
}
# Computation of the % of variance associated with each mode
W <- pca$d[1:neofs]
tot.var <- sum(pca$d^2)
var.eof <- 100 * pca$d[1:neofs]^2/tot.var
for (e in 1:neofs) {
# Set all masked grid points to NA in the EOFs
# Divide patterns by area weights so that EOF * PC gives unweigthed (original) data
EOF[e, , ] <- EOF[e, , ] * mask / wght
# PC is multiplied by the explained variance,
# so that the reconstruction is only EOF * PC
PC[, e] <- PC[, e] * W[e]
return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))