diff --git a/NAMESPACE b/NAMESPACE index 2f19f4b5a559a7e01270959599b31a8d7e0f0402..3aa8f2901eef4b768a0125e9aa4bfc3a0b6d1c93 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(ACC) export(AMV) export(AnimateMap) export(Ano) +export(Ano_CrossValid) export(CDORemap) export(Clim) export(Cluster) @@ -22,7 +23,9 @@ export(ConfigShowDefinitions) export(ConfigShowSimilarEntries) export(ConfigShowTable) export(Corr) +export(EOF) export(Eno) +export(EuroAtlanticTC) export(Filter) export(GMST) export(GSAT) @@ -31,15 +34,19 @@ export(InsertDim) export(LeapYear) export(Load) export(MeanDims) +export(NAO) export(Persistence) export(PlotACC) export(PlotAno) +export(PlotBoxWhisker) export(PlotClim) export(PlotEquiMap) export(PlotLayout) export(PlotMatrix) export(PlotSection) export(PlotStereoMap) +export(ProjectField) +export(REOF) export(RMS) export(RMSSS) export(RandomWalkTest) @@ -109,4 +116,5 @@ importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,spectrum) importFrom(stats,ts) +importFrom(stats,varimax) importFrom(stats,window) diff --git a/R/Ano_CrossValid.R b/R/Ano_CrossValid.R new file mode 100644 index 0000000000000000000000000000000000000000..22e710a1a3ded8b428eb7493a1dd0c0aec489d18 --- /dev/null +++ b/R/Ano_CrossValid.R @@ -0,0 +1,226 @@ +#'Compute anomalies in cross-validation mode +#' +#'Compute the anomalies from the arrays of the experimental and observational +#'data output by subtracting the climatologies computed with a leave-one-out +#'cross validation technique and a per-pair method (Garcia-Serrano and +#'Doblas-Reyes, CD, 2012). +#'Per-pair climatology means that only the start dates covered by the +#'whole experiments/observational datasets will be used. In other words, the +#'startdates which do not all have values along 'dat_dim' dimension of both +#'the 'exp' and 'obs' are excluded when computing the climatologies. +#' +#'@param exp A named numeric array of experimental data, with at least +#' dimensions 'time_dim' and 'dat_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character vector indicating the name of the dataset and +#' member dimensions. When calculating the climatology, if data at one +#' startdate (i.e., 'time_dim') is not complete along 'dat_dim', this startdate +#' along 'dat_dim' will be discarded. The default value is +#' "c('dataset', 'member')". +#'@param memb_dim A character string indicating the name of the member +#' dimension. Only used when parameter 'memb' is FALSE. It must be one element +#' in 'dat_dim'. The default value is 'member'. +#'@param memb A logical value indicating whether to subtract the climatology +#' based on the individual members (TRUE) or the ensemble mean over all +#' members (FALSE) when calculating the anomalies. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list of 2: +#'\item{$exp}{ +#' A numeric array with the same dimensions as 'exp'. The dimension order may +#' change. +#'} +#'\item{$obs}{ +#' A numeric array with the same dimensions as 'obs'.The dimension order may +#' change. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'anomalies <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'\dontrun{ +#'PlotAno(anomalies$exp, anomalies$obs, startDates, +#' toptitle = paste('anomalies'), ytitle = c('K', 'K', 'K'), +#' legends = 'ERSST', biglab = FALSE, fileout = 'tos_ano_crossvalid.eps') +#'} +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), + memb_dim = 'member', memb = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ", + "time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if(!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { + stop("Parameter 'exp' and 'obs' must have same dimension name.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## memb_dim + if (!memb) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!memb_dim %in% dat_dim) { + stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + for (i in 1:length(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim[i])] + name_obs <- name_obs[-which(name_obs == dat_dim[i])] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same length of ", + "all dimensions except 'dat_dim'.")) + } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + if (any(order_obs != sort(order_obs))) { + obs <- Reorder(obs, order_obs) + } + + #----------------------------------- + # Per-paired method: If any sdate along dat_dim is NA, turn all sdate points along dat_dim into NA. + pos <- rep(0, length(dat_dim)) # dat_dim: [dataset, member] + for (i in 1:length(dat_dim)) { + pos[i] <- which(names(dim(obs)) == dat_dim[i]) + } + outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) + + MeanDims(obs, pos, na.rm = FALSE) + outrows_obs <- outrows_exp + + for (i in 1:length(pos)) { + outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) + outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) + } + exp_for_clim <- exp + obs_for_clim <- obs + exp_for_clim[which(is.na(outrows_exp))] <- NA + obs_for_clim[which(is.na(outrows_obs))] <- NA + + #----------------------------------- + + res <- Apply(list(exp, obs, exp_for_clim, obs_for_clim), + target_dims = c(time_dim, dat_dim), + fun = .Ano_CrossValid, + memb_dim = memb_dim, memb = memb, + ncores = ncores) + + return(res) +} + +.Ano_CrossValid <- function(exp, obs, exp_for_clim, obs_for_clim, + memb_dim = 'member', memb = TRUE, ncores = NULL) { + # exp: [sdate, dat_dim, memb_dim] + # obs: [sdate, dat_dim, memb_dim] + ano_exp_list <- vector('list', length = dim(exp)[1]) #length: [sdate] + ano_obs_list <- vector('list', length = dim(obs)[1]) + + for (tt in 1:dim(exp)[1]) { #[sdate] + # calculate clim + exp_sub <- ClimProjDiags::Subset(exp_for_clim, 1, c(1:dim(exp)[1])[-tt]) + obs_sub <- ClimProjDiags::Subset(obs_for_clim, 1, c(1:dim(obs)[1])[-tt]) + clim_exp <- apply(exp_sub, c(1:length(dim(exp)))[-1], mean, na.rm = TRUE) # average out time_dim -> [dat, memb] + clim_obs <- apply(obs_sub, c(1:length(dim(obs)))[-1], mean, na.rm = TRUE) + + # ensemble mean + if (!memb) { + if (is.null(dim(clim_exp)) | length(dim(clim_exp)) == 1) { #dim: [member] + clim_exp <- mean(clim_exp, na.rm = TRUE) # a number + clim_obs <- mean(clim_obs, na.rm = TRUE) + } else { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + dim_exp_ori <- dim(clim_exp) + dim_obs_ori <- dim(clim_obs) + + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(names(dim(as.array(clim_exp))))) { + clim_exp <- as.array(clim_exp) + clim_obs <- as.array(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } + + # Expand it back + clim_exp_tmp <- array(clim_exp, dim = c(dim_exp_ori[pos], dim_exp_ori[-pos])) + clim_obs_tmp <- array(clim_obs, dim = c(dim_obs_ori[pos], dim_obs_ori[-pos])) + # Reorder it back to dim(clim_exp) + tmp <- match(dim_exp_ori, dim(clim_exp_tmp)) + if (any(tmp != sort(tmp))) { + clim_exp <- Reorder(clim_exp_tmp, tmp) + clim_obs <- Reorder(clim_obs_tmp, tmp) + } else { + clim_exp <- clim_exp_tmp + clim_obs <- clim_obs_tmp + } + } + } + # calculate ano + ano_exp_list[[tt]] <- ClimProjDiags::Subset(exp, 1, tt, drop = 'selected') - clim_exp + ano_obs_list[[tt]] <- ClimProjDiags::Subset(obs, 1, tt, drop = 'selected') - clim_obs + } + + ano_exp <- array(unlist(ano_exp_list), dim = c(dim(exp)[-1], dim(exp)[1])) + ano_exp <- Reorder(ano_exp, c(length(dim(exp)), 1:(length(dim(exp)) - 1))) + ano_obs <- array(unlist(ano_obs_list), dim = c(dim(obs)[-1], dim(obs)[1])) + ano_obs <- Reorder(ano_obs, c(length(dim(obs)), 1:(length(dim(obs)) - 1))) + + return(list(exp = ano_exp, obs = ano_obs)) +} diff --git a/R/EOF.R b/R/EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..4221ccfac8f2f20c9dd1583853b85c6ecd13b5b9 --- /dev/null +++ b/R/EOF.R @@ -0,0 +1,290 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.")) + } + if (any(lon > 360 | lon < -360)) { + warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- dim(ano)[c(2, 3)] + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr == TRUE) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano/InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. + if (dim.dat[2] > dim.dat[1]) { + transpose <- TRUE + } else { + transpose <- FALSE + } + if (transpose) { + pca <- La.svd(t(ano)) + } else { + pca <- La.svd(ano) + } + + # La.svd conventions: decomposition X = U D t(V) La.svd$u + # returns U La.svd$d returns diagonal values of D La.svd$v + # returns t(V) !! The usual convention is PC=U and EOF=V. + # If La.svd is called for ano (transpose=FALSE case): EOFs: + # $v PCs: $u If La.svd is called for t(ano) (transposed=TRUE + # case): EOFs: t($u) PCs: t($v) + + if (transpose) { + pca.EOFs <- t(pca$u) + pca.PCs <- t(pca$v) + } else { + pca.EOFs <- pca$v + pca.PCs <- pca$u + } + + # The numbers of transposition is limited to neofs + PC <- pca.PCs[, 1:neofs] + EOF <- pca.EOFs[1:neofs, ] + dim(EOF) <- c(neofs, ny, nx) + + # To sort out crash when neofs=1. + if (neofs == 1) { + PC <- InsertDim(PC, 2, 1, name = 'new') + } + + # Computation of the % of variance associated with each mode + W <- pca$d[1:neofs] + tot.var <- sum(pca$d^2) + var.eof <- 100 * pca$d[1:neofs]^2/tot.var + + for (e in 1:neofs) { + + # Factor to normalize the EOF. + eof.patt.nn <- EOF[e, , ] * mask + eof.patt.ms <- sum(eof.patt.nn^2, na.rm = TRUE) + + # Normalize the EOF + eof.patt <- eof.patt.nn/eof.patt.ms + + # PC is multiplied by the normalization factor and the + # weights, then the reconstruction is only EOF * PC (we have + # multiplied ano by weight) + eof.pc <- PC[, e] * eof.patt.ms * W[e] + + eof.patt <- eof.patt/wght + + EOF[e, , ] <- eof.patt + PC[, e] <- eof.pc + } + + if (neofs == 1) { + var.eof <- as.array(var.eof) + } + + return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask))) +} diff --git a/R/EuroAtlanticTC.R b/R/EuroAtlanticTC.R new file mode 100644 index 0000000000000000000000000000000000000000..2860a5336695698ecd62eb43e6bef81e13d68b57 --- /dev/null +++ b/R/EuroAtlanticTC.R @@ -0,0 +1,208 @@ +#'Teleconnection indices in European Atlantic Ocean region +#' +#'Calculate the four main teleconnection indices in European Atlantic Ocean +#'region: North Atlantic oscillation (NAO), East Atlantic Pattern (EA), East +#'Atlantic/Western Russia (EAWR), and Scandinavian pattern (SCA). The function +#'\code{REOF()} is used for the calculation, and the first four modes are +#'returned. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' REOF then the four teleconnections. The dimensions must have at least +#' 'time_dim' and 'space_dim', and the data should cover the European Atlantic +#' Ocean area (20N-80N, 90W-60E). +#'@param lat A vector of the latitudes of 'ano'. It should be 20N-80N. +#'@param lon A vector of the longitudes of 'ano'. It should be 90W-60E. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ntrunc A positive integer of the modes to be kept. The default value +#' is 30. If time length or the product of latitude length and longitude +#' length is less than ntrunc, ntrunc is equal to the minimum of the three +#' values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{patterns}{ +#' An array of the first four REOF patterns normalized to 1 (unitless) with +#' dimensions (modes = 4, the rest of the dimensions of 'ano' except +#' 'time_dim'). The modes represent NAO, EA, EAWR, and SCA, of which the order +#' and sign changes depending on the dataset and period employed, so manual +#' reordering may be needed. Multiplying 'patterns' by 'indices' gives the +#' original reconstructed field. +#'} +#'\item{indices}{ +#' An array of the first four principal components with the units of the +#' original field to the power of 2, with dimensions (time_dim, modes = 4, the +#' rest of the dimensions of 'ano' except 'space_dim'). +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode. The dimensions are (modes = ntrunc, the rest of the +#' dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by the square root of cosine of 'lat' and used to compute the fraction of +#' variance explained by each REOFs. +#'} +#'@examples +#'# Use synthetic data +#'set.seed(1) +#'dat <- array(rnorm(800), dim = c(dat = 2, sdate = 5, lat = 8, lon = 15)) +#'lat <- seq(10, 90, length.out = 8) +#'lon <- seq(-100, 70, length.out = 15) +#'res <- EuroAtlanticTC(dat, lat = lat, lon = lon) +#' +#'@seealso REOF NAO +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +EuroAtlanticTC <- function(ano, lat, lon, ntrunc = 30, time_dim = 'sdate', + space_dim = c('lat', 'lon'), corr = FALSE, + ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat and lon + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.")) + } + if (all(lon >= 0)) { + if (any(lon > 360 | lon < 0)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } else { + if (any(lon < -180 | lon > 180)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } + stop_needed <- FALSE + # A preset region for computing EuroAtlantic teleconnections + lat.min <- 20 + lat.max <- 80 + lon.min <- -90 # Write this as a negative number please! + lon.max <- 60 + + # Choose lats and lons inside the Euroatlantic region. + # Change lon to [-180, 180] if it isn't + lon <- ifelse(lon < 180, lon, lon - 360) + ind_lat <- which(lat >= lat.min & lat <= lat.max) + ind_lon <- which(lon >= lon.min & lon <= lon.max) + + # Subset + lat <- lat[ind_lat] + lon <- lon[ind_lon] + + # Lat should be [20, 80] (5deg tolerance) + if (max(lat) < (lat.max - 5) | min(lat) > (lat.min + 5)) { + stop_needed <- TRUE + } + # Lon should be [-90, 60] (5deg tolerance) + if (!(min(lon) < (lon.min + 5) & max(lon) > (lon.max - 5))) { + stop_needed <- TRUE + } + if (stop_needed) { + stop("The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).") + } + ## ntrunc + if (!is.numeric(ntrunc) | ntrunc %% 1 != 0 | ntrunc <= 0 | length(ntrunc) > 1) { + stop("Parameter 'ntrunc' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate indices + + ano <- ClimProjDiags::Subset(ano, space_dim, list(ind_lat, ind_lon), drop = FALSE) + + # ntrunc is bounded + if (ntrunc != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)) { + ntrunc <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc) + .warning(paste0("Parameter 'ntrunc' is changed to ", ntrunc, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and ntrunc.")) + } + if (ntrunc < 4) { + .warning(paste0("Parameter 'ntrunc' is ", ntrunc, " so only the first ", ntrunc, + " modes will be calculated.")) + } + + # Area weighting is needed to compute the fraction of variance explained by + # each mode + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anoaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + reofs <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(REOFs = c('mode', space_dim), + RPCs = c(time_dim, 'mode'), + var = 'mode'), + fun = .REOF, + corr = corr, ntrunc = ntrunc, wght = wght, + ncores = ncores) + + if (ntrunc >= 4) { + TCP <- ClimProjDiags::Subset(reofs$REOFs, 'mode', 1:4, drop = FALSE) + TCI <- ClimProjDiags::Subset(reofs$RPCs, 'mode', 1:4, drop = FALSE) + } else { + TCP <- reofs$REOFs + TCI <- reofs$RPCs + } + + return(list(patterns = TCP, indices = TCI, var = reofs$var, wght = wght)) +} + diff --git a/R/NAO.R b/R/NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..4af0308865db2b9e6975851448b1434f4a0019c3 --- /dev/null +++ b/R/NAO.R @@ -0,0 +1,421 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for forecast (exp) and observations +#'(obs) based on the leading EOF pattern. +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. The default value is 2:4, i.e., from 2nd to 4th +#' forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +#' year you are evaluating out), and then projecting forecast anomalies onto +#' this EOF (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains: +#'\item{exp}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. +#' } +#'\item{obs}{ +#' A numeric array of observed NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. +#'} +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop(paste0("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop(paste0("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } else { + add_member_back <- FALSE + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (any(!space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (any(!space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## exp and obs (2) + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'memb_dim'.")) + } + } + ## ftime_avg + if (!is.vector(ftime_avg) | !is.integer(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } else { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } else { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.")) + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop(paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.")) + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + + # wght + wght <- array(sqrt(cos(lat * pi/180)), dim = c(length(lat), length(lon))) + + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + obsproj = obsproj, wght = wght, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + obsproj = obsproj, wght = wght, obs = NULL, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + obsproj = obsproj, wght = wght, exp = NULL, add_member_back = add_member_back, + ncores = ncores) + } + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, wght, obsproj = TRUE, add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + + if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) + if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + + for (tt in 1:ntime) { #sdate + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[c(1:ntime)[-tt], , , drop = FALSE] + obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + # dim(obs_EOF$EOFs): [mode, lat, lon] + if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + obs_EOF$EOFs <- obs_EOF$EOFs * (-1) +# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + NAOO.ver[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, c(1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + exp_EOF$EOFs <- exp_EOF$EOFs * (-1) +# exp_EOF$PCs <- exp_EOF$PCs * sign # not used + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = exp_EOF$EOFs[1, , ], wght = wght) # [sdate, memb] + NAOF.ver[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + NAOF.ver[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + # add_member_back + if (add_member_back) { + suppressWarnings( + NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) + ) + } + + #NOTE: EOFs_obs is not returned because it's only the result of the last sdate + # (It is returned in s2dverification.) + if (!is.null(exp) & !is.null(obs)) { + return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) + } else if (!is.null(exp)) { + return(list(exp = NAOF.ver)) + } else if (!is.null(obs)) { + return(list(obs = NAOO.ver)) + } +} diff --git a/R/PlotBoxWhisker.R b/R/PlotBoxWhisker.R new file mode 100644 index 0000000000000000000000000000000000000000..2ddcec0d59a7627432b388db555678dcaf4041e8 --- /dev/null +++ b/R/PlotBoxWhisker.R @@ -0,0 +1,242 @@ +#'Box-And-Whisker Plot of Time Series with Ensemble Distribution +#' +#'Produce time series of box-and-whisker plot showing the distribution of the +#'members of a forecast vs. the observed evolution. The correlation between +#'forecast and observational data is calculated and displayed. Only works for +#'n-monthly to n-yearly time series. +#' +#'@param exp Forecast array of multi-member time series, e.g., the NAO index +#' of one experiment. The expected dimensions are +#' c(members, start dates/forecast horizons). A vector with only the time +#' dimension can also be provided. Only monthly or lower frequency time +#' series are supported. See parameter freq. +#'@param obs Observational vector or array of time series, e.g., the NAO index +#' of the observations that correspond the forecast data in \code{exp}. +#' The expected dimensions are c(start dates/forecast horizons) or +#' c(1, start dates/forecast horizons). Only monthly or lower frequency time +#' series are supported. See parameter freq. +#'@param toptitle Character string to be drawn as figure title. +#'@param ytitle Character string to be drawn as y-axis title. +#'@param monini Number of the month of the first time step, from 1 to 12. +#'@param yearini Year of the first time step. +#'@param freq Frequency of the provided time series: 1 = yearly, 12 = monthly, +# 4 = seasonal, ... Default = 12. +#'@param expname Experimental dataset name. +#'@param obsname Name of the observational reference dataset. +#'@param drawleg TRUE/FALSE: whether to draw the legend or not. +#'@param fileout Name of output file. Extensions allowed: eps/ps, jpeg, png, +#' pdf, bmp and tiff. \cr +#' Default = 'output_PlotBox.ps'. +#'@param width File width, in the units specified in the parameter size_units +#' (inches by default). Takes 8 by default. +#'@param height File height, in the units specified in the parameter +#' size_units (inches by default). Takes 5 by default. +#'@param size_units Units of the size of the device (file or window) to plot +#' in. Inches ('in') by default. See ?Devices and the creator function of the +#' corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See +#' ?Devices and the creator function of the corresponding device. +#'@param ... Arguments to be passed to the method. Only accepts the following +#' graphical parameters:\cr +#' ann ask bg cex.lab cex.sub cin col.axis col.lab col.main col.sub cra crt +#' csi cxy err family fg fig font font.axis font.lab font.main font.sub lend +#' lheight ljoin lmitre mex mfcol mfrow mfg mkh oma omd omi page pin plt pty +#' smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +#' For more information about the parameters see `par`. +#' +#'@return Generates a file at the path specified via \code{fileout}. +#' +#'@seealso EOF, ProjectField, NAO +#'@keywords datagen +#'@author History:\cr +#'0.1 - 2013-09 (F. Lienert, \email{flienert@@ic3.cat}) - Original code\cr +#'0.2 - 2015-03 (L. Batte, \email{lauriane.batte@@ic3.cat}) - Removed all\cr +#' normalization for sake of clarity. +#'1.0 - 2016-03 (N. Manubens, \email{nicolau.manubens@@bsc.es}) - Formatting to R CRAN +#'@examples +#'# See examples on Load() to understand the first lines in this example +#' \dontrun{ +#'data_path <- system.file('sample_data', package = 's2dverification') +#'expA <- list(name = 'experiment', path = file.path(data_path, +#' 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', +#' '$VAR_NAME$_$START_DATE$.nc')) +#'obsX <- list(name = 'observation', path = file.path(data_path, +#' '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', +#' '$VAR_NAME$_$YEAR$$MONTH$.nc')) +#' +#'# Now we are ready to use Load(). +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- Load('tos', list(expA), list(obsX), startDates, +#' leadtimemin = 1, leadtimemax = 4, output = 'lonlat', +#' latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) +#' } +#' \dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 20, latmax = 80, +#' lonmin = -80, lonmax = 40) +#'# No example data is available over NAO region, so in this example we will +#'# tweak the available data. In a real use case, one can Load() the data over +#'# NAO region directly. +#'sampleData$lon[] <- c(40, 280, 340) +#'sampleData$lat[] <- c(20, 80) +#' } +#'# Now ready to compute the EOFs and project on, for example, the first +#'# variability mode. +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2]) +#'ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2]) +#'nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon) +#'# Finally plot the nao index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") +#' } +#' +#'@importFrom grDevices dev.cur dev.new dev.off +#'@importFrom stats cor +#'@export +PlotBoxWhisker <- function(exp, obs, toptitle = '', ytitle = '', monini = 1, + yearini = 0, freq = 1, expname = "exp 1", + obsname = "obs 1", drawleg = TRUE, + fileout = NULL, + width = 8, height = 5, size_units = 'in', res = 100, ...) { + + # Process the user graphical parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("adj", "bty", "cex", "cex.axis", "cex.main", "col", "din", "fin", "lab", "las", "lty", "lwd", "mai", "mar", "mgp", "new", "pch", "ps") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) + + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # Checking exp + if (is.numeric(exp)) { + if (is.null(dim(exp)) || length(dim(exp)) == 1) { + dim(exp) <- c(1, length(exp)) + } + } + if (!is.numeric(exp) || length(dim(exp)) != 2) { + stop("Parameter 'exp' must be a numeric vector or array of dimensions c(forecast horizons/start dates) or c(ensemble members, forecast horizons/start dates)") + } + + # Checking obs + if (is.numeric(obs)) { + if (is.null(dim(obs)) || length(dim(obs)) == 1) { + dim(obs) <- c(1, length(obs)) + } + } + if (!is.numeric(obs) || length(dim(obs)) != 2) { + stop("Parameter 'obs' must be a numeric vector or array of dimensions c(forecast horizons/start dates) or c(1, forecast horizons/start dates)") + } + + # Checking consistency in exp and obs + if (dim(exp)[2] != dim(obs)[2]) { + stop("'exp' and 'obs' must have data for the same amount of time steps.") + } + + if (!is.character(toptitle) || !is.character(ytitle)) { + stop("Parameters 'ytitle' and 'toptitle' must be character strings.") + } + + if (!is.numeric(monini)) { + stop("'monini' must be a month number, from 1 to 12.") + } + if (monini < 1 || monini > 12) { + stop("'monini' must be >= 1 and <= 12.") + } + + if (!is.numeric(yearini)) { + stop("'yearini' must be a month number, from 1 to 12.") + } + + if (!is.numeric(freq)) { + stop("'freq' must be a number <= 12.") + } + + if (!is.character(expname) || !is.character(obsname)) { + stop("'expname' and 'obsname' must be character strings.") + } + + if (!is.logical(drawleg)) { + stop("Parameter 'drawleg' must be either TRUE or FALSE.") + } + + if (!is.character(fileout) && !is.null(fileout)) { + stop("Parameter 'fileout' must be a character string.") + } + + ntimesteps <- dim(exp)[2] + lastyear <- (monini + (ntimesteps - 1) * 12 / freq - 1) %/% 12 + yearini + lastmonth <- (monini + (ntimesteps - 1) * 12 / freq - 1) %% 12 + 1 + # + # Define some plot parameters + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + labind <- seq(1, ntimesteps) + months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", + "Oct", "Nov", "Dec") + labyear <- ((labind - 1) * 12 / freq + monini - 1) %/% 12 + yearini + labmonth <- months[((labind - 1) * 12 / freq + monini - 1) %% 12 + 1] + for (jx in 1:length(labmonth)) { + y2o3dig <- paste("0", as.character(labyear[jx]), sep = "") + labmonth[jx] <- paste(labmonth[jx], "\nYr ", substr(y2o3dig, + nchar(y2o3dig) - 1, nchar(y2o3dig)), sep = "") + } + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + + # Load the user parameters + par(userArgs) + + ## Observed time series. + #pc.o <- ts(obs[1, ], deltat = 1, start = yr1, end = yr2) + pc.o <- obs[1, ] + ## Normalization of obs, forecast members. Fabian + ## Normalization of forecast should be according to ensemble + ## mean, to keep info on ensemble spread, no? Lauriane pc.o <- + ## pc.o/sd(pc.o) sd.fc <- apply(exp,c(1),sd) + ## exp <- exp/sd.fc mn.fc <- + ## apply(exp,2, mean) exp <- + ## exp/sd(mn.fc) Produce plot. + par(mar = c(5, 6, 4, 2)) + boxplot(exp, add = FALSE, main = toptitle, + ylab = "", xlab = "", col = "red", lwd = 2, t = "b", + axes = FALSE, cex.main = 2, ylim = c(-max(abs(c(exp, pc.o))), max(abs(c(exp, pc.o))))) + lines(1:ntimesteps, pc.o, lwd = 3, col = "blue") + abline(h = 0, lty = 1) + if (drawleg) { + legend("bottomleft", c(obsname, expname), lty = c(1, 1), lwd = c(3, + 3), pch = c(NA, NA), col = c("blue", "red"), horiz = FALSE, + bty = "n", inset = 0.05) + } + ##mtext(1, line = 3, text = tar, cex = 1.9) + mtext(3, line = -2, text = paste(" AC =", round(cor(pc.o, + apply(exp, c(2), mean)), 2)), cex = 1.9, adj = 0) + axis(2, cex.axis = 2) + mtext(2, line = 3, text = ytitle, cex = 1.9) + par(mgp = c(0, 4, 0)) + ##axis(1, c(1:ntimesteps), NA, cex.axis = 2) + axis(1, seq(1, ntimesteps, by = 1), labmonth, cex.axis = 2) + box() + + # If the graphic was saved to file, close the connection with the device + if(!is.null(fileout)) dev.off() +} + diff --git a/R/ProjectField.R b/R/ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..309f3efd731824e3e031a473a22a657e7d7ed845 --- /dev/null +++ b/R/ProjectField.R @@ -0,0 +1,268 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop(paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().")) + } + if (!'wght' %in% names(eof)) { + stop(paste0("Parameter 'eof' must be a list that contains 'wght'. ", + "It can be generated by EOF() or REOF().")) + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop(paste0("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.")) + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop(paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.")) + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop(paste0("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.")) + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop(paste0("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.")) + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop(paste0("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.")) + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop(paste0("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.")) + } + + # Enlarge eof/ano is needed. The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in 1:length(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + + na <- apply(ano, 1, mean, na.rm = TRUE) # if [lat, lon] all NA, it's NA + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- apply(ano, 1, mean, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + diff --git a/R/REOF.R b/R/REOF.R new file mode 100644 index 0000000000000000000000000000000000000000..c9c82cf94e1091f88ea7a8c71dd2957e5079de80 --- /dev/null +++ b/R/REOF.R @@ -0,0 +1,224 @@ +#'Area-weighted empirical orthogonal function analysis with varimax rotation using SVD +#' +#'Perform an area-weighted EOF analysis with varimax rotation using single +#'value decomposition (SVD) based on a covariance matrix or a correlation matrix if +#'parameter 'corr' is set to TRUE. The internal s2dv function \code{.EOF()} is used +#'internally. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' REOF. The dimensions must have at least 'time_dim' and 'space_dim'. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param ntrunc A positive integer of the number of eofs to be kept for varimax +#' rotation. This function uses this value as 'neof' too, which is the number +#' of eofs to return by \code{.EOF()}. The default value is 15. If time length +#' or the product of latitude length and longitude length is less than +#' 'ntrunc', 'ntrunc' is equal to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{REOFs}{ +#' An array of REOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, the rest of the dimensions of 'ano' except +#' 'time_dim'). Multiplying 'REOFs' by 'RPCs' gives the original +#' reconstructed field. +#'} +#'\item{RPCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, the rest of the +#' dimensions of 'ano' except 'space_dim'). +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode. The dimensions are (number of modes, the rest of +#' the dimension except 'time_dim' and 'space_dim'). +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by the square root of cosine of 'lat' and used to compute the fraction of +#' variance explained by each REOFs. +#'} +#' +#'@seealso EOF +#'@examples +#'# This example computes the REOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'ano <- MeanDims(ano$exp, c('dataset', 'member')) +#'res <- REOF(ano, lat = sampleData$lat, lon = sampleData$lon, ntrunc = 5) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , , 1], sampleData$lat, sampleData$lon) +#'} +#' +#'@import multiApply +#'@importFrom stats varimax +#'@export +REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', + space_dim = c('lat', 'lon'), corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.")) + } + if (all(lon >= 0)) { + if (any(lon > 360 | lon < 0)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } else { + if (any(lon < -180 | lon > 180)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } + ## ntrunc + if (!is.numeric(ntrunc) | ntrunc %% 1 != 0 | ntrunc <= 0 | length(ntrunc) > 1) { + stop("Parameter 'ntrunc' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate REOF + + # ntrunc is bounded + if (ntrunc != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)) { + ntrunc <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc) + .warning(paste0("Parameter 'ntrunc' is changed to ", ntrunc, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and ntrunc.")) + } + + # Area weighting is needed to compute the fraction of variance explained by + # each mode + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(REOFs = c('mode', space_dim), + RPCs = c(time_dim, 'mode'), + var = 'mode'), + fun = .REOF, + corr = corr, ntrunc = ntrunc, wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + + +.REOF <- function(ano, ntrunc, corr = FALSE, wght = wght) { + # ano: [sdate, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Get the first ntrunc EOFs: + eofs <- .EOF(ano = ano, neofs = ntrunc, corr = corr, wght = wght) #list(EOFs = EOF, PCs = PC, var = var.eof, mask = mask) + + # Recover loadings (with norm 1), weight the EOFs by the weigths + # eofs$EOFs: [mode, lat, lon] + Loadings <- apply(eofs$EOFs, 1, '*', wght) # [lat*lon, mode] + + # Rotate the loadings: + varim <- varimax(Loadings) + + # Weight back the rotated loadings (REOFs): + if (is.list(varim)) { + varim_loadings <- varim$loadings # [lat*lon, mode] + } else { # if mode = 1, varim is an array + varim_loadings <- varim + } + REOFs <- apply(varim_loadings, 2, '/', wght) + dim(REOFs) <- c(ny, nx, ntrunc) + + # Reorder dimensions to match EOF conventions: [mode, lat, lon] + REOFs <- aperm(REOFs, c(3, 1, 2)) + + # Compute the rotated PCs (RPCs): multiply the weigthed anomalies by the loading patterns. + ano.wght <- apply(ano, 1, '*', wght) # [lat*lon, sdate] + RPCs <- t(ano.wght) %*% varim_loadings # [sdate, mode] + + ## Alternative methods suggested here: + ## https://stats.stackexchange.com/questions/59213/how-to-compute-varimax-rotated-principal-components-in-r/137003#137003 + ## gives same results as pinv is just transpose in this case, as loadings are ortonormal! + # invLoadings <- t(pracma::pinv(varim$loadings)) ## invert and traspose the rotated loadings. pinv uses a SVD again (!) + # RPCs <- ano.wght %*% invLoadings + + # Compute explained variance fraction: + var <- apply(RPCs, 2, function(x) { sum(x*x) } ) * 100 / eofs$tot_var # [mode] + dim(var) <- c(mode = length(var)) + + return(invisible(list(REOFs = REOFs, RPCs = RPCs, var = var))) +} diff --git a/README.md b/README.md index f9cce382008cbb08f2e099f82e6e39d273a94808..63b261da62e2c0bc343acef04242b584d88d72b9 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,27 @@ correlation with reliability indicators such as p-values and confidence interval - **Visualization** module: Plotting functions are also provided to plot the results obtained from any of the modules above. +One important feature of s2dv is the named dimension of the data array. All the +data input of the functions should have names for all the dimensions. It should +not be a problem since the data retrieved by s2dv::Load or startR::Start have +named dimension inherently. Take the sample data in s2dv as an example: +```r +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), c('observation'), + '19901101', leadtimemin = 1, leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +# It returns an object 'sampleData' +dim(sampleData$mod) +dataset member sdate ftime lat lon + 1 3 1 4 2 3 +dim(sampleData$obs) +dataset member sdate ftime lat lon + 1 1 1 4 2 3 +``` +The feature provides security during the analysis, ensuring that the dimensions +under operation are the desired ones. + Contribute ---------- diff --git a/man/Ano_CrossValid.Rd b/man/Ano_CrossValid.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1e91528335c2e68b812594311f1446a8d7eabeb3 --- /dev/null +++ b/man/Ano_CrossValid.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ano_CrossValid.R +\name{Ano_CrossValid} +\alias{Ano_CrossValid} +\title{Compute anomalies in cross-validation mode} +\usage{ +Ano_CrossValid( + exp, + obs, + time_dim = "sdate", + dat_dim = c("dataset", "member"), + memb_dim = "member", + memb = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data, with at least +dimensions 'time_dim' and 'dat_dim'.} + +\item{obs}{A named numeric array of observational data, same dimensions as +parameter 'exp' except along 'dat_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{dat_dim}{A character vector indicating the name of the dataset and +member dimensions. When calculating the climatology, if data at one +startdate (i.e., 'time_dim') is not complete along 'dat_dim', this startdate +along 'dat_dim' will be discarded. The default value is +"c('dataset', 'member')".} + +\item{memb_dim}{A character string indicating the name of the member +dimension. Only used when parameter 'memb' is FALSE. It must be one element +in 'dat_dim'. The default value is 'member'.} + +\item{memb}{A logical value indicating whether to subtract the climatology +based on the individual members (TRUE) or the ensemble mean over all +members (FALSE) when calculating the anomalies. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of 2: +\item{$exp}{ + A numeric array with the same dimensions as 'exp'. The dimension order may + change. +} +\item{$obs}{ + A numeric array with the same dimensions as 'obs'.The dimension order may + change. +} +} +\description{ +Compute the anomalies from the arrays of the experimental and observational +data output by subtracting the climatologies computed with a leave-one-out +cross validation technique and a per-pair method (Garcia-Serrano and +Doblas-Reyes, CD, 2012). +Per-pair climatology means that only the start dates covered by the +whole experiments/observational datasets will be used. In other words, the +startdates which do not all have values along 'dat_dim' dimension of both +the 'exp' and 'obs' are excluded when computing the climatologies. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +anomalies <- Ano_CrossValid(sampleData$mod, sampleData$obs) +\dontrun{ +PlotAno(anomalies$exp, anomalies$obs, startDates, + toptitle = paste('anomalies'), ytitle = c('K', 'K', 'K'), + legends = 'ERSST', biglab = FALSE, fileout = 'tos_ano_crossvalid.eps') +} +} diff --git a/man/EOF.Rd b/man/EOF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..ae84c55eff26b20bd98f64cdf16bfa2a5e592366 --- /dev/null +++ b/man/EOF.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EOF.R +\name{EOF} +\alias{EOF} +\title{Area-weighted empirical orthogonal function analysis using SVD} +\usage{ +EOF( + ano, + lat, + lon, + time_dim = "sdate", + space_dim = c("lat", "lon"), + neofs = 15, + corr = FALSE, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions to calculate +EOF. The dimensions must have at least 'time_dim' and 'space_dim'.} + +\item{lat}{A vector of the latitudes of 'ano'.} + +\item{lon}{A vector of the longitudes of 'ano'.} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'. The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{neofs}{A positive integer of the modes to be kept. The default value is +15. If time length or the product of the length of space_dim is smaller than +neofs, neofs will be changed to the minimum of the three values.} + +\item{corr}{A logical value indicating whether to base on a correlation (TRUE) +or on a covariance matrix (FALSE). The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{EOFs}{ + An array of EOF patterns normalized to 1 (unitless) with dimensions + (number of modes, rest of the dimensions of 'ano' except 'time_dim'). + Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed + field. +} +\item{PCs}{ + An array of principal components with the units of the original field to + the power of 2, with dimensions (time_dim, number of modes, rest of the + dimensions of 'ano' except 'space_dim'). + 'PCs' contains already the percentage of explained variance so, + to reconstruct the original field it's only needed to multiply 'EOFs' + by 'PCs'. +} +\item{var}{ + An array of the percentage (%) of variance fraction of total variance + explained by each mode (number of modes). The dimensions are (number of + modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +} +\item{mask}{ + An array of the mask with dimensions (space_dim, rest of the dimensions of + 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that + 'ano' has value and NA for the positions that 'ano' has NA. It is used to + replace NAs with 0s for EOF calculation and mask the result with NAs again + after the calculation. +} +\item{wght}{ + An array of the area weighting with dimensions 'space_dim'. It is calculated + by cosine of 'lat' and used to compute the fraction of variance explained by + each EOFs. +} +\item{tot_var}{ + A number or a numeric array of the total variance explained by all the modes. + The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +} +} +\description{ +Perform an area-weighted EOF analysis using single value decomposition (SVD) +based on a covariance matrix or a correlation matrix if parameter 'corr' is +set to TRUE. +} +\examples{ +# This example computes the EOFs along forecast horizons and plots the one +# that explains the greatest amount of variability. The example data has low +# resolution so the result may not be explanatory, but it displays how to +# use this function. +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +tmp <- MeanDims(ano$exp, c('dataset', 'member')) +ano <- tmp[1, , ,] +names(dim(ano)) <- names(dim(tmp))[-2] +eof <- EOF(ano, sampleData$lat, sampleData$lon) +\dontrun{ +PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +} + +} +\seealso{ +ProjectField, NAO, PlotBoxWhisker +} diff --git a/man/EuroAtlanticTC.Rd b/man/EuroAtlanticTC.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7f81b249623077ee76d07bb1090484979296b3e4 --- /dev/null +++ b/man/EuroAtlanticTC.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EuroAtlanticTC.R +\name{EuroAtlanticTC} +\alias{EuroAtlanticTC} +\title{Teleconnection indices in European Atlantic Ocean region} +\usage{ +EuroAtlanticTC( + ano, + lat, + lon, + ntrunc = 30, + time_dim = "sdate", + space_dim = c("lat", "lon"), + corr = FALSE, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions to calculate +REOF then the four teleconnections. The dimensions must have at least +'time_dim' and 'space_dim', and the data should cover the European Atlantic +Ocean area (20N-80N, 90W-60E).} + +\item{lat}{A vector of the latitudes of 'ano'. It should be 20N-80N.} + +\item{lon}{A vector of the longitudes of 'ano'. It should be 90W-60E.} + +\item{ntrunc}{A positive integer of the modes to be kept. The default value +is 30. If time length or the product of latitude length and longitude +length is less than ntrunc, ntrunc is equal to the minimum of the three +values.} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'. The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{corr}{A logical value indicating whether to base on a correlation (TRUE) +or on a covariance matrix (FALSE). The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{patterns}{ + An array of the first four REOF patterns normalized to 1 (unitless) with + dimensions (modes = 4, the rest of the dimensions of 'ano' except + 'time_dim'). The modes represent NAO, EA, EAWR, and SCA, of which the order + and sign changes depending on the dataset and period employed, so manual + reordering may be needed. Multiplying 'patterns' by 'indices' gives the + original reconstructed field. +} +\item{indices}{ + An array of the first four principal components with the units of the + original field to the power of 2, with dimensions (time_dim, modes = 4, the + rest of the dimensions of 'ano' except 'space_dim'). +} +\item{var}{ + An array of the percentage (%) of variance fraction of total variance + explained by each mode. The dimensions are (modes = ntrunc, the rest of the + dimensions of 'ano' except 'time_dim' and 'space_dim'). +} +\item{wght}{ + An array of the area weighting with dimensions 'space_dim'. It is calculated + by the square root of cosine of 'lat' and used to compute the fraction of + variance explained by each REOFs. +} +} +\description{ +Calculate the four main teleconnection indices in European Atlantic Ocean +region: North Atlantic oscillation (NAO), East Atlantic Pattern (EA), East +Atlantic/Western Russia (EAWR), and Scandinavian pattern (SCA). The function +\code{REOF()} is used for the calculation, and the first four modes are +returned. +} +\examples{ +# Use synthetic data +set.seed(1) +dat <- array(rnorm(800), dim = c(dat = 2, sdate = 5, lat = 8, lon = 15)) +lat <- seq(10, 90, length.out = 8) +lon <- seq(-100, 70, length.out = 15) +res <- EuroAtlanticTC(dat, lat = lat, lon = lon) + +} +\seealso{ +REOF NAO +} diff --git a/man/NAO.Rd b/man/NAO.Rd new file mode 100644 index 0000000000000000000000000000000000000000..64b16562fb45f5ad2a84c6efd5dca8ba8e171876 --- /dev/null +++ b/man/NAO.Rd @@ -0,0 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NAO.R +\name{NAO} +\alias{NAO} +\title{Compute the North Atlantic Oscillation (NAO) Index} +\usage{ +NAO( + exp = NULL, + obs = NULL, + lat, + lon, + time_dim = "sdate", + memb_dim = "member", + space_dim = c("lat", "lon"), + ftime_dim = "ftime", + ftime_avg = 2:4, + obsproj = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +If only NAO of observational data needs to be computed, this parameter can +be left to NULL. The default value is NULL.} + +\item{obs}{A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +If only NAO of experimental data needs to be computed, this parameter can +be left to NULL. The default value is NULL.} + +\item{lat}{A vector of the latitudes of 'exp' and 'obs'.} + +\item{lon}{A vector of the longitudes of 'exp' and 'obs'.} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'exp' and 'obs'. The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +must be 1. The default value is 'member'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{ftime_dim}{A character string indicating the name of the forecast time +dimension of 'exp' and 'obs'. The default value is 'ftime'.} + +\item{ftime_avg}{A numeric vector of the forecast time steps to average +across the target period. The default value is 2:4, i.e., from 2nd to 4th +forecast time steps.} + +\item{obsproj}{A logical value indicating whether to compute the NAO index by +projecting the forecast anomalies onto the leading EOF of observational +reference (TRUE) or compute the NAO by first computing the leading +EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +year you are evaluating out), and then projecting forecast anomalies onto +this EOF (FALSE). The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list which contains: +\item{exp}{ + A numeric array of forecast NAO index in verification format with the same + dimensions as 'exp' except space_dim and ftime_dim. + } +\item{obs}{ + A numeric array of observed NAO index in verification format with the same + dimensions as 'obs' except space_dim and ftime_dim. +} +} +\description{ +Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +of the sea level pressure (SLP) anomalies over the north Atlantic region +(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +observed anomalies onto the observed EOF pattern or the forecast +anomalies onto the EOF pattern of the other years of the forecast. +By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +cross-validated PCs of the NAO index for forecast (exp) and observations +(obs) based on the leading EOF pattern. +} +\examples{ +# Make up synthetic data +set.seed(1) +exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +set.seed(2) +obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +lat <- seq(20, 80, length.out = 6) +lon <- seq(-80, 40, length.out = 9) +nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) + +# plot the NAO index + \dontrun{ +nao$exp <- Reorder(nao$exp, c(2, 1)) +nao$obs <- Reorder(nao$obs, c(2, 1)) +PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", + monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") + } + +} +\references{ +Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of + multi-model seasonal forecasts of the wintertime North Atlantic + Oscillation. Climate Dynamics, 21, 501-514. + DOI: 10.1007/s00382-003-0350-4 +} diff --git a/man/PlotBoxWhisker.Rd b/man/PlotBoxWhisker.Rd new file mode 100644 index 0000000000000000000000000000000000000000..9c5a3f48ab5ae30097f36dbc78bf141b0f648c9c --- /dev/null +++ b/man/PlotBoxWhisker.Rd @@ -0,0 +1,146 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PlotBoxWhisker.R +\name{PlotBoxWhisker} +\alias{PlotBoxWhisker} +\title{Box-And-Whisker Plot of Time Series with Ensemble Distribution} +\usage{ +PlotBoxWhisker( + exp, + obs, + toptitle = "", + ytitle = "", + monini = 1, + yearini = 0, + freq = 1, + expname = "exp 1", + obsname = "obs 1", + drawleg = TRUE, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) +} +\arguments{ +\item{exp}{Forecast array of multi-member time series, e.g., the NAO index +of one experiment. The expected dimensions are +c(members, start dates/forecast horizons). A vector with only the time +dimension can also be provided. Only monthly or lower frequency time +series are supported. See parameter freq.} + +\item{obs}{Observational vector or array of time series, e.g., the NAO index +of the observations that correspond the forecast data in \code{exp}. +The expected dimensions are c(start dates/forecast horizons) or +c(1, start dates/forecast horizons). Only monthly or lower frequency time +series are supported. See parameter freq.} + +\item{toptitle}{Character string to be drawn as figure title.} + +\item{ytitle}{Character string to be drawn as y-axis title.} + +\item{monini}{Number of the month of the first time step, from 1 to 12.} + +\item{yearini}{Year of the first time step.} + +\item{freq}{Frequency of the provided time series: 1 = yearly, 12 = monthly,} + +\item{expname}{Experimental dataset name.} + +\item{obsname}{Name of the observational reference dataset.} + +\item{drawleg}{TRUE/FALSE: whether to draw the legend or not.} + +\item{fileout}{Name of output file. Extensions allowed: eps/ps, jpeg, png, +pdf, bmp and tiff. \cr +Default = 'output_PlotBox.ps'.} + +\item{width}{File width, in the units specified in the parameter size_units +(inches by default). Takes 8 by default.} + +\item{height}{File height, in the units specified in the parameter +size_units (inches by default). Takes 5 by default.} + +\item{size_units}{Units of the size of the device (file or window) to plot +in. Inches ('in') by default. See ?Devices and the creator function of the +corresponding device.} + +\item{res}{Resolution of the device (file or window) to plot in. See +?Devices and the creator function of the corresponding device.} + +\item{...}{Arguments to be passed to the method. Only accepts the following +graphical parameters:\cr +ann ask bg cex.lab cex.sub cin col.axis col.lab col.main col.sub cra crt +csi cxy err family fg fig font font.axis font.lab font.main font.sub lend +lheight ljoin lmitre mex mfcol mfrow mfg mkh oma omd omi page pin plt pty +smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +For more information about the parameters see `par`.} +} +\value{ +Generates a file at the path specified via \code{fileout}. +} +\description{ +Produce time series of box-and-whisker plot showing the distribution of the +members of a forecast vs. the observed evolution. The correlation between +forecast and observational data is calculated and displayed. Only works for +n-monthly to n-yearly time series. +} +\examples{ +# See examples on Load() to understand the first lines in this example + \dontrun{ +data_path <- system.file('sample_data', package = 's2dverification') +expA <- list(name = 'experiment', path = file.path(data_path, + 'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly', + '$VAR_NAME$_$START_DATE$.nc')) +obsX <- list(name = 'observation', path = file.path(data_path, + '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$', + '$VAR_NAME$_$YEAR$$MONTH$.nc')) + +# Now we are ready to use Load(). +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- Load('tos', list(expA), list(obsX), startDates, + leadtimemin = 1, leadtimemax = 4, output = 'lonlat', + latmin = 27, latmax = 48, lonmin = -12, lonmax = 40) + } + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 20, latmax = 80, + lonmin = -80, lonmax = 40) +# No example data is available over NAO region, so in this example we will +# tweak the available data. In a real use case, one can Load() the data over +# NAO region directly. +sampleData$lon[] <- c(40, 280, 340) +sampleData$lat[] <- c(20, 80) + } +# Now ready to compute the EOFs and project on, for example, the first +# variability mode. +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2]) +ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2]) +nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon) +# Finally plot the nao index + \dontrun{ +nao$exp <- Reorder(nao$exp, c(2, 1)) +nao$obs <- Reorder(nao$obs, c(2, 1)) +PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", + monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") + } + +} +\seealso{ +EOF, ProjectField, NAO +} +\author{ +History:\cr +0.1 - 2013-09 (F. Lienert, \email{flienert@ic3.cat}) - Original code\cr +0.2 - 2015-03 (L. Batte, \email{lauriane.batte@ic3.cat}) - Removed all\cr + normalization for sake of clarity. +1.0 - 2016-03 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN +} +\keyword{datagen} diff --git a/man/ProjectField.Rd b/man/ProjectField.Rd new file mode 100644 index 0000000000000000000000000000000000000000..358f4ee86e8aefaebb4e6cac23eaae6b9e4f8b1e --- /dev/null +++ b/man/ProjectField.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ProjectField.R +\name{ProjectField} +\alias{ProjectField} +\title{Project anomalies onto modes of variability} +\usage{ +ProjectField( + ano, + eof, + time_dim = "sdate", + space_dim = c("lat", "lon"), + mode = NULL, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions. The +dimensions must have at least 'time_dim' and 'space_dim'. It can be +generated by Ano().} + +\item{eof}{A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +by EOF() or REOF().} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'. The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{mode}{An integer of the variability mode number in the EOF to be +projected on. The default value is NULL, which means all the modes of 'eof' +is calculated.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of the principal components in the verification + format. The dimensions are the same as 'ano' except 'space_dim'. +} +\description{ +Project anomalies onto modes of variability to get the temporal evolution of +the EOF mode selected. It returns principal components (PCs) by area-weighted +projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +returns NA if the whole spatial pattern is NA. +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) + +\dontrun{ + # Plot the forecast and the observation of the first mode for the last year + # of forecast + sdate_dim_length <- dim(mode1_obs)['sdate'] + plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), + lwd = 2) + for (i in 1:dim(mode1_exp)['member']) { + par(new = TRUE) + plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], + ylim = c(-15000, 15000)) + } +} + +} +\seealso{ +EOF, NAO, PlotBoxWhisker +} diff --git a/man/REOF.Rd b/man/REOF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..a5d416c22e132b96125b18738ecf7330d9ce7d2f --- /dev/null +++ b/man/REOF.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/REOF.R +\name{REOF} +\alias{REOF} +\title{Area-weighted empirical orthogonal function analysis with varimax rotation using SVD} +\usage{ +REOF( + ano, + lat, + lon, + ntrunc = 15, + time_dim = "sdate", + space_dim = c("lat", "lon"), + corr = FALSE, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions to calculate +REOF. The dimensions must have at least 'time_dim' and 'space_dim'.} + +\item{lat}{A vector of the latitudes of 'ano'.} + +\item{lon}{A vector of the longitudes of 'ano'.} + +\item{ntrunc}{A positive integer of the number of eofs to be kept for varimax +rotation. This function uses this value as 'neof' too, which is the number +of eofs to return by \code{.EOF()}. The default value is 15. If time length +or the product of latitude length and longitude length is less than +'ntrunc', 'ntrunc' is equal to the minimum of the three values.} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'. The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{corr}{A logical value indicating whether to base on a correlation (TRUE) +or on a covariance matrix (FALSE). The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{REOFs}{ + An array of REOF patterns normalized to 1 (unitless) with dimensions + (number of modes, the rest of the dimensions of 'ano' except + 'time_dim'). Multiplying 'REOFs' by 'RPCs' gives the original + reconstructed field. +} +\item{RPCs}{ + An array of principal components with the units of the original field to + the power of 2, with dimensions (time_dim, number of modes, the rest of the + dimensions of 'ano' except 'space_dim'). +} +\item{var}{ + An array of the percentage (%) of variance fraction of total variance + explained by each mode. The dimensions are (number of modes, the rest of + the dimension except 'time_dim' and 'space_dim'). +} +\item{wght}{ + An array of the area weighting with dimensions 'space_dim'. It is calculated + by the square root of cosine of 'lat' and used to compute the fraction of + variance explained by each REOFs. +} +} +\description{ +Perform an area-weighted EOF analysis with varimax rotation using single +value decomposition (SVD) based on a covariance matrix or a correlation matrix if +parameter 'corr' is set to TRUE. The internal s2dv function \code{.EOF()} is used +internally. +} +\examples{ +# This example computes the REOFs along forecast horizons and plots the one +# that explains the greatest amount of variability. The example data has low +# resolution so the result may not be explanatory, but it displays how to +# use this function. +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +ano <- MeanDims(ano$exp, c('dataset', 'member')) +res <- REOF(ano, lat = sampleData$lat, lon = sampleData$lon, ntrunc = 5) +\dontrun{ +PlotEquiMap(eof$EOFs[1, , , 1], sampleData$lat, sampleData$lon) +} + +} +\seealso{ +EOF +} diff --git a/tests/testthat/test-Ano_CrossValid.R b/tests/testthat/test-Ano_CrossValid.R new file mode 100644 index 0000000000000000000000000000000000000000..60bff8c70a1752c795664cc2acebed6dc9c81413 --- /dev/null +++ b/tests/testthat/test-Ano_CrossValid.R @@ -0,0 +1,143 @@ +context("s2dv::Ano_CrossValid tests") + +############################################## + # dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(dataset = 2, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(dataset = 1, member = 2, sdate = 5, ftime = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(30), dim = c(member = 3, ftime = 2, sdate = 5)) +set.seed(2) +obs2 <- array(rnorm(20), dim = c(ftime = 2, member = 2, sdate = 5)) + + +############################################## +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + Ano_CrossValid(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Ano_CrossValid(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Ano_CrossValid(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must have at least dimensions ", + "time_dim and dat_dim.") + ) + expect_error( + Ano_CrossValid(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + # time_dim + expect_error( + Ano_CrossValid(exp1, obs1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Ano_CrossValid(exp1, obs1, time_dim = c('a', 'sdate')), + "Parameter 'time_dim' must be a character string." + ) + # dat_dim + expect_error( + Ano_CrossValid(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character vector." + ) + expect_error( + Ano_CrossValid(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb + expect_error( + Ano_CrossValid(exp1, obs1, memb = 'member'), + "Parameter 'memb' must be one logical value." + ) + # memb_dim + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 'ftime'), + "Parameter 'memb_dim' must be one element in parameter 'dat_dim'." + ) + # ncores + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, ncores = -1), + "Parameter 'ncores' must be a positive integer." + ) + # exp and obs (2) + expect_error( + Ano_CrossValid(exp1, array(1:20, dim = c(dataset = 1, member = 2, sdate = 4, ftime = 2))), + paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.") + ) + + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + names(Ano_CrossValid(exp1, obs1)), + c("exp", "obs") + ) + expect_equal( + dim(Ano_CrossValid(exp1, obs1)$exp), + c(sdate = 5, dataset = 2, member = 3, ftime = 2) + ) + expect_equal( + Ano_CrossValid(exp1, obs1)$exp[, 1, 2, 2], + c(0.2771331, 1.1675753, -1.0684010, 0.2901759, -0.6664833), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp1, obs1)$obs[, 1, 2, 2], + c(1.7024193, -0.8243579, -2.4136080, 0.5199868, 1.0155598), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp1, obs1, memb = FALSE)$exp[, 1, 2, 2], + c(0.1229714, 0.8496518, -0.9531644, 0.1548713, -0.5264025), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. dat2", { + expect_equal( + names(Ano_CrossValid(exp2, obs2, dat_dim = 'member')), + c("exp", "obs") + ) + expect_equal( + dim(Ano_CrossValid(exp2, obs2, dat_dim = 'member')$exp), + c(sdate = 5, member = 3, ftime = 2) + ) + expect_equal( + Ano_CrossValid(exp2, obs2, dat_dim = 'member')$exp[, 2, 2], + c(0.05650631, 1.53434806, -0.37561623, -0.26217217, -0.95306597), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp2, obs2, dat_dim = 'member', memb = FALSE)$exp[, 2, 2], + c(0.34489635, 1.56816273, -0.01926901, -0.09646066, -0.68236823), + tolerance = 0.0001 + ) + +}) + + + + + diff --git a/tests/testthat/test-EOF.R b/tests/testthat/test-EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..c8c6d30f8474d7c94e3c4a92a54f7b2baf7f7b62 --- /dev/null +++ b/tests/testthat/test-EOF.R @@ -0,0 +1,227 @@ +context("s2dv::EOF tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + + # dat2 + set.seed(1) + dat2 <- array(rnorm(240), dim = c(lat = 6, lon = 2, sdate = 20)) + lat2 <- seq(-10, 10, length.out = 6) + lon2 <- c(-10, -12) + + # dat3 + set.seed(1) + dat3 <- array(rnorm(480), dim = c(dat = 2, lat = 6, lon = 2, sdate = 20)) + lat3 <- seq(10, 30, length.out = 6) + lon3 <- c(10, 12) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + EOF(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + EOF(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EOF(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EOF(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + EOF(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + EOF(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + EOF(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + EOF(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat + expect_error( + EOF(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + EOF(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." + ) + # lon + expect_error( + EOF(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_warning( + EOF(dat1, lat = lat1, lon = c(350, 370)), + "Some 'lon' is out of the range \\[-360, 360\\]." + ) + # neofs + expect_error( + EOF(dat1, lat = lat1, lon = lon1, neofs = -1), + "Parameter 'neofs' must be a positive integer." + ) + # corr + expect_error( + EOF(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + EOF(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + names(EOF(dat1, lon = lon1, lat = lat1)), + c("EOFs", "PCs", "var", "tot_var", "mask", "wght") + ) + expect_equal( + dim(EOF(dat1, lon = lon1, lat = lat1)$EOFs), + c(mode = 10, lat = 6, lon = 2) + ) + expect_equal( + dim(EOF(dat1, lon = lon1, lat = lat1)$PCs), + c(sdate = 10, mode = 10) + ) + expect_equal( + dim(EOF(dat1, lon = lon1, lat = lat1)$var), + c(mode = 10) + ) + expect_equal( + dim(EOF(dat1, lon = lon1, lat = lat1)$mask), + c(lat = 6, lon = 2) + ) + expect_equal( + dim(EOF(dat1, lon = lon1, lat = lat1)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + EOF(dat1, lon = lon1, lat = lat1)$EOFs[1:5], + c(-0.2888168, 0.2792765, 0.1028387, 0.1883640, -0.2896943), + tolerance = 0.0001 + ) + expect_equal( + mean(EOF(dat1, lon = lon1, lat = lat1)$EOFs), + 0.01792716, + tolerance = 0.0001 + ) + expect_equal( + EOF(dat1, lon = lon1, lat = lat1)$PCs[1:5], + c(3.2061306, -0.1692669, 0.5420990, -2.5324441, -0.6143680), + tolerance = 0.0001 + ) + expect_equal( + mean(EOF(dat1, lon = lon1, lat = lat1)$PCs), + 0.08980279, + tolerance = 0.0001 + ) + expect_equal( + EOF(dat1, lon = lon1, lat = lat1)$var[1:5], + array(c(29.247073, 25.364840, 13.247046, 11.121006, 8.662517), dim = c(mode = 5)), + tolerance = 0.0001 + ) + expect_equal( + sum(EOF(dat1, lon = lon1, lat = lat1)$mask), + 12 + ) + expect_equal( + EOF(dat1, lon = lon1, lat = lat1)$wght[1:5], + c(0.9923748, 0.9850359, 0.9752213, 0.9629039, 0.9480475), + tolerance = 0.0001 + ) + expect_equal( + EOF(dat1, lon = lon1, lat = lat1)$tot_var, + 88.20996, + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(EOF(dat2, lon = lon2, lat = lat2)$EOFs), + c(mode = 12, lat = 6, lon = 2) + ) + expect_equal( + dim(EOF(dat2, lon = lon2, lat = lat2)$PCs), + c(sdate = 20, mode = 12) + ) + expect_equal( + EOF(dat2, lon = lon2, lat = lat2)$EOFs[1:5], + c(0.33197201, 0.18837900, -0.19697143, 0.08305805, -0.51297585), + tolerance = 0.0001 + ) + expect_equal( + mean(EOF(dat2, lon = lon2, lat = lat2)$EOFs), + 0.02720393, + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. dat3", { + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$EOFs), + c(mode = 12, lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$PCs), + c(sdate = 20, mode = 12, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$var), + c(mode = 12, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$mask), + c(lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + mean(EOF(dat3, lon = lon3, lat = lat3)$EOFs), + 0.01214845, + tolerance = 0.0001 + ) + expect_equal( + EOF(dat3, lon = lon3, lat = lat3)$EOFs[1:5], + c(0.3292733, 0.1787016, -0.3801986, 0.1957160, -0.4377031), + tolerance = 0.0001 + ) + expect_equal( + EOF(dat3, lon = lon3, lat = lat3)$tot_var, + array(c(213.2422, 224.4203), dim = c(dat = 2)), + tolerance = 0.0001 + ) + +}) +############################################## diff --git a/tests/testthat/test-EuroAtlanticTC.R b/tests/testthat/test-EuroAtlanticTC.R new file mode 100644 index 0000000000000000000000000000000000000000..6e3ac4bad6b2173bdc033dec4e92ab697b9b8354 --- /dev/null +++ b/tests/testthat/test-EuroAtlanticTC.R @@ -0,0 +1,308 @@ +context("s2dv::EuroAtlanticTC tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(480), dim = c(sdate = 10, lat = 6, lon = 8)) + lat1 <- seq(20, 80, length.out = 6) + lon1 <- seq(-90, 60, length.out = 8) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(800), dim = c(dat = 2, lat = 8, lon = 15, sdate = 5)) + lat2 <- seq(10, 90, length.out = 8) + lon2 <- seq(-100, 70, length.out = 15) + + # dat3 + set.seed(2) + dat3 <- array(rnorm(1520), dim = c(dat = 2, lat = 8, lon = 19, sdate = 5)) + lat3 <- seq(10, 90, length.out = 8) + lon3 <- c(seq(0, 70, length.out = 8), seq(250, 350, length.out = 11)) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + EuroAtlanticTC(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + EuroAtlanticTC(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EuroAtlanticTC(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EuroAtlanticTC(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + EuroAtlanticTC(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + EuroAtlanticTC(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + EuroAtlanticTC(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + EuroAtlanticTC(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat and lon + expect_error( + EuroAtlanticTC(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + EuroAtlanticTC(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(300, 370, length.out = 8)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(-190, -10, length.out = 8)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = seq(30, 80, length.out = 6), lon = lon1), + "The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(-80, 20, length.out = 8)), + "The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).", + fixed = TRUE + ) + # ntrunc + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = lon1, ntrunc = 0), + "Parameter 'ntrunc' must be a positive integer." + ) + # corr + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + EuroAtlanticTC(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. dat1", { + + expect_equal( + names(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 8) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$patterns), + c(mode = 4, lat = 6, lon = 8) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 9)$indices), + c(sdate = 10, mode = 4) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var), + c(mode = 10) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$wght), + c(lat = 6, lon = 8) + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$patterns[, 2, 3], + c(-0.019905033, -0.048926441, -0.330219176, 0.008138493), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 5)$patterns[, 2, 3], + c(0.01878324, -0.03784923, -0.22820514, -0.21184373), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$indices[2, ], + c(-1.944509, -1.335159, 0.997195, -2.697545), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var), + 10, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var[1:4]), + c(17.995853, 10.768974, 9.598904, 10.234672), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,1], + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,1], + c(0.9693774), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4, corr = T)$patterns[, 2, 3], + c(-0.05850999, 0.03827591, -0.04454523, -0.43713946), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("3. dat2", { + + expect_equal( + names(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$patterns), + c(mode = 4, lat = 6, lon = 13, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 13, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$indices), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var), + c(mode = 5, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$wght), + c(lat = 6, lon = 13) + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$patterns[, 2, 3, 2], + c(-0.17289486, -0.07021256, -0.08045222, 0.17330862), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$patterns[, 2, 3, 1], + c(0.1347727, 0.2157945, -0.1024759, 0.1633547), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$indices[2, , 1], + c(2.1975962, 2.9158790, -3.2257169, -0.4055974), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var), + 20, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var[1:4]), + c(23.06692, 21.98278, 20.22588, 19.51251), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,1], + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,1], + c(0.964818), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. dat3", { + + expect_equal( + names(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$patterns), + c(mode = 4, lat = 6, lon = 16, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 16, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$indices), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var), + c(mode = 5, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$wght), + c(lat = 6, lon = 16) + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$patterns[, 2, 3, 2], + c(-0.10653582, -0.22437848, 0.10192633, 0.08331549), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$patterns[, 2, 3, 1], + c(0.25209479, -0.05872688, 0.03186457, -0.02901076), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$indices[2, , 1], + c(2.940060, 5.036896, 4.188896, 2.816158), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var), + 20, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var[1:4]), + c(24.38583, 22.57439, 20.19659, 17.95064), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,1], + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,1], + c(0.964818), + tolerance = 0.0001 + ) + +}) + diff --git a/tests/testthat/test-NAO.R b/tests/testthat/test-NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..f3c6d21f5574be54e0b05513d887e927aa6f7a87 --- /dev/null +++ b/tests/testthat/test-NAO.R @@ -0,0 +1,246 @@ +context("s2dv::NAO tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(144), dim = c(dataset = 1, member = 2, sdate = 3, ftime = 4, lat = 2, lon = 3)) + set.seed(2) + obs1 <- array(rnorm(72), dim = c(dataset = 1, member = 1, sdate = 3, ftime = 4, lat = 2, lon = 3)) + lat1 <- c(20, 80) + lon1 <- c(40, 280, 350) + + # dat2 + set.seed(1) + exp2 <- array(rnorm(216), dim = c(sdate = 3, ftime = 4, member = 2, lat = 3, lon = 3)) + set.seed(2) + obs2 <- array(rnorm(108), dim = c(sdate = 3, ftime = 4, lat = 3, lon = 3)) + lat2 <- c(80, 50, 20) + lon2 <- c(-80, 0, 40) + +############################################## +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + NAO(c(), c()), + "Parameter 'exp' and 'obs' cannot both be NULL." + ) + expect_error( + NAO(exp = c(NA, NA)), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + NAO(exp = c(1:10)), + paste0("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + ) + expect_error( + NAO(array(1:10, dim = c(2, 5))), + "Parameter 'exp' must have dimension names." + ) + expect_error( + NAO(exp = exp1, obs = c(NA, NA)), + "Parameter 'obs' must be a numeric array." + ) + expect_error( + NAO(exp = exp1, obs = c(1:10)), + paste0("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + ) + expect_error( + NAO(exp = exp1, obs = array(1:10, dim = c(2, 5))), + "Parameter 'obs' must have dimension names." + ) + # time_dim + expect_error( + NAO(exp1, obs1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + NAO(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + NAO(exp1, obs1, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(member = 10, sdate = 3, ftime = 4, lat = 2, lon = 2))), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + expect_error( + NAO(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + # space_dim + expect_error( + NAO(exp1, obs1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + NAO(exp1, obs1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'exp' or 'obs' dimension." + ) + # ftime_dim + expect_error( + NAO(exp1, obs1, ftime_dim = 2), + "Parameter 'ftime_dim' must be a character string." + ) + expect_error( + NAO(exp1, obs1, ftime_dim = 'a'), + "Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(member = 1, sdate = 3, ftime = 4, lat = 2, lon = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'memb_dim'.") + ) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(dataset = 1, member = 1, sdate = 3, ftime = 4, lat = 1, lon = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'memb_dim'.") + ) + # ftime_avg + expect_error( + NAO(exp1, obs1, ftime_avg = T), + "Parameter 'ftime_avg' must be an integer vector." + ) + expect_error( + NAO(exp1, obs1, ftime_avg = 1:10), +"Parameter 'ftime_avg' must be within the range of ftime_dim length." + ) + # sdate >= 2 + expect_error( + NAO(exp = array(rnorm(10), dim = c(member = 10, sdate = 1, ftime = 4, lat = 2, lon = 2)), + obs = array(rnorm(10), dim = c(member = 1, sdate = 1, ftime = 4, lat = 2, lon = 2))), + "The length of time_dim must be at least 2." + ) + # lat and lon + expect_error( + NAO(exp1, obs1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + ) + expect_error( + NAO(exp1, obs1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + ) + expect_error( + NAO(exp1, obs1, lat = c(1, 2), lon = lon1), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + expect_error( + NAO(exp1, obs1, lat = c(-10, -5), lon = lon1), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + expect_error( + NAO(exp1, obs1, lat = lat1, lon = c(40, 50, 60)), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + # obsproj + expect_error( + NAO(exp1, obs1, lat = lat1, lon = lon1, obsproj = 1), + "Parameter 'obsproj' must be either TRUE or FALSE." + ) + expect_error( + NAO(exp = exp1, lat = lat1, lon = lon1), + "Parameter 'obsproj' set to TRUE but no 'obs' provided." + ) + # ncores + expect_error( + NAO(exp1, obs1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + names(NAO(exp1, obs1, lat = lat1, lon = lon1)), + c("exp", "obs") + ) + expect_equal( + dim(NAO(exp1, obs1, lat = lat1, lon = lon1)$exp), + c(sdate = 3, member = 2, dataset = 1) + ) + expect_equal( + dim(NAO(exp1, obs1, lat = lat1, lon = lon1)$obs), + c(sdate = 3, member = 1, dataset = 1) + ) + expect_equal( + NAO(exp1, obs1, lat = lat1, lon = lon1)$exp[1:5], + c(-0.1995564, -0.2996030, 0.7340010, -0.2747980, -0.3606155), + tolerance = 0.0001 + ) + expect_equal( + NAO(exp1, obs1, lat = lat1, lon = lon1)$obs[1:3], + c(-0.1139683, 0.1056687, 0.1889449), + tolerance = 0.0001 + ) + expect_equal( + mean(NAO(exp1, obs1, lat = lat1, lon = lon1, obsproj = FALSE)$exp), + -0.2263239, + tolerance = 0.0001 + ) + expect_equal( + names(NAO(exp = exp1, lat = lat1, lon = lon1, obsproj = FALSE)), + c("exp") + ) + suppressWarnings( + expect_equal( + names(NAO(obs = obs1, lat = lat1, lon = lon1)), + c("obs") + ) + ) + expect_equal( + dim(NAO(obs = obs1, lat = lat1, lon = lon1, obsproj = FALSE)$obs), + c(sdate = 3, member = 1, dataset = 1) + ) + expect_equal( + as.vector(NAO(obs = obs1, lat = lat1, lon = lon1, obsproj = FALSE)$obs), + c(-0.1139683, 0.1056687, 0.1889449), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(NAO(exp2, obs2, lat = lat2, lon = lon2)$exp), + c(sdate = 3, member = 2) + ) + expect_equal( + dim(NAO(exp2, obs2, lat = lat2, lon = lon2)$obs), + c(sdate = 3) + ) + expect_equal( + mean(NAO(exp2, obs2, lat = lat2, lon = lon2)$exp), + 0.006805087, + tolerance = 0.00001 + ) + expect_equal( + NAO(exp2, obs2, lat = lat2, lon = lon2)$exp[2:4], + c(0.07420822, 0.09383927, -0.17372708), + tolerance = 0.00001 + ) + expect_equal( + NAO(exp2, obs2, lat = lat2, lon = lon2, ftime_avg = 1:3)$exp[2:4], + c(0.01652294, -0.63365859, -0.74297551), + tolerance = 0.00001 + ) + expect_equal( + as.vector(NAO(exp = exp2, lat = lat2, lon = lon2, obsproj = FALSE)$exp), + c(-0.3529993, 0.4702901, 0.2185340, 0.1525028, 0.3759627, -0.4451322), + tolerance = 0.00001 + ) + +}) + +############################################## diff --git a/tests/testthat/test-ProjectField.R b/tests/testthat/test-ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..f3f05ce618df9777393a911eaae2f02c9e428b9e --- /dev/null +++ b/tests/testthat/test-ProjectField.R @@ -0,0 +1,303 @@ +context("s2dv::ProjectField tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + eof1 <- EOF(dat1, lat1, lon1, neofs = 10) + reof1 <- REOF(dat1, lat1, lon1, ntrunc = 3) + + # dat2 + set.seed(1) + dat2 <- array(rnorm(48), dim = c(dat = 1, memb = 1, sdate = 6, ftime = 1, lat = 4, lon = 2)) + lat2 <- seq(10, 30, length.out = 4) + lon2 <- c(-5, 5) + eof2 <- EOF(dat2, lat2, lon2, neofs = 6) + + # dat3 + dat3 <- dat2 + dat3[1, 1, 1, 1, , ] <- NA + names(dim(dat3)) <- names(dim(dat2)) + lat3 <- lat2 + lon3 <- lon2 + eof3 <- eof2 + + # dat4 + set.seed(1) + dat4 <- array(rnorm(288*2), dim = c(dat = 2, memb = 2, sdate = 6, ftime = 3, lat = 4, lon = 2)) + lat4 <- seq(-10, -30, length.out = 4) + lon4 <- c(350, 355) + set.seed(2) + tmp <- array(rnorm(144), dim = c(dat = 2, sdate = 6, lat = 4, lon = 2)) + eof4 <- EOF(tmp, lat4, lon4, neofs = 6) + reof4 <- REOF(tmp, lat4, lon4, ntrunc = 6) + + # dat5 + set.seed(1) + dat5 <- array(rnorm(144*3), dim = c(dat = 1, memb = 2, sdate = 6, ftime = 3, lat = 4, lon = 3)) + lat5 <- seq(-10, 10, length.out = 4) + lon5 <- c(0, 5, 10) + set.seed(2) + tmp <- array(rnorm(72), dim = c(sdate = 6, lat = 4, lon = 3)) + eof5 <- EOF(tmp, lat5, lon5, neofs = 6) + + # dat6 + set.seed(1) + dat6 <- array(rnorm(480), dim = c(sdate = 10, lat = 6, lon = 8)) + lat6 <- seq(20, 80, length.out = 6) + lon6 <- seq(-90, 60, length.out = 8) + reof6 <- EuroAtlanticTC(dat6, lat6, lon6, ntrunc = 10) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + ProjectField(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + ProjectField(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + ProjectField(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + ProjectField(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # eof + expect_error( + ProjectField(dat1, c()), + "Parameter 'eof' cannot be NULL." + ) + expect_error( + ProjectField(dat1, c(1, 2)), + "Parameter 'eof' must be a list generated by EOF() or REOF().", + fixed = TRUE + ) + expect_error( + ProjectField(dat1, list(a = 1)), + paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC()."), + fixed = TRUE + ) + eof_fake <- list(EOFs = 'a', wght = 1:10) + expect_error( + ProjectField(dat1, eof_fake), + "The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array." + ) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(mode = 1, lat = 2, lon = 5)), + wght = c(1:10)) + expect_error( + ProjectField(dat1, eof_fake), + "The component 'wght' of parameter 'eof' must be a numeric array." + ) + # time_dim + expect_error( + ProjectField(dat1, eof1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + ProjectField(dat1, eof1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + ProjectField(dat1, eof1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + ProjectField(dat1, eof1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # eof (2) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(lat = 2, lon = 5)), + wght = array(rnorm(10), dim = c(lat = 2, lon = 5))) + expect_error( + ProjectField(dat1, eof_fake), + paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + ) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(mode = 1, lat = 6, lon = 2)), + wght = array(rnorm(10), dim = c(level = 6, lon = 2))) + expect_error( + ProjectField(dat1, eof_fake), + paste0("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + ) + # mode + expect_error( + ProjectField(dat1, eof1, mode = -1), + "Parameter 'mode' must be NULL or a positive integer." + ) + expect_error( + ProjectField(dat1, eof1, mode = 15), + paste0("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + ) + # ncores + expect_error( + ProjectField(dat1, eof1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + dim(ProjectField(dat1, eof = eof1, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat1, eof = eof1)), + c(mode = 10, sdate = 10) + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 1))[1:5], + c(3.2061306, -0.1692669, 0.5420990, -2.5324441, -0.6143680), + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 10))[1:5], + c(-0.24848299, -0.19311118, -0.16951195, -0.10000207, 0.04554693), + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 1)), + as.vector(ProjectField(dat1, eof1)[1, ]) + ) + # reof + expect_equal( + dim(ProjectField(dat1, eof = reof1, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat1, eof = reof1)), + c(mode = 3, sdate = 10) + ) + expect_equal( + as.vector(ProjectField(dat1, reof1, mode = 1))[1:5], + c(3.1567219, -0.1023512, 0.6339372, -0.7998676, -1.3727226), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(ProjectField(dat2, eof2, mode = 1)), + c(sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + dim(ProjectField(dat2, eof2)), + c(mode = 6, sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + ProjectField(dat2, eof2, mode = 1)[1:6], + c(0.00118771, -1.20872474, -0.00821559, -2.06064916, -0.19245169, 2.26026937), + tolerance = 0.0001 + ) + expect_equal( + mean(ProjectField(dat2, eof2, mode = 6)), + 0.1741076, + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat2, eof2, mode = 1)), + as.vector(ProjectField(dat2, eof2)[1, , , , ]) + ) + expect_equal( + as.vector(ProjectField(dat2, eof2, mode = 5)), + as.vector(ProjectField(dat2, eof2)[5, , , , ]) + ) + +}) + +############################################## +test_that("4. dat3", { + expect_equal( + dim(ProjectField(dat3, eof3, mode = 1)), + c(sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + ProjectField(dat3, eof3, mode = 1)[1:6], + c(NA, -1.20872474, -0.00821559, -2.06064916, -0.19245169, 2.26026937), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("5. dat4", { + expect_equal( + dim(ProjectField(dat4, eof4, mode = 1)), + c(sdate = 6, dat = 2, memb = 2, ftime = 3) + ) + expect_equal( + mean(ProjectField(dat4, eof4, mode = 1)), + 0.078082, + tolerance = 0.0001 + ) + expect_equal( + ProjectField(dat4, eof4, mode = 1)[, 1, 2, 2], + c(0.28137048, -0.17616154, -0.39155370, 0.08288953, 1.18465521, 0.81850535), + tolerance = 0.0001 + ) + # reof + expect_equal( + dim(ProjectField(dat4, reof4)), + c(mode = 6, sdate = 6, dat = 2, memb = 2, ftime = 3) + ) + expect_equal( + ProjectField(dat4, reof4, mode = 1)[, 1, 2, 2], + c(-1.6923627, -0.4080116, 0.3044336, -0.7853220, -0.2670783, 0.6940482), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("6. dat5", { + expect_equal( + dim(ProjectField(dat5, eof5, mode = 1)), + c(sdate = 6, dat = 1, memb = 2, ftime = 3) + ) + expect_equal( + mean(ProjectField(dat5, eof5, mode = 1)), + 0.0907149, + tolerance = 0.0001 + ) + expect_equal( + ProjectField(dat5, eof5, mode = 1)[, 1, 2, 2], + c(0.60881970, 0.93588392, 0.01982465, 0.82376024, -0.33147699, -1.35488289), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("7. dat6", { + expect_equal( + dim(ProjectField(dat6, reof6, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat6, reof6)), + c(mode = 4, sdate = 10) + ) + expect_equal( + mean(ProjectField(dat6, reof6)), + 0.3080207, + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat6, reof6)[, 1]), + c(4.6114959, 0.8241051, 1.4160364, -0.9601872), + tolerance = 0.0001 + ) + +}) + diff --git a/tests/testthat/test-REOF.R b/tests/testthat/test-REOF.R new file mode 100644 index 0000000000000000000000000000000000000000..9f4bb480a5dc5bb3de121041fa9f72afe1cea72c --- /dev/null +++ b/tests/testthat/test-REOF.R @@ -0,0 +1,173 @@ +context("s2dv::REOF tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(120), dim = c(dat = 2, lat = 6, lon = 2, sdate = 5)) + lat2 <- lat1 + lon2 <- lon1 + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + REOF(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + REOF(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + REOF(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + REOF(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + REOF(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + REOF(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + REOF(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + REOF(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat + expect_error( + REOF(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + REOF(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." + ) + # lon + expect_error( + REOF(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_error( + REOF(dat1, lat = lat1, lon = c(350, 370)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + # ntrunc + expect_error( + REOF(dat1, lat = lat1, lon = lon1, ntrunc = 0), + "Parameter 'ntrunc' must be a positive integer." + ) + # corr + expect_error( + REOF(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + REOF(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. dat1", { + + expect_equal( + names(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 5)), + c("REOFs", "RPCs", "var", "wght") + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1)$REOFs), + c(mode = 10, lat = 6, lon = 2) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 9)$RPCs), + c(sdate = 10, mode = 9) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$var), + c(mode = 1) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$REOFs[1:5], + c(-0.28881677, 0.47116712, 0.27298759, 0.32794052, 0.01873475), + tolerance = 0.0001 + ) + expect_equal( + mean(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$REOFs), + -0.007620167, + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$RPCs[4:8], + c(-0.58817084, -1.86745710, -0.09939452, -1.11012382, 1.89513430), + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$var[1:2], + array(c(28.04203, 26.56988), dim = c(mode = 2)), + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1,1], + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1,2] + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1:1], + c(0.9923748), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2)$REOFs), + c(mode = 5, lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2, ntrunc = 4)$RPCs), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2, ntrunc = 2)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + REOF(dat2, lon = lon2, lat = lat2, ntrunc = 1)$REOFs[1, 3, 2, 1], + 0.09529009, + tolerance = 0.0001 + ) + expect_equal( + mean(REOF(dat2, lon = lon2, lat = lat2)$REOFs), + 0.01120786, + tolerance = 0.0001 + ) + +}) +