diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ee4c80918fa3999a444af9367d700aee1652ca4c..cbe09a22ac03d801b1ae1a2c0f07cc6c4b59f8ef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,8 +3,8 @@ stages: build: stage: build script: - - module load R + - module load R/3.6.1-foss-2015a-bare # - module load CDO - R CMD build --resave-data . - R CMD check --as-cran --no-manual --run-donttest s2dv_*.tar.gz - - R -e 'covr::package_coverage()' +# - R -e 'covr::package_coverage()' diff --git a/DESCRIPTION b/DESCRIPTION index 126179a2af69c234ead2c12cf6d6e562cfdd91e8..c2dbf9d5120f8e134ee8b7d3dd91872960c618ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: s2dv Title: A Set of Common Tools for Seasonal to Decadal Verification -Version: 0.1.1 +Version: 1.0.0 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut"), person("Roberto", "Bilbao", , "roberto.bilbao@bsc.es", role = "ctb"), person("Carlos", "Delgado", , "carlos.delgado@bsc.es", role = "ctb"), + person("Llorenç", "Lledó", , "llorenc.lledo@bsc.es", role = "ctb"), person("Andrea", "Manrique", , "andrea.manrique@bsc.es", role = "ctb"), person("Deborah", "Verfaillie", , "deborah.verfaillie@bsc.es", role = "ctb")) Description: The advanced version of package 's2dverification'. It is @@ -21,7 +22,7 @@ Description: The advanced version of package 's2dverification'. It is Depends: maps, methods, - R (>= 3.2.0) + R (>= 3.6.0) Imports: abind, bigmemory, @@ -35,7 +36,10 @@ Imports: stats, plyr, ncdf4, - multiApply (>= 2.1.1) + NbClust, + multiApply (>= 2.1.1), + SpecsVerification (>= 0.5.0), + easyNCDF Suggests: easyVerification, testthat @@ -45,4 +49,4 @@ BugReports: https://earth.bsc.es/gitlab/es/s2dv/-/issues LazyData: true SystemRequirements: cdo Encoding: UTF-8 -RoxygenNote: 5.0.0 +RoxygenNote: 7.0.1 diff --git a/NAMESPACE b/NAMESPACE index 6da8d0c7ee74b297aab584a94a5431777f714d19..c02f502122458ebeec850a5c93b498ab9de625f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,14 @@ # Generated by roxygen2: do not edit by hand +export(ACC) export(AMV) export(AnimateMap) export(Ano) +export(Ano_CrossValid) +export(BrierScore) +export(CDORemap) export(Clim) +export(Cluster) export(ColorBar) export(Composite) export(ConfigAddEntry) @@ -18,36 +23,57 @@ export(ConfigRemoveEntry) export(ConfigShowDefinitions) export(ConfigShowSimilarEntries) export(ConfigShowTable) +export(Consist_Trend) export(Corr) +export(EOF) export(Eno) +export(EuroAtlanticTC) +export(Filter) export(GMST) export(GSAT) +export(Histo2Hindcast) export(InsertDim) export(LeapYear) export(Load) export(MeanDims) +export(NAO) export(Persistence) +export(Plot2VarsVsLTime) +export(PlotACC) export(PlotAno) +export(PlotBoxWhisker) export(PlotClim) export(PlotEquiMap) export(PlotLayout) export(PlotMatrix) export(PlotSection) export(PlotStereoMap) +export(PlotVsLTime) +export(ProbBins) +export(ProjectField) +export(REOF) export(RMS) export(RMSSS) export(RandomWalkTest) +export(RatioRMS) +export(RatioSDRMS) export(Regression) export(Reorder) export(SPOD) export(Season) export(Smoothing) +export(Spectrum) +export(Spread) +export(StatSeasAtlHurr) export(TPI) export(ToyModel) export(Trend) +export(UltimateBrier) export(clim.colors) export(clim.palette) import(GEOmap) +import(NbClust) +import(SpecsVerification) import(bigmemory) import(geomapdata) import(graphics) @@ -63,6 +89,7 @@ importFrom(ClimProjDiags,Subset) 
 importFrom(ClimProjDiags,WeightedMean)
 importFrom(abind,abind)
 importFrom(abind,adrop)
+importFrom(easyNCDF,ArrayToNc)
 importFrom(grDevices,bmp)
 importFrom(grDevices,col2rgb)
 importFrom(grDevices,colorRampPalette)
@@ -78,20 +105,30 @@ importFrom(grDevices,rainbow)
 importFrom(grDevices,rgb)
 importFrom(grDevices,svg)
 importFrom(grDevices,tiff)
+importFrom(stats,IQR)
 importFrom(stats,acf)
 importFrom(stats,anova)
 importFrom(stats,confint)
 importFrom(stats,cor)
+importFrom(stats,kmeans)
 importFrom(stats,lm)
+importFrom(stats,mad)
 importFrom(stats,median)
 importFrom(stats,na.fail)
 importFrom(stats,na.omit)
 importFrom(stats,na.pass)
 importFrom(stats,pf)
+importFrom(stats,predict)
 importFrom(stats,pt)
 importFrom(stats,qchisq)
 importFrom(stats,qnorm)
+importFrom(stats,qt)
+importFrom(stats,quantile)
 importFrom(stats,rnorm)
+importFrom(stats,runif)
 importFrom(stats,sd)
+importFrom(stats,setNames)
+importFrom(stats,spectrum)
 importFrom(stats,ts)
+importFrom(stats,varimax)
 importFrom(stats,window)
diff --git a/NEWS.md b/NEWS.md
index 567b9e0d4060b6e584c959aa4868691d3eab15f2..e537e95964bc8453030f091399a1a66bc81cd3f0 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,20 @@
+# s2dv 1.0.0 (Release date: 2021-06-16)
+- New functions:
+ACC, Ano_CrossValid, BrierScore, CDORemap, Cluster, Consist_Trend, EOF, EuroAtlanticTC, Filter, Histo2Hindcast,
+NAO, Plot2VarsVsLTime, PlotACC, PlotBoxWhisker, PlotVsLTime, ProbBins, ProjectField, RatioRMS,
+RatioSDRMS, REOF, Spectrum, Spread, StatSeasAtlHurr, UltimateBrier
+- Season(): Accept one-dimensional input.
+- Persistence(): Add parameter checks for 'start' and 'end'; correct the outputs 'AR.lowCI' and 'AR.highCI'.
+- Corr(): Add parameters 'member' and 'memb_dim'. They allow a member dimension
+that can have different lengths in exp and obs, and let users choose to take the
+ensemble mean before the correlation or to calculate the correlation for each individual member.
+- InsertDim(): Remove Apply() to improve efficiency.
+- Reorder(): Improve efficiency.
+- The index functions now handle the case without 'memb_dim'. The climatology calculation for the anomaly is member-dependent if the member dimension exists.
+- PlotStereoMap(): Add contour and arrow features.
+- PlotAno(): Add parameter check for 'sdates'.
+- PlotEquiMap(): Add new arguments 'contour_draw_label', 'lake_color', 'lab_dist_x', 'lab_dist_y', and 'degree_sym'. Fix the border error; the border grids are fully plotted now. Add ocean mask feature.
+
 # s2dv 0.1.1 (Release date: 2020-11-16)
 - Change the license to Apache License 2.0.
diff --git a/R/ACC.R b/R/ACC.R
new file mode 100644
index 0000000000000000000000000000000000000000..0f1040cec8741e1f482ca7e95b769ad91e898d61
--- /dev/null
+++ b/R/ACC.R
@@ -0,0 +1,618 @@
+#'Compute the anomaly correlation coefficient between the forecast and corresponding observation
+#'
+#'Calculate the anomaly correlation coefficient for the ensemble mean of each
+#'model and the corresponding references over a spatial domain. It can return a
+#'forecast time series if the data contain a forecast time dimension, and also
+#'the start date mean if the data contain a start date dimension.
+#'The domain of interest can be specified by providing the list
+#'of longitudes/latitudes (lon/lat) of the data together with the corners
+#'of the domain: lonlatbox = c(lonmin, lonmax, latmin, latmax).
+#'
+#'@param exp A numeric array of experimental anomalies with named dimensions.
+#' It must have at least 'dat_dim' and 'space_dim'.
+#'@param obs A numeric array of observational anomalies with named dimensions.
+#' It must have the same dimensions as 'exp' except the length of 'dat_dim'
+#' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#' dimension. The default value is 'dataset'.
+#'@param space_dim A character string vector of 2 indicating the name of the
+#' latitude and longitude dimensions (in order) along which ACC is computed.
+#' The default value is c('lat', 'lon').
+#'@param avg_dim A character string indicating the name of the dimension to be
+#' averaged. It must be one of 'time_dim'. The mean ACC is calculated along
+#' the averaged dimension. If there is no need to calculate the mean ACC, set
+#' it as NULL. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. If the data are not ensemble ones, set as NULL. The default
+#' value is 'member'.
+#'@param lat A vector of the latitudes of the exp/obs grids. Only required when
+#' the domain of interest is specified. The default value is NULL.
+#'@param lon A vector of the longitudes of the exp/obs grids. Only required when
+#' the domain of interest is specified. The default value is NULL.
+#'@param lonlatbox A numeric vector of 4 indicating the corners of the domain of
+#' interest: c(lonmin, lonmax, latmin, latmax). Only required when the domain
+#' of interest is specified. The default value is NULL.
+#'@param conf A logical value indicating whether to retrieve the confidence
+#' intervals or not. The default value is TRUE.
+#'@param conftype A character string of "parametric" or "bootstrap".
+#' "parametric" provides a confidence interval for the ACC computed by a
+#' Fisher transformation and a significance level for the ACC from a one-sided
+#' Student's t-distribution. "bootstrap" provides a confidence interval for the
+#' ACC and MACC computed from bootstrapping on the members with 100 drawings
+#' with replacement. To guarantee the statistical robustness of the result,
+#' make sure that your experiment and observation always have the same number
+#' of members. "bootstrap" requires 'memb_dim' to have a value. The default
+#' value is 'parametric'.
+#'@param conf.lev A numeric indicating the confidence level for the confidence
+#' interval computation. The default value is 0.95.
+#'@param pval A logical value indicating whether to compute the p-value or not.
+#' The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list containing the numeric arrays:\cr
+#'\item{acc}{
+#' The ACC with the dimensions c(nexp, nobs, the rest of the dimensions except
+#' space_dim and memb_dim). nexp is the number of experiments (i.e., dat_dim in
+#' exp), and nobs is the number of observations (i.e., dat_dim in obs).
+#'}
+#'\item{conf.lower (if conftype = "parametric") or acc_conf.lower (if
+#' conftype = "bootstrap")}{
+#' The lower confidence interval of ACC with the same dimensions as ACC. Only
+#' present if \code{conf = TRUE}.
+#'}
+#'\item{conf.upper (if conftype = "parametric") or acc_conf.upper (if
+#' conftype = "bootstrap")}{
+#' The upper confidence interval of ACC with the same dimensions as ACC. Only
+#' present if \code{conf = TRUE}.
+#'}
+#'\item{p.val}{
+#' The p-value with the same dimensions as ACC. Only present if
+#' \code{pval = TRUE} and \code{conftype = "parametric"}.
+#'}
+#'\item{macc}{
+#' The mean anomaly correlation coefficient with dimensions
+#' c(nexp, nobs, the rest of the dimensions except space_dim, memb_dim, and
+#' avg_dim). Only present if 'avg_dim' is not NULL.
+#'}
+#'\item{macc_conf.lower}{
+#' The lower confidence interval of MACC with the same dimensions as MACC.
+#' Only present if \code{conftype = "bootstrap"}.
+#'}
+#'\item{macc_conf.upper}{
+#' The upper confidence interval of MACC with the same dimensions as MACC.
+#' Only present if \code{conftype = "bootstrap"}.
+#'}
+#'
+#'@examples
+#' \dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#' }
+#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2)
+#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'acc <- ACC(ano_exp, ano_obs)
+#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap')
+#'# Combine acc results for PlotACC
+#'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val),
+#'             dim = c(dim(acc$acc), 4))
+#'res_bootstrap <- array(c(acc_bootstrap$acc_conf.lower, acc_bootstrap$acc,
+#'                         acc_bootstrap$acc_conf.upper, acc$p.val),
+#'                       dim = c(dim(acc_bootstrap$acc), 4))
+#' \donttest{
+#'PlotACC(res, startDates)
+#'PlotACC(res_bootstrap, startDates)
+#' }
+#'@references Jolliffe and Stephenson (2012). Forecast Verification: A
+#'  Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.
+#'@import multiApply
+#'@importFrom abind abind
+#'@importFrom stats qt qnorm quantile
+#'@importFrom ClimProjDiags Subset
+#'@export
+ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'),
+                avg_dim = 'sdate', memb_dim = 'member',
+                lat = NULL, lon = NULL, lonlatbox = NULL,
+                conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE,
+                ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ",
+                "dat_dim and space_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (!all(names(dim(exp)) %in% names(dim(obs))) |
+      !all(names(dim(obs)) %in% names(dim(exp)))) {
+    stop("Parameter 'exp' and 'obs' must have the same dimension names.")
+  }
+  ## dat_dim
+  if (!is.character(dat_dim) | length(dat_dim) > 1) {
+    stop("Parameter 'dat_dim' must be a character string.")
+  }
+  if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+    stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## space_dim
+  if (!is.character(space_dim) | length(space_dim) != 2) {
+    stop("Parameter 'space_dim' must be a character vector of 2.")
+  }
+  if (any(!space_dim %in% names(dim(exp))) | any(!space_dim %in% names(dim(obs)))) {
+    stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## avg_dim
+  if (!is.null(avg_dim)) {
+    if (!is.character(avg_dim) | length(avg_dim) > 1) {
+      stop("Parameter 'avg_dim' must be a character string.")
+    }
+    if (!avg_dim %in% names(dim(exp)) | !avg_dim %in% names(dim(obs))) {
+      stop("Parameter 'avg_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## lat
+  if (!is.null(lat)) {
+    if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) {
+      stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
+                  "length as the latitude dimension of 'exp' and 'obs'."))
+    }
+  }
+  ## lon
+  if (!is.null(lon)) {
+    if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) {
+      stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
+                  "length as the longitude dimension of 'exp' and 'obs'."))
+    }
+  }
+  ## lonlatbox
+  if (!is.null(lonlatbox)) {
+    if (!is.numeric(lonlatbox) | length(lonlatbox) != 4) {
+      stop("Parameter 'lonlatbox' must be a numeric vector of 4.")
+    }
+  }
+  ## lat, lon, and lonlatbox
+  if (!is.null(lon) & !is.null(lat) & !is.null(lonlatbox)) {
+    select_lonlat <- TRUE
+  } else if (is.null(lon) & is.null(lat) & is.null(lonlatbox)) {
+    select_lonlat <- FALSE
+  } else {
+    stop(paste0("Parameters 'lon', 'lat', and 'lonlatbox' must be used or be ",
+                "NULL at the same time."))
+  }
+  ## conf
+  if (!is.logical(conf) | length(conf) > 1) {
+    stop("Parameter 'conf' must be one logical value.")
+  }
+  if (conf) {
+    ## conftype
+    if (!conftype %in% c('parametric', 'bootstrap')) {
+      stop("Parameter 'conftype' must be either 'parametric' or 'bootstrap'.")
+    }
+    if (conftype == 'bootstrap' & is.null(memb_dim)) {
+      stop("Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'.")
+    }
+    ## conf.lev
+    if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
+      stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
+    }
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  name_exp <- name_exp[-which(name_exp == dat_dim)]
+  name_obs <- name_obs[-which(name_obs == dat_dim)]
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+    name_obs <- name_obs[-which(name_obs == memb_dim)]
+  }
+  if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have the same length of ",
+                "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+
+#-----------------------------------------------------------------
+
+
+  ###############################
+  # Sort dimension
+  name_exp <- names(dim(exp))
+  name_obs <- names(dim(obs))
+  order_obs <- match(name_exp, name_obs)
+  obs <- Reorder(obs, order_obs)
+  ###############################
+
+  # Select the domain
+  if (select_lonlat) {
+    for (jind in 1:2) {
+      while (lonlatbox[jind] < 0) {
+        lonlatbox[jind] <- lonlatbox[jind] + 360
+      }
+      while (lonlatbox[jind] > 360) {
+        lonlatbox[jind] <- lonlatbox[jind] - 360
+      }
+    }
+    indlon <- which((lon >= lonlatbox[1] & lon <=
lonlatbox[2]) | + (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | lon < lonlatbox[2]))) + indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4]) + + exp <- ClimProjDiags::Subset(exp, space_dim, list(indlat, indlon), drop = FALSE) + obs <- ClimProjDiags::Subset(obs, space_dim, list(indlat, indlon), drop = FALSE) + } + + # Ensemble mean + if (!is.null(memb_dim)) { + if (conftype == 'bootstrap') { + exp_ori <- exp + obs_ori <- obs + } + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) + } + + if (is.null(avg_dim)) { + res <- Apply(list(exp, obs), + target_dims = list(c(space_dim, dat_dim), + c(space_dim, dat_dim)), + fun = .ACC, + dat_dim = dat_dim, avg_dim = avg_dim, + conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, + ncores_input = ncores, + ncores = ncores) + + if (conftype == 'bootstrap') { + res_conf <- Apply(list(exp_ori, obs_ori), + target_dims = list(c(memb_dim, dat_dim, space_dim), + c(memb_dim, dat_dim, space_dim)), + fun = .ACC_bootstrap, + dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, + conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, + ncores_input = ncores, + ncores = ncores) + #NOTE: pval? + res <- list(acc = res$acc, + acc_conf.lower = res_conf$acc_conf.lower, + acc_conf.upper = res_conf$acc_conf.upper, + macc = res$macc, + macc_conf.lower = res_conf$macc_conf.lower, + macc_conf.upper = res_conf$macc_conf.upper) + } + + } else { + res <- Apply(list(exp, obs), + target_dims = list(c(space_dim, avg_dim, dat_dim), + c(space_dim, avg_dim, dat_dim)), + fun = .ACC, + dat_dim = dat_dim, avg_dim = avg_dim, + conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, + ncores_input = ncores, + ncores = ncores) + + if (conftype == 'bootstrap') { + res_conf <- Apply(list(exp_ori, obs_ori), + target_dims = list(c(memb_dim, dat_dim, avg_dim, space_dim), + c(memb_dim, dat_dim, avg_dim, space_dim)), + fun = .ACC_bootstrap, + dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, + conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, + ncores_input = ncores, + ncores = ncores) + res <- list(acc = res$acc, + acc_conf.lower = res_conf$acc_conf.lower, + acc_conf.upper = res_conf$acc_conf.upper, + macc = res$macc, + macc_conf.lower = res_conf$macc_conf.lower, + macc_conf.upper = res_conf$macc_conf.upper) + + } + + } + + return(res) +} + +.ACC <- function(exp, obs, dat_dim = 'dataset', #space_dim = c('lat', 'lon'), + avg_dim = 'sdate', #memb_dim = NULL, + lon = NULL, lat = NULL, lonlatbox = NULL, + conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE, + ncores_input = NULL) { + +# if (is.null(avg_dim)) + # exp: [space_dim, dat_exp] + # obs: [space_dim, dat_obs] +# if (!is.null(avg_dim)) + # exp: [space_dim, avg_dim, dat_exp] + # obs: [space_dim, avg_dim, dat_obs] + + # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs]. 
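+
+  # Editorial sketch (illustrative, not from the original source): for one
+  # exp/obs pair, the statistic the loop below computes is the uncentered
+  # pattern correlation of the anomaly fields over all grid points, i.e.
+  #   acc <- sum(exp_sub * obs_sub, na.rm = TRUE) /
+  #          sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE))
+  # evaluated once overall, or once per 'avg_dim' step if 'avg_dim' is not NULL.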
+ + nexp <- as.numeric(dim(exp)[length(dim(exp))]) + nobs <- as.numeric(dim(obs)[length(dim(obs))]) + + if (is.null(avg_dim)) { + acc <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs)) + if (conf) { + conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) + conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) + if (conftype == 'bootstrap') { + ndraw <- 100 + acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw)) + } + } + + } else { + acc <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) + names(dim(acc))[3] <- avg_dim + macc <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) + if (conf) { + conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) + conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) + if (conftype == 'bootstrap') { + ndraw <- 100 + acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1], ndraw)) + macc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw)) + } + } + } + + # Per-paired exp and obs. NAs should be in the same position in both exp and obs + for (iobs in 1:nobs) { + for (iexp in 1:nexp) { + exp_sub <- ClimProjDiags::Subset(exp, dat_dim, iexp, drop = 'selected') + obs_sub <- ClimProjDiags::Subset(obs, dat_dim, iobs, drop = 'selected') + # dim: [space_dim] + + # Variance(iexp) should not take into account any point + # that is not available in iobs and therefore not accounted for + # in covariance(iexp, iobs) and vice-versa + exp_sub[is.na(obs_sub)] <- NA + obs_sub[is.na(exp_sub)] <- NA + + if (is.null(avg_dim)) { + # ACC + top <- sum(exp_sub*obs_sub, na.rm = TRUE) #a number + bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) + acc[iexp, iobs] <- top/bottom #a number + # handle bottom = 0 + if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA + # pval and conf + if (pval | conf) { + if (conftype == "parametric") { + # calculate effective sample size along space_dim + # combine space_dim into one dim first + obs_tmp <- array(obs_sub, dim = c(space = length(obs_sub))) + eno <- Eno(obs_tmp, 'space', ncores = ncores_input) # a number + if (pval) { + t <- qt(conf.lev, eno - 2) # a number + p.val[iexp, iobs] <- sqrt(t^2 / (t^2 + eno - 2)) + } + if (conf) { + conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3)) + conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3)) + } + } + } + + } else { #avg_dim is not NULL + # MACC + top <- sum(exp_sub*obs_sub, na.rm = TRUE) #a number + bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) + macc[iexp, iobs] <- top/bottom #a number + # handle bottom = 0 + if (is.infinite(macc[iexp, iobs])) macc[iexp, iobs] <- NA + # ACC + for (i in 1:dim(acc)[3]) { #NOTE: use sapply!!! 
+          exp_sub_i <- ClimProjDiags::Subset(exp_sub, avg_dim, i, drop = 'selected')
+          obs_sub_i <- ClimProjDiags::Subset(obs_sub, avg_dim, i, drop = 'selected')
+          #dim: [space_dim]
+          top <- sum(exp_sub_i*obs_sub_i, na.rm = TRUE)  #a number
+          bottom <- sqrt(sum(exp_sub_i^2, na.rm = TRUE) * sum(obs_sub_i^2, na.rm = TRUE))
+          acc[iexp, iobs, i] <- top/bottom  #a number
+          # handle bottom = 0
+          if (is.infinite(acc[iexp, iobs, i])) acc[iexp, iobs, i] <- NA
+        }
+
+        # pval and conf
+        if (pval | conf) {
+          if (conftype == "parametric") {
+            # calculate effective sample size along space_dim
+            # combine space_dim into one dim first
+            obs_tmp <- array(obs_sub, dim = c(space = prod(dim(obs_sub)[-length(dim(obs_sub))]),
+                                              dim(obs_sub)[length(dim(obs_sub))]))
+            eno <- Eno(obs_tmp, 'space', ncores = ncores_input)  # a vector of avg_dim
+            if (pval) {
+              t <- qt(conf.lev, eno - 2)  # a vector of avg_dim
+              p.val[iexp, iobs, ] <- sqrt(t^2 / (t^2 + eno - 2))
+            }
+            if (conf) {
+              conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3))
+              conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3))
+            }
+          }
+        }
+
+      }  # if avg_dim is not NULL
+
+    }
+  }
+
+#------------------------------------------------
+
+
+
+  # Return output
+  if (is.null(avg_dim)) {
+    if (conf & pval) {
+      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
+                  p.val = p.val))
+    } else if (conf & !pval) {
+      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper))
+    } else if (!conf & pval) {
+      return(list(acc = acc, p.val = p.val))
+    } else {
+      return(list(acc = acc))
+    }
+  } else {
+    if (conf & pval) {
+      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
+                  p.val = p.val, macc = macc))
+    } else if (conf & !pval) {
+      return(list(acc = acc, conf.lower = conf.lower, conf.upper = conf.upper,
+                  macc = macc))
+    } else if (!conf & pval) {
+      return(list(acc = acc, p.val = p.val, macc = macc))
+    } else {
+      return(list(acc = acc, macc = macc))
+    }
+  }
+
+}
+
+
+.ACC_bootstrap <- function(exp, obs, dat_dim = 'dataset', #space_dim = c('lat', 'lon'),
+                           avg_dim = 'sdate', memb_dim = NULL,
+                           lon = NULL, lat = NULL, lonlatbox = NULL,
+                           conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE,
+                           ncores_input = NULL) {
+# if (is.null(avg_dim))
+  # exp: [memb_exp, dat_exp, space_dim]
+  # obs: [memb_obs, dat_obs, space_dim]
+# if (!is.null(avg_dim))
+  # exp: [memb_exp, dat_exp, avg_dim, space_dim]
+  # obs: [memb_obs, dat_obs, avg_dim, space_dim]
+
+  nexp <- as.numeric(dim(exp)[2])
+  nobs <- as.numeric(dim(obs)[2])
+  nmembexp <- as.numeric(dim(exp)[1])
+  nmembobs <- as.numeric(dim(obs)[1])
+
+  ndraw <- 100
+  if (is.null(avg_dim)) {
+    acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
+  } else {
+    acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[3], ndraw))
+    macc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw))
+  }
+
+  for (jdraw in 1:ndraw) {
+    # randomly choose a member index for each point of the matrix
+    indexp <- array(sample(nmembexp, size = prod(dim(exp)[-c(length(dim(exp)) - 1, length(dim(exp)))]),
+                           replace = TRUE),
+                    dim = dim(exp))
+    indobs <- array(sample(nmembobs, size = prod(dim(obs)[-c(length(dim(obs)) - 1, length(dim(obs)))]),
+                           replace = TRUE),
+                    dim = dim(obs))
+
+    # combine matrix of data and random index
+    varindexp <- abind::abind(exp, indexp, along = length(dim(exp)) + 1)
+    varindobs <- abind::abind(obs, indobs, along = length(dim(obs)) + 1)
+
+    #select
randomly the members for each point of the matrix +# if (is.null(avg_dim)) { + + drawexp <- array( + apply(varindexp, c(2:length(dim(exp))), function(x) x[,1][x[,2]] ), + dim = dim(exp)) + drawobs <- array( + apply(varindobs, c(2:length(dim(obs))), function(x) x[,1][x[,2]] ), + dim = dim(obs)) + + # ensemble mean before .ACC + drawexp <- MeanDims(drawexp, memb_dim, na.rm = TRUE) + drawobs <- MeanDims(drawobs, memb_dim, na.rm = TRUE) + # Reorder + if (is.null(avg_dim)) { + drawexp <- Reorder(drawexp, c(2, 3, 1)) + drawobs <- Reorder(drawobs, c(2, 3, 1)) + } else { + drawexp <- Reorder(drawexp, c(3, 4, 2, 1)) + drawobs <- Reorder(drawobs, c(3, 4, 2, 1)) + } + + #calculate the ACC of the randomized field + tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, avg_dim = avg_dim, + ncores_input = ncores_input) + if (is.null(avg_dim)) { + acc_draw[, , jdraw] <- tmpACC$acc + } else { + acc_draw[, , , jdraw] <- tmpACC$acc + macc_draw[, , jdraw] <- tmpACC$macc + } + } + + #calculate the confidence interval + if (is.null(avg_dim)) { + acc_conf.upper <- apply(acc_draw, c(1, 2), + function (x) { + quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + acc_conf.lower <- apply(acc_draw, c(1, 2), + function (x) { + quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + + } else { + acc_conf.upper <- apply(acc_draw, c(1, 2, 3), + function (x) { + quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + acc_conf.lower <- apply(acc_draw, c(1, 2, 3), + function (x) { + quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + macc_conf.upper <- apply(macc_draw, c(1, 2), + function (x) { + quantile(x, 1 - (1 - conf.lev) / 2, na.rm = TRUE)}) + macc_conf.lower <- apply(macc_draw, c(1, 2), + function (x) { + quantile(x, (1 - conf.lev) / 2, na.rm = TRUE)}) + } + + # Return output + if (is.null(avg_dim)) { + return(list(acc_conf.lower = acc_conf.lower, + acc_conf.upper = acc_conf.upper)) + } else { + return(list(acc_conf.lower = acc_conf.lower, + acc_conf.upper = acc_conf.upper, + macc_conf.lower = macc_conf.lower, + macc_conf.upper = macc_conf.upper)) + } + +} diff --git a/R/AMV.R b/R/AMV.R index 985444986e2cec5c58e8548428ce676f63f50683..a51a819def8e8b163e10b01eb36cad4673692b13 100644 --- a/R/AMV.R +++ b/R/AMV.R @@ -6,13 +6,13 @@ #'time scales. The AMV index is computed as the difference of weighted-averaged #'SST anomalies over the North Atlantic region (0ºN-60ºN, 280ºE-360ºE) and the #'weighted-averaged SST anomalies over 60ºS-60ºN, 0ºE-360ºE (Trenberth & -#'Dennis, 2005; Doblas-Reyes et al., 2013). +#'Dennis, 2005; Doblas-Reyes et al., 2013). If different members and/or datasets are provided, +#'the climatology (used to calculate the anomalies) is computed individually for all of them. #' -#'@param data A numerical array to be used for the index computation with the -#' dimensions: 1) latitude, longitude, start date, forecast month, and member -#' (in case of decadal predictions), 2) latitude, longitude, year, month and -#' member (in case of historical simulations), or 3) latitude, longitude, year -#' and month (in case of observations or reanalyses). This data has to be +#'@param data A numerical array to be used for the index computation with, at least, the +#' dimensions: 1) latitude, longitude, start date and forecast month +#' (in case of decadal predictions), 2) latitude, longitude, year and month +#' (in case of historical simulations or observations). This data has to be #' provided, at least, over the whole region needed to compute the index. 
 #'@param data_lats A numeric vector indicating the latitudes of the data.
 #'@param data_lons A numeric vector indicating the longitudes of the data.
@@ -45,21 +45,22 @@
 #' anomalies, set it to FALSE. The default value is NULL.\cr
 #' In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 #' to the first forecast year, and the climatology is automatically computed
-#' over the actual common period for the different forecast years.
+#' over the common calendar period for the different forecast years.
 #'@param year_dim A character string indicating the name of the year dimension.
 #' The default value is 'year'. Only used if parameter 'type' is 'hist' or
 #' 'obs'.
 #'@param month_dim A character string indicating the name of the month
 #' dimension. The default value is 'month'. Only used if parameter 'type' is
 #' 'hist' or 'obs'.
-#'@param member_dim A character string indicating the name of the member
-#' dimension. The default value is 'member'. Only used if parameter 'type' is
-#' 'dcpp' or 'hist'.
+#'@param na.rm A logical value indicating whether to remove NA values. The
+#' default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
 #'
-#'@return A numerical array of the AMV index with the dimensions of:
-#'  1) sdate, forecast year, and member (in case of decadal predictions);
-#'  2) year and member (in case of historical simulations); or
-#'  3) year (in case of observations or reanalyses).
+#'@return A numerical array with the AMV index, with the same dimensions as
+#' 'data' except lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal
+#' predictions (historical simulations or observations). In case of decadal
+#' predictions, a new dimension 'fyear' is added.
#' #'@examples #' ## Observations or reanalyses @@ -86,7 +87,7 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lon', mask = NULL, monini = 11, fmonth_dim = 'fmonth', sdate_dim = 'sdate', indices_for_clim = NULL, year_dim = 'year', month_dim = 'month', - member_dim = 'member') { + na.rm = TRUE, ncores = NULL) { ## Input Checks # data @@ -130,6 +131,13 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop(paste0("The longitude dimension of parameter 'data' must be the same", " length of parameter 'data_lons'.")) } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } # mask if (!is.null(mask)) { if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & @@ -141,7 +149,7 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), - fun = fun_mask, mask = mask)$output1 + fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", @@ -200,14 +208,9 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'month_dim' is not found in 'data' dimension.") } } - # member_dim - if (type == 'hist' | type == 'dcpp') { - if (!(is.character(member_dim) & length(member_dim) == 1)) { - stop("Parameter 'member_dim' must be a character string.") - } - if (!member_dim %in% names(dim(data))) { - stop("Parameter 'member_dim' is not found in 'data' dimension.") - } + # na.rm + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") } ## Regions for AMV (Doblas-Reyes et al., 2013) @@ -230,9 +233,16 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo data <- ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) - INDEX <- .Indices(data = data, type = type, monini = monini, - indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim, - sdate_dim = sdate_dim, year_dim = year_dim, - month_dim = month_dim, member_dim = member_dim) + if (type == 'dcpp'){ + target_dims <- c(sdate_dim, fmonth_dim) + } else if (type %in% c('hist','obs')){ + target_dims <- c(year_dim, month_dim) + } + + INDEX <- multiApply::Apply(data = data, target_dims = target_dims, fun = .Indices, + type = type, monini = monini, indices_for_clim = indices_for_clim, + fmonth_dim = fmonth_dim, sdate_dim = sdate_dim, + year_dim = year_dim, month_dim = month_dim, + na.rm = na.rm, ncores = ncores)$output1 return(INDEX) } diff --git a/R/Ano.R b/R/Ano.R index 75a3edfef98ec1876c42541286532a444b3bd4d6..e0a69db232230da28d8dde61fc1d92d07f123de5 100644 --- a/R/Ano.R +++ b/R/Ano.R @@ -13,8 +13,7 @@ #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' -#'@return An array with same dimensions as parameter 'data' but with different -#' dimension order. The dimensions in parameter 'clim' are ordered first. +#'@return An array with same dimensions as parameter 'data'. 
#' #'@examples #'# Load sample data as in Load() example: @@ -22,8 +21,6 @@ #'clim <- Clim(sampleData$mod, sampleData$obs) #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3)) -#'ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3)) #'\donttest{ #'PlotAno(ano_exp, ano_obs, startDates, #' toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'), @@ -62,38 +59,62 @@ if (any(is.null(names(dim(clim))))| any(nchar(names(dim(clim))) == 0)) { stop("Parameter 'clim' must have dimension names.") } - for (i in 1:length(dim(clim))) { - if (!(names(dim(clim))[i] %in% names(dim(data)))) { - stop("Parameter 'data' must have all the dimensions of parameter 'clim'.") - } else { - pos <- names(dim(data))[which(names(dim(clim))[i] == names(dim(data)))] - if ((dim(clim)[i] != dim(data)[pos])) { - stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.") - } + ## data and clim + if (!all(names(dim(clim)) %in% names(dim(data)))) { + stop("Parameter 'data' must have all the dimensions of parameter 'clim'.") + } else { + pos <- names(dim(data))[match(names(dim(clim)), names(dim(data)))] + if (any(dim(clim) != dim(data)[pos])) { + stop("Some dimensions of parameter 'clim' have different length from parameter 'data'.") } } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | - length(ncores) > 1) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be a positive integer.") + } else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate Ano + parallel_compute <- TRUE + if (is.null(ncores)) { + parallel_compute <- FALSE + } else if (ncores == 1) { + parallel_compute <- FALSE + } + if (!parallel_compute) { + target_dims_ind <- match(names(dim(clim)), names(dim(data))) + if (any(target_dims_ind != sort(target_dims_ind))) { + clim <- Reorder(clim, match(sort(target_dims_ind), target_dims_ind)) + } + if (length(dim(data)) == length(dim(clim))) { + res <- data - clim + } else { + target_dims_ind <- match(names(dim(clim)), names(dim(data))) + margin_dims_ind <- c(1:length(dim(data)))[-target_dims_ind] + res <- apply(data, margin_dims_ind, .Ano, clim) + res <- array(res, dim = dim(data)[c(target_dims_ind, margin_dims_ind)]) + } + } else { + res <- Apply(list(data), + target_dims = names(dim(clim)), + output_dims = names(dim(clim)), + fun = .Ano, + clim = clim, + ncores = ncores)$output1 + } - res <- Apply(list(data), - target_dims = names(dim(clim)), - output_dims = names(dim(clim)), - fun = .Ano, - clim = clim, - ncores = ncores)$output1 + # Reorder dim back to data + if (any(dim(res) != dim(data))) { + res <- Reorder(res, names(dim(data))) + } - return(invisible(res)) + return(invisible(res)) } .Ano <- function(data, clim) { - ano <- data - clim - return(ano) + data - clim } diff --git a/R/Ano_CrossValid.R b/R/Ano_CrossValid.R new file mode 100644 index 0000000000000000000000000000000000000000..22e710a1a3ded8b428eb7493a1dd0c0aec489d18 --- /dev/null +++ b/R/Ano_CrossValid.R @@ -0,0 +1,226 @@ +#'Compute anomalies in cross-validation mode +#' +#'Compute the anomalies from the arrays of the experimental and observational +#'data output by subtracting the climatologies computed with a leave-one-out +#'cross validation technique and a per-pair method (Garcia-Serrano and +#'Doblas-Reyes, CD, 2012). 
+#'Per-pair climatology means that only the start dates covered by the
+#'whole experimental/observational datasets will be used. In other words, the
+#'start dates that do not have complete values along the 'dat_dim' dimension
+#'of both 'exp' and 'obs' are excluded when computing the climatologies.
+#'
+#'@param exp A named numeric array of experimental data, with at least
+#' dimensions 'time_dim' and 'dat_dim'.
+#'@param obs A named numeric array of observational data, same dimensions as
+#' parameter 'exp' except along 'dat_dim'.
+#'@param time_dim A character string indicating the name of the time dimension.
+#' The default value is 'sdate'.
+#'@param dat_dim A character vector indicating the name of the dataset and
+#' member dimensions. When calculating the climatology, if data at one
+#' startdate (i.e., 'time_dim') is not complete along 'dat_dim', this startdate
+#' along 'dat_dim' will be discarded. The default value is
+#' "c('dataset', 'member')".
+#'@param memb_dim A character string indicating the name of the member
+#' dimension. Only used when parameter 'memb' is FALSE. It must be one element
+#' in 'dat_dim'. The default value is 'member'.
+#'@param memb A logical value indicating whether to subtract the climatology
+#' based on the individual members (TRUE) or the ensemble mean over all
+#' members (FALSE) when calculating the anomalies. The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list of 2:
+#'\item{$exp}{
+#' A numeric array with the same dimensions as 'exp'. The dimension order may
+#' change.
+#'}
+#'\item{$obs}{
+#' A numeric array with the same dimensions as 'obs'. The dimension order may
+#' change.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'anomalies <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'\dontrun{
+#'PlotAno(anomalies$exp, anomalies$obs, startDates,
+#'        toptitle = paste('anomalies'), ytitle = c('K', 'K', 'K'),
+#'        legends = 'ERSST', biglab = FALSE, fileout = 'tos_ano_crossvalid.eps')
+#'}
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
+                           memb_dim = 'member', memb = TRUE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ",
+                "time_dim and dat_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (!all(names(dim(exp)) %in% names(dim(obs))) |
+      !all(names(dim(obs)) %in% names(dim(exp)))) {
+    stop("Parameter 'exp' and 'obs' must have the same dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.character(dat_dim)) {
+    stop("Parameter 'dat_dim' must be a character vector.")
+  }
+  if (!all(dat_dim %in% names(dim(exp))) |
!all(dat_dim %in% names(dim(obs)))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## memb_dim + if (!memb) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!memb_dim %in% dat_dim) { + stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + for (i in 1:length(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim[i])] + name_obs <- name_obs[-which(name_obs == dat_dim[i])] + } + if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same length of ", + "all dimensions except 'dat_dim'.")) + } + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + if (any(order_obs != sort(order_obs))) { + obs <- Reorder(obs, order_obs) + } + + #----------------------------------- + # Per-paired method: If any sdate along dat_dim is NA, turn all sdate points along dat_dim into NA. + pos <- rep(0, length(dat_dim)) # dat_dim: [dataset, member] + for (i in 1:length(dat_dim)) { + pos[i] <- which(names(dim(obs)) == dat_dim[i]) + } + outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) + + MeanDims(obs, pos, na.rm = FALSE) + outrows_obs <- outrows_exp + + for (i in 1:length(pos)) { + outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) + outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) + } + exp_for_clim <- exp + obs_for_clim <- obs + exp_for_clim[which(is.na(outrows_exp))] <- NA + obs_for_clim[which(is.na(outrows_obs))] <- NA + + #----------------------------------- + + res <- Apply(list(exp, obs, exp_for_clim, obs_for_clim), + target_dims = c(time_dim, dat_dim), + fun = .Ano_CrossValid, + memb_dim = memb_dim, memb = memb, + ncores = ncores) + + return(res) +} + +.Ano_CrossValid <- function(exp, obs, exp_for_clim, obs_for_clim, + memb_dim = 'member', memb = TRUE, ncores = NULL) { + # exp: [sdate, dat_dim, memb_dim] + # obs: [sdate, dat_dim, memb_dim] + ano_exp_list <- vector('list', length = dim(exp)[1]) #length: [sdate] + ano_obs_list <- vector('list', length = dim(obs)[1]) + + for (tt in 1:dim(exp)[1]) { #[sdate] + # calculate clim + exp_sub <- ClimProjDiags::Subset(exp_for_clim, 1, c(1:dim(exp)[1])[-tt]) + obs_sub <- ClimProjDiags::Subset(obs_for_clim, 1, c(1:dim(obs)[1])[-tt]) + clim_exp <- apply(exp_sub, c(1:length(dim(exp)))[-1], mean, na.rm = TRUE) # average out time_dim -> [dat, memb] + clim_obs <- apply(obs_sub, c(1:length(dim(obs)))[-1], mean, na.rm = TRUE) + + # ensemble mean + if (!memb) { + if (is.null(dim(clim_exp)) | length(dim(clim_exp)) == 1) { #dim: [member] + clim_exp <- mean(clim_exp, na.rm = TRUE) # a number + clim_obs <- mean(clim_obs, na.rm = TRUE) + } else { + pos <- which(names(dim(clim_exp)) == memb_dim) + pos <- c(1:length(dim(clim_exp)))[-pos] + dim_name <- names(dim(clim_exp)) + dim_exp_ori <- dim(clim_exp) + dim_obs_ori <- 
dim(clim_obs)
+
+        clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
+        clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
+        if (is.null(names(dim(as.array(clim_exp))))) {
+          clim_exp <- as.array(clim_exp)
+          clim_obs <- as.array(clim_obs)
+          names(dim(clim_exp)) <- dim_name[pos]
+          names(dim(clim_obs)) <- dim_name[pos]
+        }
+
+        # Expand it back
+        clim_exp_tmp <- array(clim_exp, dim = c(dim_exp_ori[pos], dim_exp_ori[-pos]))
+        clim_obs_tmp <- array(clim_obs, dim = c(dim_obs_ori[pos], dim_obs_ori[-pos]))
+        # Reorder it back to dim(clim_exp)
+        tmp <- match(dim_exp_ori, dim(clim_exp_tmp))
+        if (any(tmp != sort(tmp))) {
+          clim_exp <- Reorder(clim_exp_tmp, tmp)
+          clim_obs <- Reorder(clim_obs_tmp, tmp)
+        } else {
+          clim_exp <- clim_exp_tmp
+          clim_obs <- clim_obs_tmp
+        }
+      }
+    }
+    # calculate ano
+    ano_exp_list[[tt]] <- ClimProjDiags::Subset(exp, 1, tt, drop = 'selected') - clim_exp
+    ano_obs_list[[tt]] <- ClimProjDiags::Subset(obs, 1, tt, drop = 'selected') - clim_obs
+  }
+
+  ano_exp <- array(unlist(ano_exp_list), dim = c(dim(exp)[-1], dim(exp)[1]))
+  ano_exp <- Reorder(ano_exp, c(length(dim(exp)), 1:(length(dim(exp)) - 1)))
+  ano_obs <- array(unlist(ano_obs_list), dim = c(dim(obs)[-1], dim(obs)[1]))
+  ano_obs <- Reorder(ano_obs, c(length(dim(obs)), 1:(length(dim(obs)) - 1)))
+
+  return(list(exp = ano_exp, obs = ano_obs))
+}
diff --git a/R/BrierScore.R b/R/BrierScore.R
new file mode 100644
index 0000000000000000000000000000000000000000..1363f6155ff53a7c73cb8afd3d630f05fceb343a
--- /dev/null
+++ b/R/BrierScore.R
@@ -0,0 +1,370 @@
+#'Compute Brier score, its decomposition, and Brier skill score
+#'
+#'Compute the Brier score (BS) and the components of its standard decomposition
+#'with the two within-bin components described in Stephenson et al. (2008). It
+#'also returns the bias-corrected decomposition of the BS (Ferro and Fricker,
+#'2012). BSS has the climatology as the reference forecast.
+#'
+#'@param exp A vector or a numeric array with named dimensions. It should be
+#' the predicted probabilities, which are within the range [0, 1] if memb_dim
+#' doesn't exist. If it has memb_dim, the values should be 0 or 1, and the
+#' predicted probabilities will be computed by ensemble mean. The dimensions
+#' must at least have 'time_dim'.
+#'@param obs A numeric array with named dimensions of the binary observations
+#' (0 or 1). The dimensions must be the same as 'exp' except memb_dim, which is
+#' optional. If it has 'memb_dim', then the length must be 1. If 'dat_dim'
+#' exists, its length can be different from that in 'exp'.
+#'@param thresholds A numeric vector used to bin the forecasts. The default
+#' value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are
+#' \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}.
+#'@param time_dim A character string indicating the name of dimension along
+#' which Brier score is computed. The default value is 'sdate'.
+#'@param dat_dim A character string indicating the name of dataset dimension in
+#' 'exp' and 'obs'. The length of this dimension can be different between
+#' 'exp' and 'obs'. The default value is NULL.
+#'@param memb_dim A character string of the name of the member dimension in
+#' 'exp' (and 'obs', optional). The function will do the ensemble mean
+#' over this dimension. If there is no member dimension, set NULL. The default
+#' value is NULL.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list that contains:
+#'\item{$rel}{standard reliability}
+#'\item{$res}{standard resolution}
+#'\item{$unc}{standard uncertainty}
+#'\item{$bs}{Brier score}
+#'\item{$bs_check_res}{rel - res + unc}
+#'\item{$bss_res}{(res - rel) / unc}
+#'\item{$gres}{generalized resolution}
+#'\item{$bs_check_gres}{rel - gres + unc}
+#'\item{$bss_gres}{(gres - rel) / unc}
+#'\item{$rel_bias_corrected}{bias-corrected rel}
+#'\item{$gres_bias_corrected}{bias-corrected gres}
+#'\item{$unc_bias_corrected}{bias-corrected unc}
+#'\item{$bss_bias_corrected}{(gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected}
+#'\item{$nk}{number of forecasts in each bin}
+#'\item{$fkbar}{average probability of each bin}
+#'\item{$okbar}{relative frequency that the observed event occurred}
+#'The data type and dimensions of the items depend on if the input 'exp' and
+#''obs' are:\cr
+#'(a) Vectors\cr
+#'(b) Arrays with 'dat_dim' specified\cr
+#'(c) Arrays with no 'dat_dim' specified\cr
+#'Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres',
+#''bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected',
+#''unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array
+#'with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs'
+#'except 'time_dim' and 'memb_dim') (c) an array with dimensions of
+#''exp' and 'obs' except 'time_dim' and 'memb_dim'\cr
+#'Items 'nk', 'fkbar', and 'okbar' are (a) a vector of length of the bin number
+#'determined by 'thresholds' (b) an array with dimensions c(nexp, nobs,
+#'no. of bins, all the rest dimensions in 'exp' and 'obs' except 'time_dim' and
+#''memb_dim') (c) an array with dimensions c(no. of bins, all the rest dimensions
+#'in 'exp' and 'obs' except 'time_dim' and 'memb_dim')
+#'
+#'@references
+#'Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr
+#'Stephenson et al. (2008). Two extra components in the Brier score decomposition.
+#'  Weather and Forecasting, 23: 752-757.\cr
+#'Ferro and Fricker (2012). A bias-corrected decomposition of the BS.
+#'  Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.
+#'
+#'@examples
+#'# Inputs are vectors
+#'exp <- runif(10)
+#'obs <- round(exp)
+#'x <- BrierScore(exp, obs)
+#'
+#'# Inputs are arrays
+#'example(Load)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3))
+#'bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3))
+#'res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member')
+#'
+#'@import multiApply
+#'@export
+BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sdate',
+                       dat_dim = NULL, memb_dim = NULL, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric vector or a numeric array.")
+  }
+  if (is.null(dim(exp))) {  #is vector
+    dim(exp) <- c(length(exp))
+    names(dim(exp)) <- time_dim
+  }
+  if (is.null(dim(obs))) {  #is vector
+    dim(obs) <- c(length(obs))
+    names(dim(obs)) <- time_dim
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (any(!obs %in% c(0, 1))) {
+    stop("Parameter 'obs' must be binary events (0 or 1).")
+  }
+  ## thresholds
+  if (!is.numeric(thresholds) | !is.vector(thresholds)) {
+    stop("Parameter 'thresholds' must be a numeric vector.")
+  }
+  if (any(thresholds <= 0 | thresholds >= 1)) {
+    stop("Parameter 'thresholds' must be between 0 and 1 as the bin-breaks.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' and 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
+    }
+    if (memb_dim %in% names(dim(obs))) {
+      if (dim(obs)[memb_dim] != 1) {
+        stop("The length of parameter 'memb_dim' in 'obs' must be 1.")
+      }
+    }
+  }
+  ## exp and obs (2)
+  if (is.null(memb_dim)) {
+    if (max(exp) > 1 | min(exp) < 0) {
+      stop("Parameter 'exp' must be within [0, 1] range.")
+    }
+  } else {
+    if (any(!exp %in% c(0, 1))) {
+      stop("Parameter 'exp' must be 0 or 1 if it has memb_dim.")
+    }
+  }
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(memb_dim)) {
+    name_exp <- name_exp[-which(name_exp == memb_dim)]
+    if (memb_dim %in% name_obs) {
+      name_obs <- name_obs[-which(name_obs == memb_dim)]
+    }
+  }
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  if (any(name_exp != name_obs)) {
+    stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ",
+                "of all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ",
+                "of all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Brier score
+
+  ## ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim)
+    if (memb_dim %in% names(dim(obs))) {
+      obs <- MeanDims(obs, memb_dim)
+    }
+  }
+
+  if (is.null(dat_dim)) {
+    res <- Apply(list(exp, obs),
+                 target_dims = list(c(time_dim),
+                                    c(time_dim)),
+                 fun = .BrierScore,
+                 thresholds = thresholds,
+                 ncores = ncores)
+  } else {
+    res <- Apply(list(exp, obs),
+                 target_dims = list(c(time_dim, dat_dim),
+                                    c(time_dim, dat_dim)),
+                 fun = .BrierScore,
+                 thresholds = thresholds,
+                 ncores = ncores)
+  }
+
+  return(res)
+}
+
+.BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1)) {
+
+  # exp: [sdate] or [sdate, nexp]
+  # obs: [sdate] or [sdate, nobs]
+  if (length(dim(exp)) == 2) {
+    nexp <- as.numeric(dim(exp)[2])
+    nobs <- as.numeric(dim(obs)[2])
+    exp_ori <- exp
+    obs_ori <- obs
+    # Create empty arrays
+    arr_rel <- arr_res <- arr_unc <- arr_bs <- arr_bs_check_res <- arr_bss_res <-
+      arr_gres <- arr_bs_check_gres <- arr_bss_gres <- arr_rel_bias_corrected <-
+      arr_gres_bias_corrected <- arr_unc_bias_corrected <- arr_bss_bias_corrected <-
+      array(dim = c(nexp = nexp, nobs = nobs))
+    arr_nk <- arr_fkbar <- arr_okbar <-
+      array(dim = c(nexp = nexp, nobs = nobs, bin = length(thresholds) + 1))
+
+  } else {
+    nexp <- 1
+    nobs <- 1
+  }
+
+  for (n_exp in 1:nexp) {
+    for (n_obs in 1:nobs) {
+      if (exists('exp_ori')) {
+        exp <- exp_ori[, n_exp]
+        obs <- obs_ori[, n_obs]
+      }
+      n <- length(exp)
+      nbins <- length(thresholds) + 1  # Number of bins
+      bins <- vector('list', nbins)  #as.list(paste("bin", 1:nbins, sep = ""))
+      for (i in 1:nbins) {
+        if (i == 1) {
+          bins[[i]] <- list(which(exp >= 0 & exp < thresholds[i]))
+        } else if (i == nbins) {
+          bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp <= 1))
+        } else {
+          bins[[i]] <- list(which(exp >= thresholds[i - 1] & exp < thresholds[i]))
+        }
+      }
+
+      fkbar <- okbar <- nk <- array(0, dim = nbins)
+      for (i in 1:nbins) {
+        nk[i] <- length(bins[[i]][[1]])
+        fkbar[i] <- sum(exp[bins[[i]][[1]]]) / nk[i]
+        okbar[i] <- sum(obs[bins[[i]][[1]]]) / nk[i]
+      }
+
+      #-----in old .BrierScore()---------
+      #  fkbar[fkbar == Inf] <- 0
+      #  okbar[is.nan(okbar)] <- 0
+      #----------------------------------
+
+      obar <- sum(obs) / length(obs)
+      relsum <- ressum <- term1 <- term2 <- 0
+      for (i in 1:nbins) {
+        if (nk[i] > 0) {
+          relsum <- relsum + nk[i] * (fkbar[i] - okbar[i])^2
+          ressum <- ressum + nk[i] * (okbar[i] - obar)^2
+          for (j in 1:nk[i]) {
+            term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2
+            term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i])
+          }
+        }
+      }
+      rel <- relsum / n
+      res <- ressum / n
+      unc <- obar * (1 - obar)
+      bs <- sum((exp - obs)^2) / n
+      bs_check_res <- rel - res + unc
+      bss_res <- (res - rel) / unc
+      gres <- res - term1 * (1 / n) + term2 * (2 / n)  # Generalized resolution
+      bs_check_gres <- rel - gres + unc  # BS using GRES
+      bss_gres <- (gres - rel) / unc  # BSS using GRES
+
+
+      # Estimating the bias-corrected components of the BS
+      term3 <- array(0, nbins)
+      for (i in 1:nbins) {
+        term3[i] <- (nk[i] / (nk[i] - 1)) * okbar[i] * (1 - okbar[i])
+      }
+      term_a
<- sum(term3, na.rm = T) / n + term_b <- (obar * (1 - obar)) / (n - 1) + rel_bias_corrected <- rel - term_a + gres_bias_corrected <- gres - term_a + term_b + if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { + rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected <- rel_bias_corrected2 + gres_bias_corrected <- gres_bias_corrected2 + } + unc_bias_corrected <- unc + term_b + bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected + + #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { + # cat("No error found \ n") + # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") + #} + + # Add name for nk, fkbar, okbar + names(dim(nk)) <- 'bin' + names(dim(fkbar)) <- 'bin' + names(dim(okbar)) <- 'bin' + + if (exists('exp_ori')) { + arr_rel[n_exp, n_obs] <- rel + arr_res[n_exp, n_obs] <- res + arr_unc[n_exp, n_obs] <- unc + arr_bs[n_exp, n_obs] <- bs + arr_bs_check_res[n_exp, n_obs] <- bs_check_res + arr_bss_res[n_exp, n_obs] <- bss_res + arr_gres[n_exp, n_obs] <- gres + arr_bs_check_gres[n_exp, n_obs] <- bs_check_gres + arr_bss_gres[n_exp, n_obs] <- bss_gres + arr_rel_bias_corrected[n_exp, n_obs] <- rel_bias_corrected + arr_gres_bias_corrected[n_exp, n_obs] <- gres_bias_corrected + arr_unc_bias_corrected[n_exp, n_obs] <- unc_bias_corrected + arr_bss_bias_corrected[n_exp, n_obs] <- bss_bias_corrected + arr_nk[n_exp, n_obs, ] <- nk + arr_fkbar[n_exp, n_obs, ] <- fkbar + arr_okbar[n_exp, n_obs, ] <- okbar + } + + } + } + + if (exists('exp_ori')) { + res_list <- list(rel = arr_rel, res = arr_res, unc = arr_unc, bs = arr_bs, + bs_check_res = arr_bs_check_res, bss_res = arr_bss_res, + gres = arr_gres, bs_check_gres = arr_bs_check_gres, + bss_gres = arr_bss_gres, rel_bias_corrected = arr_rel_bias_corrected, + gres_bias_corrected = arr_gres_bias_corrected, + unc_bias_corrected = arr_unc_bias_corrected, + bss_bias_corrected = arr_bss_bias_corrected, nk = arr_nk, + fkbar = arr_fkbar, okbar = arr_okbar) #bins = list(bins), + } else { + + res_list <- list(rel = rel, res = res, unc = unc, bs = bs, bs_check_res = bs_check_res, + bss_res = bss_res, gres = gres, bs_check_gres = bs_check_gres, + bss_gres = bss_gres, rel_bias_corrected = rel_bias_corrected, + gres_bias_corrected = gres_bias_corrected, + unc_bias_corrected = unc_bias_corrected, + bss_bias_corrected = bss_bias_corrected, nk = nk, fkbar = fkbar, + okbar = okbar) #bins = list(bins), + } + + return(invisible(res_list)) +} diff --git a/R/CDORemap.R b/R/CDORemap.R new file mode 100644 index 0000000000000000000000000000000000000000..fc25b527de4036257c462b28a0d5eb27eac89154 --- /dev/null +++ b/R/CDORemap.R @@ -0,0 +1,1106 @@ +#'Interpolate arrays with longitude and latitude dimensions using CDO +#' +#'This function takes as inputs a multidimensional array (optional), a vector +#'or matrix of longitudes, a vector or matrix of latitudes, a destination grid +#'specification, and the name of a method to be used to interpolate (one of +#'those available in the 'remap' utility in CDO). 
+#'returned (if provided) together with the new longitudes and latitudes.\cr\cr
+#'\code{CDORemap()} permutes by default the dimensions of the input array (if
+#'needed), splits it in chunks (CDO can work with data arrays of up to 4
+#'dimensions), generates a file with the data of each chunk, interpolates it
+#'with CDO, reads it back into R and merges it into a result array. If no
+#'input array is provided, the longitude and latitude vectors will be
+#'transformed only. If the array is already on the desired destination grid,
+#'no transformation is performed (this behaviour works only for lonlat and
+#'gaussian grids). \cr\cr
+#'Any metadata attached to the input data array, longitudes or latitudes will
+#'be preserved or accordingly modified.
+#'
+#'@param data_array Multidimensional numeric array to be interpolated. If
+#'  provided, it must have at least a longitude and a latitude dimension,
+#'  identified by the array dimension names. The names for these dimensions
+#'  must be one of those recognized by s2dv (can be checked with
+#'  \code{s2dv:::.KnownLonNames()} and
+#'  \code{s2dv:::.KnownLatNames()}).
+#'@param lons Numeric vector or array of longitudes of the centers of the grid
+#'  cells. Its size must match the size of the longitude/latitude dimensions
+#'  of the input array.
+#'@param lats Numeric vector or array of latitudes of the centers of the grid
+#'  cells. Its size must match the size of the longitude/latitude dimensions
+#'  of the input array.
+#'@param grid Character string specifying either a name of a target grid
+#'  (recognized by CDO; e.g.: 'r256x128', 't106grid') or a path to another
+#'  NetCDF file from which to read the target grid (a single grid must be
+#'  defined in such a file).
+#'@param method Character string specifying an interpolation method
+#'  (recognized by CDO; e.g.: 'con', 'bil', 'bic', 'dis'). The following
+#'  long names are also supported: 'conservative', 'bilinear', 'bicubic' and
+#'  'distance-weighted'.
+#'@param avoid_writes The step of permutation is needed when the input array
+#'  has more than 3 dimensions and none of the longitude or latitude dimensions
+#'  is in the right-most position (CDO would not accept it without permuting
+#'  previously). This step, executed by default when needed, can be avoided
+#'  at the price of writing more intermediate files (which is usually
+#'  inconvenient) by setting the parameter \code{avoid_writes = FALSE}.
+#'@param crop Whether to crop the data after interpolation with
+#'  'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole
+#'  world as CDO does by default (FALSE). If \code{crop = TRUE} then the
+#'  longitude and latitude borders to crop at are taken as the limits of
+#'  the cells at the borders ('lons' and 'lats' are perceived as cell centers),
+#'  i.e. the resulting array will contain data that covers the same area as
+#'  the input array. This is equivalent to specifying \code{crop = 'preserve'},
+#'  i.e. preserving area. If \code{crop = 'tight'} then the borders to
+#'  crop at are taken as the minimum and maximum cell centers in 'lons' and
+#'  'lats', i.e. the area covered by the resulting array may be smaller if
+#'  interpolating from a coarse grid to a fine grid. The parameter 'crop' also
+#'  accepts a numeric vector of custom borders to crop at:
+#'  c(western border, eastern border, southern border, northern border).
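+#'  As an illustrative sketch (the grid name and box limits here are made-up
+#'  values, not from the original documentation), cropping an interpolated
+#'  field to a custom Euro-Atlantic box could look like:
+#'  \code{CDORemap(tas, lon, lat, 'r360x180', 'bil', crop = c(-80, 40, 20, 80))}.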
+#'@param force_remap Whether to force remapping, even if the input data array
+#'  is already on the target grid.
+#'@param write_dir Path to the directory where to create the intermediate
+#'  files for CDO to work. By default, the R session temporary directory is
+#'  used (\code{tempdir()}).
+#'
+#'@return A list with the following components:
+#'  \item{'data_array'}{The interpolated data array (if an input array
+#'  is provided at all, NULL otherwise).}
+#'  \item{'lons'}{The longitudes of the data on the destination grid.}
+#'  \item{'lats'}{The latitudes of the data on the destination grid.}
+#'@examples
+#' \dontrun{
+#'# Interpolating only vectors of longitudes and latitudes
+#'lon <- seq(0, 360 - 360/50, length.out = 50)
+#'lat <- seq(-90, 90, length.out = 25)
+#'tas2 <- CDORemap(NULL, lon, lat, 't170grid', 'bil', TRUE)
+#'
+#'# Minimal array interpolation
+#'tas <- array(1:50, dim = c(25, 50))
+#'names(dim(tas)) <- c('lat', 'lon')
+#'lon <- seq(0, 360 - 360/50, length.out = 50)
+#'lat <- seq(-90, 90, length.out = 25)
+#'tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE)
+#'
+#'# Metadata can be attached to the inputs. It will be preserved and
+#'# accordingly modified.
+#'tas <- array(1:50, dim = c(25, 50))
+#'names(dim(tas)) <- c('lat', 'lon')
+#'lon <- seq(0, 360 - 360/50, length.out = 50)
+#'metadata <- list(lon = list(units = 'degrees_east'))
+#'attr(lon, 'variables') <- metadata
+#'lat <- seq(-90, 90, length.out = 25)
+#'metadata <- list(lat = list(units = 'degrees_north'))
+#'attr(lat, 'variables') <- metadata
+#'metadata <- list(tas = list(dim = list(lat = list(len = 25,
+#'                                                  vals = lat),
+#'                                       lon = list(len = 50,
+#'                                                  vals = lon)
+#'                           )))
+#'attr(tas, 'variables') <- metadata
+#'tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE)
+#'
+#'# Arrays of any number of dimensions in any order can be provided.
+#'num_lats <- 25
+#'num_lons <- 50
+#'tas <- array(1:(10*num_lats*10*num_lons*10),
+#'             dim = c(10, num_lats, 10, num_lons, 10))
+#'names(dim(tas)) <- c('a', 'lat', 'b', 'lon', 'c')
+#'lon <- seq(0, 360 - 360/num_lons, length.out = num_lons)
+#'metadata <- list(lon = list(units = 'degrees_east'))
+#'attr(lon, 'variables') <- metadata
+#'lat <- seq(-90, 90, length.out = num_lats)
+#'metadata <- list(lat = list(units = 'degrees_north'))
+#'attr(lat, 'variables') <- metadata
+#'metadata <- list(tas = list(dim = list(a = list(),
+#'                                       lat = list(len = num_lats,
+#'                                                  vals = lat),
+#'                                       b = list(),
+#'                                       lon = list(len = num_lons,
+#'                                                  vals = lon),
+#'                                       c = list()
+#'                           )))
+#'attr(tas, 'variables') <- metadata
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE)
+#'# The step of permutation can be avoided but more intermediate file writes
+#'# will be performed.
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+#'
+#'# If the provided array has the longitude or latitude dimension in the
+#'# right-most position, the same number of file writes will be performed,
+#'# even if avoid_writes = FALSE.
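+#'# (Aside, summarizing the parameter description above: avoid_writes = TRUE
+#'# permutes the array in memory so that fewer intermediate files are needed,
+#'# while avoid_writes = FALSE skips the permutation at the cost of writing
+#'# one intermediate file per chunk.)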
+#'num_lats <- 25
+#'num_lons <- 50
+#'tas <- array(1:(10*num_lats*10*num_lons),
+#'             dim = c(10, num_lats, 10, num_lons))
+#'names(dim(tas)) <- c('a', 'lat', 'b', 'lon')
+#'lon <- seq(0, 360 - 360/num_lons, length.out = num_lons)
+#'metadata <- list(lon = list(units = 'degrees_east'))
+#'attr(lon, 'variables') <- metadata
+#'lat <- seq(-90, 90, length.out = num_lats)
+#'metadata <- list(lat = list(units = 'degrees_north'))
+#'attr(lat, 'variables') <- metadata
+#'metadata <- list(tas = list(dim = list(a = list(),
+#'                                       lat = list(len = num_lats,
+#'                                                  vals = lat),
+#'                                       b = list(),
+#'                                       lon = list(len = num_lons,
+#'                                                  vals = lon)
+#'                           )))
+#'attr(tas, 'variables') <- metadata
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE)
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+#'
+#'# An example of an interpolation from and onto a rectangular regular grid
+#'num_lats <- 25
+#'num_lons <- 50
+#'tas <- array(1:(1*num_lats*num_lons), dim = c(num_lats, num_lons))
+#'names(dim(tas)) <- c('y', 'x')
+#'lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+#'             dim = c(num_lons, num_lats))
+#'metadata <- list(lon = list(units = 'degrees_east'))
+#'names(dim(lon)) <- c('x', 'y')
+#'attr(lon, 'variables') <- metadata
+#'lat <- t(array(seq(-90, 90, length.out = num_lats),
+#'               dim = c(num_lats, num_lons)))
+#'metadata <- list(lat = list(units = 'degrees_north'))
+#'names(dim(lat)) <- c('x', 'y')
+#'attr(lat, 'variables') <- metadata
+#'tas2 <- CDORemap(tas, lon, lat, 'r100x50', 'bil')
+#'
+#'# An example of an interpolation from an irregular grid onto a gaussian grid
+#'num_lats <- 25
+#'num_lons <- 50
+#'tas <- array(1:(10*num_lats*10*num_lons),
+#'             dim = c(10, num_lats, 10, num_lons))
+#'names(dim(tas)) <- c('a', 'j', 'b', 'i')
+#'lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+#'             dim = c(num_lons, num_lats))
+#'metadata <- list(lon = list(units = 'degrees_east'))
+#'names(dim(lon)) <- c('i', 'j')
+#'attr(lon, 'variables') <- metadata
+#'lat <- t(array(seq(-90, 90, length.out = num_lats),
+#'               dim = c(num_lats, num_lons)))
+#'metadata <- list(lat = list(units = 'degrees_north'))
+#'names(dim(lat)) <- c('i', 'j')
+#'attr(lat, 'variables') <- metadata
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil')
+#'
+#'# Again, the dimensions can be in any order
+#'num_lats <- 25
+#'num_lons <- 50
+#'tas <- array(1:(10*num_lats*10*num_lons),
+#'             dim = c(10, num_lats, 10, num_lons))
+#'names(dim(tas)) <- c('a', 'j', 'b', 'i')
+#'lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+#'             dim = c(num_lons, num_lats))
+#'names(dim(lon)) <- c('i', 'j')
+#'lat <- t(array(seq(-90, 90, length.out = num_lats),
+#'               dim = c(num_lats, num_lons)))
+#'names(dim(lat)) <- c('i', 'j')
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil')
+#'tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+#'# It is possible to specify an external NetCDF file as target grid reference
+#'tas2 <- CDORemap(tas, lon, lat, 'external_file.nc', 'bil')
+#'}
+#'@import ncdf4
+#'@importFrom easyNCDF ArrayToNc
+#'@importFrom stats lm predict setNames
+#'@export
+CDORemap <- function(data_array = NULL, lons, lats, grid, method,
+                     avoid_writes = TRUE, crop = TRUE,
+                     force_remap = FALSE, write_dir = tempdir()) {  #, mask = NULL) {
+  .isRegularVector <- function(x, tol = 0.1) {
+    if (length(x) < 2) {
+      #stop("The provided vector must be of length 2 or greater.")
+      TRUE
+    } else {
+      spaces <- x[2:length(x)] - x[1:(length(x) - 1)]
+      (sum(abs(spaces - mean(spaces)) > mean(spaces) / (1 / tol)) < 2)
+    }
+  }
+  #
Check parameters data_array, lons and lats. + known_lon_names <- .KnownLonNames() + known_lat_names <- .KnownLatNames() + if (!is.numeric(lons) || !is.numeric(lats)) { + stop("Expected numeric 'lons' and 'lats'.") + } + if (any(is.na(lons > 0))) { + stop("Found invalid values in 'lons'.") + } + if (any(is.na(lats > 0))) { + stop("Found invalid values in 'lats'.") + } + if (is.null(dim(lons))) { + dim(lons) <- length(lons) + } + if (is.null(dim(lats))) { + dim(lats) <- length(lats) + } + if (length(dim(lons)) > 2 || length(dim(lats)) > 2) { + stop("'lons' and 'lats' can only have up to 2 dimensions.") + } + if (length(dim(lons)) != length(dim(lats))) { + stop("'lons' and 'lats' must have the same number of dimensions.") + } + if (length(dim(lons)) == 2 && !all(dim(lons) == dim(lats))) { + stop("'lons' and 'lats' must have the same dimension sizes.") + } + return_array <- TRUE + if (is.null(data_array)) { + return_array <- FALSE + if (length(dim(lons)) == 1) { + array_dims <- c(length(lats), length(lons)) + new_lon_dim_name <- 'lon' + new_lat_dim_name <- 'lat' + } else { + array_dims <- dim(lons) + new_lon_dim_name <- 'i' + new_lat_dim_name <- 'j' + } + if (!is.null(names(dim(lons)))) { + if (any(known_lon_names %in% names(dim(lons)))) { + new_lon_dim_name <- known_lon_names[which(known_lon_names %in% names(dim(lons)))[1]] + } + } + if (!is.null(names(dim(lats)))) { + if (any(known_lat_names %in% names(dim(lats)))) { + new_lat_dim_name <- known_lat_names[which(known_lat_names %in% names(dim(lats)))[1]] + } + } + names(array_dims) <- c(new_lat_dim_name, new_lon_dim_name) + data_array <- array(as.numeric(NA), array_dims) + } + if (!(is.logical(data_array) || is.numeric(data_array)) || !is.array(data_array)) { + stop("Parameter 'data_array' must be a numeric array.") + } + if (is.null(names(dim(data_array)))) { + stop("Parameter 'data_array' must have named dimensions.") + } + lon_dim <- which(known_lon_names %in% names(dim(data_array))) + if (length(lon_dim) < 1) { + stop("Could not find a known longitude dimension name in the provided 'data_array'.") + } + if (length(lon_dim) > 1) { + stop("Found more than one known longitude dimension names in the provided 'data_array'.") + } + lon_dim <- known_lon_names[lon_dim] + lat_dim <- which(known_lat_names %in% names(dim(data_array))) + if (length(lat_dim) < 1) { + stop("Could not find a known latitude dimension name in the provided 'data_array'.") + } + if (length(lat_dim) > 1) { + stop("Found more than one known latitude dimension name in the provided 'data_array'.") + } + lat_dim <- known_lat_names[lat_dim] + if (is.null(names(dim(lons)))) { + if (length(dim(lons)) == 1) { + names(dim(lons)) <- lon_dim + } else { + stop("Parameter 'lons' must be provided with dimension names.") + } + } else { + if (!(lon_dim %in% names(dim(lons)))) { + stop("Parameter 'lon' must have the same longitude dimension name as the 'data_array'.") + } + if (length(dim(lons)) > 1 && !(lat_dim %in% names(dim(lons)))) { + stop("Parameter 'lon' must have the same latitude dimension name as the 'data_array'.") + } + } + if (is.null(names(dim(lats)))) { + if (length(dim(lats)) == 1) { + names(dim(lats)) <- lat_dim + } else { + stop("Parameter 'lats' must be provided with dimension names.") + } + } else { + if (!(lat_dim %in% names(dim(lats)))) { + stop("Parameter 'lat' must have the same latitude dimension name as the 'data_array'.") + } + if (length(dim(lats)) > 1 && !(lon_dim %in% names(dim(lats)))) { + stop("Parameter 'lat' must have the same longitude dimension name as the 
'data_array'.") + } + } + lons_attr_bk <- attributes(lons) + if (is.null(lons_attr_bk)) { + lons_attr_bk <- list() + } + lats_attr_bk <- attributes(lats) + if (is.null(lats_attr_bk)) { + lats_attr_bk <- list() + } + if (length(attr(lons, 'variables')) == 0) { + new_metadata <- list(list()) + if (length(dim(lons)) == 1) { + names(new_metadata) <- lon_dim + } else { + names(new_metadata) <- paste0(lon_dim, '_var') + } + attr(lons, 'variables') <- new_metadata + } + if (!('units' %in% names(attr(lons, 'variables')[[1]]))) { + new_metadata <- attr(lons, 'variables') + #names(new_metadata)[1] <- lon_dim + new_metadata[[1]][['units']] <- 'degrees_east' + attr(lons, 'variables') <- new_metadata + } + if (length(attr(lats, 'variables')) == 0) { + new_metadata <- list(list()) + if (length(dim(lats)) == 1) { + names(new_metadata) <- lat_dim + } else { + names(new_metadata) <- paste0(lat_dim, '_var') + } + attr(lats, 'variables') <- new_metadata + } + if (!('units' %in% names(attr(lats, 'variables')[[1]]))) { + new_metadata <- attr(lats, 'variables') + #names(new_metadata)[1] <- lat_dim + new_metadata[[1]][['units']] <- 'degrees_north' + attr(lats, 'variables') <- new_metadata + } + # Check grid. + if (!is.character(grid)) { + stop("Parameter 'grid' must be a character string specifying a ", + "target CDO grid, 'rXxY' or 'tRESgrid', or a path to another ", + "NetCDF file.") + } + if (grepl('^r[0-9]{1,}x[0-9]{1,}$', grid)) { + grid_type <- 'regular' + grid_lons <- as.numeric(strsplit(strsplit(grid, 'x')[[1]][1], 'r')[[1]][2]) + grid_lats <- as.numeric(strsplit(grid, 'x')[[1]][2]) + } else if (grepl('^t[0-9]{1,}grid$', grid)) { + grid_type <- 'gaussian' + grid_t <- as.numeric(strsplit(strsplit(grid, 'grid')[[1]][1], 't')[[1]][2]) + grid_size <- .t2nlatlon(grid_t) + grid_lons <- grid_size[2] + grid_lats <- grid_size[1] + } else { + grid_type <- 'custom' + } + # Check method. + if (method %in% c('bil', 'bilinear')) { + method <- 'bil' + } else if (method %in% c('bic', 'bicubic')) { + method <- 'bic' + } else if (method %in% c('con', 'conservative')) { + method <- 'con' + } else if (method %in% c('dis', 'distance-weighted')) { + method <- 'dis' + } else { + stop("Unsupported CDO remap method. 'bilinear', 'bicubic', 'conservative' or 'distance-weighted' supported only.") + } + # Check avoid_writes + if (!is.logical(avoid_writes)) { + stop("Parameter 'avoid_writes' must be a logical value.") + } + # Check crop + crop_tight <- FALSE + if (is.character(crop)) { + if (crop == 'tight') { + crop_tight <- TRUE + } else if (crop != 'preserve') { + stop("Parameter 'crop' can only take the values 'tight' or 'preserve' if specified as a character string.") + } + crop <- TRUE + } + if (is.logical(crop)) { + if (crop) { + warning("Parameter 'crop' = 'TRUE'. The output grid range will follow the input lons and lats.") + if (length(lons) == 1 || length(lats) == 1) { + stop("CDORemap cannot remap if crop = TRUE and values for only one ", + "longitude or one latitude are provided. Either a) provide ", + "values for more than one longitude/latitude, b) explicitly ", + "specify the crop limits in the parameter crop, or c) set ", + "crop = FALSE.") + } + if (crop_tight) { + lon_extremes <- c(min(lons), max(lons)) + lat_extremes <- c(min(lats), max(lats)) + } else { + # Here we are trying to look for the extreme lons and lats in the data. + # Not the centers of the extreme cells, but the borders of the extreme cells. 
+###--- + if (length(dim(lons)) == 1) { + tmp_lon <- lons + } else { + min_pos <- which(lons == min(lons), arr.ind = TRUE)[1, ] + tmp_lon <- Subset(lons, lat_dim, min_pos[which(names(dim(lons)) == lat_dim)], drop = 'selected') + } + i <- 1:length(tmp_lon) + degree <- min(3, length(i) - 1) + lon_model <- lm(tmp_lon ~ poly(i, degree)) + lon_extremes <- c(NA, NA) + left_is_min <- FALSE + right_is_max <- FALSE + if (which.min(tmp_lon) == 1) { + left_is_min <- TRUE + prev_lon <- predict(lon_model, data.frame(i = 0)) + first_lon_cell_width <- (tmp_lon[1] - prev_lon) + # The signif is needed because cdo sellonlatbox crashes with too many digits + lon_extremes[1] <- tmp_lon[1] - first_lon_cell_width / 2 + } else { + lon_extremes[1] <- min(tmp_lon) + } + if (which.max(tmp_lon) == length(tmp_lon)) { + right_is_max <- TRUE + next_lon <- predict(lon_model, data.frame(i = length(tmp_lon) + 1)) + last_lon_cell_width <- (next_lon - tmp_lon[length(tmp_lon)]) + lon_extremes[2] <- tmp_lon[length(tmp_lon)] + last_lon_cell_width / 2 + } else { + lon_extremes[2] <- max(tmp_lon) + } + # Adjust the crop window if possible in order to keep lons from 0 to 360 + # or from -180 to 180 when the extremes of the cropped window are contiguous. + if (right_is_max) { + if (lon_extremes[1] < -180) { + if (!((lon_extremes[2] < 180) && !((180 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + lon_extremes[1] <- -180 + lon_extremes[2] <- 180 + } + } else if (lon_extremes[1] < 0) { + if (!((lon_extremes[2] < 360) && !((360 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + lon_extremes[1] <- 0 + lon_extremes[2] <- 360 + } + } + } + if (left_is_min) { + if (lon_extremes[2] > 360) { + if (!((lon_extremes[1] > 0) && !(lon_extremes[1] <= first_lon_cell_width / 2))) { + lon_extremes[1] <- 0 + lon_extremes[2] <- 360 + } + } else if (lon_extremes[2] > 180) { + if (!((lon_extremes[1] > -180) && !((180 + lon_extremes[1]) <= first_lon_cell_width / 2))) { + lon_extremes[1] <- -180 + lon_extremes[2] <- 180 + } + } + } +## lon_extremes <- signif(lon_extremes, 5) +## lon_extremes <- lon_extremes + 0.00001 +###--- + if (length(dim(lats)) == 1) { + tmp_lat <- lats + } else { + min_pos <- which(lats == min(lats), arr.ind = TRUE)[1, ] + tmp_lat <- Subset(lats, lon_dim, min_pos[which(names(dim(lats)) == lon_dim)], drop = 'selected') + } + i <- 1:length(tmp_lat) + degree <- min(3, length(i) - 1) + lat_model <- lm(tmp_lat ~ poly(i, degree)) + lat_extremes <- c(NA, NA) + if (which.min(tmp_lat) == 1) { + prev_lat <- predict(lat_model, data.frame(i = 0)) + lat_extremes[1] <- tmp_lat[1] - (tmp_lat[1] - prev_lat) / 2 + } else { + lat_extremes[1] <- min(tmp_lat) + } + if (which.max(tmp_lat) == length(tmp_lat)) { + next_lat <- predict(lat_model, data.frame(i = length(tmp_lat) + 1)) + lat_extremes[2] <- tmp_lat[length(tmp_lat)] + (next_lat - tmp_lat[length(tmp_lat)]) / 2 + } else { + lat_extremes[2] <- max(tmp_lat) + } +## lat_extremes <- signif(lat_extremes, 5) + # Adjust crop window + if (lat_extremes[1] < -90) { + lat_extremes[1] <- -90 + } else if (lat_extremes[1] > 90) { + lat_extremes[1] <- 90 + } + if (lat_extremes[2] < -90) { + lat_extremes[2] <- -90 + } else if (lat_extremes[2] > 90) { + lat_extremes[2] <- 90 + } +###--- + } + } else if (crop == FALSE) { + warning("Parameter 'crop' = 'FALSE'. 
The output grid range will follow parameter 'grid'.")
+    }
+  } else if (is.numeric(crop)) {
+    if (length(crop) != 4) {
+      stop("Parameter 'crop' must be a logical value or a numeric vector of length 4: c(western border, eastern border, southern border, northern border).")
+    } else {
+      lon_extremes <- crop[1:2]
+      lat_extremes <- crop[3:4]
+      crop <- TRUE
+    }
+  } else {
+    stop("Parameter 'crop' must be a logical value or a numeric vector.")
+  }
+  # Check force_remap
+  if (!is.logical(force_remap)) {
+    stop("Parameter 'force_remap' must be a logical value.")
+  }
+  # Check write_dir
+  if (!is.character(write_dir)) {
+    stop("Parameter 'write_dir' must be a character string.")
+  }
+  if (!dir.exists(write_dir)) {
+    stop("Parameter 'write_dir' must point to an existing directory.")
+  }
+#  if (!is.null(mask)) {
+#    if (!is.numeric(mask) || !is.array(mask)) {
+#      stop("Parameter 'mask' must be a numeric array.")
+#    }
+#    if (length(dim(mask)) != 2) {
+#      stop("Parameter 'mask' must have two dimensions.")
+#    }
+#    if (is.null(names(dim(mask)))) {
+#      if (dim(data_array)[lat_dim] == dim(data_array)[lon_dim]) {
+#        stop("Cannot disambiguate which is the longitude dimension of ",
+#             "the provided 'mask'. Provide it with dimension names.")
+#      }
+#      names(dim(mask)) <- c('', '')
+#      found_lon_dim <- which(dim(mask) == dim(data_array)[lon_dim])
+#      if (length(found_lon_dim) < 0) {
+#        stop("The dimension sizes of the provided 'mask' do not match ",
+#             "the spatial dimension sizes of the array to interpolate.")
+#      } else {
+#        names(dim(mask)[found_lon_dim]) <- lon_dim
+#      }
+#      found_lat_dim <- which(dim(mask) == dim(data_array)[lat_dim])
+#      if (length(found_lat_dim) < 0) {
+#        stop("The dimension sizes of the provided 'mask' do not match ",
+#             "the spatial dimension sizes of the array to interpolate.")
+#      } else {
+#        names(dim(mask)[found_lat_dim]) <- lat_dim
+#      }
+#    }
+#    lon_position <- which(names(dim(data_array)) == lon_dim)
+#    lat_position <- which(names(dim(data_array)) == lat_dim)
+#    if (lon_position > lat_position) {
+#      if (names(dim(mask))[1] == lon_dim) {
+#        mask <- t(mask)
+#      }
+#    } else {
+#      if (names(dim(mask))[1] == lat_dim) {
+#        mask <- t(mask)
+#      }
+#    }
+#    ## TODO: Apply mask!!! Preserve attributes
+#  }
+  # Check if interpolation can be skipped.
+  interpolation_needed <- TRUE
+  if (!force_remap) {
+    if (!(grid_type == 'custom')) {
+      if (length(lons) == grid_lons && length(lats) == grid_lats) {
+        if (grid_type == 'regular') {
+          if (.isRegularVector(lons) && .isRegularVector(lats)) {
+            interpolation_needed <- FALSE
+          }
+        } else if (grid_type == 'gaussian') {
+          # TODO: improve this check. Gaussian quadrature should be used.
+          if (.isRegularVector(lons) && !.isRegularVector(lats)) {
+            interpolation_needed <- FALSE
+          }
+        }
+      }
+    }
+  }
+  found_lons <- lons
+  found_lats <- lats
+  if (interpolation_needed) {
+    if (nchar(Sys.which('cdo')[1]) < 1) {
+      stop("CDO must be installed in order to use CDORemap().")
+    }
+    cdo_version <- as.numeric_version(
+      strsplit(suppressWarnings(system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5]
+    )
+    warning("CDORemap: Using CDO version ", cdo_version, ".")
+    if ((cdo_version >= as.numeric_version('1.7.0')) && (method == 'con')) {
+      method <- 'ycon'
+    }
+    # CDO takes arrays of 3 dimensions or 4 if one of them is unlimited.
+    # The unlimited dimension can only be the left-most (right-most in R).
+    # There are no restrictions for the dimension names or variable names.
+    # The longitude and latitude are detected by their units.
+ # There are no restrictions for the order of the limited dimensions. + # The longitude/latitude variables and dimensions must have the same name. + # The procedure consists in: + # - take out the array metadata + # - be aware of var dimension (replacing the dimension names would do). + # - take arrays of 4 dimensions always if possible + # - make the last dimension unlimited when saving to netcdf + # - if the last dimension is lon or lat, either reorder the array and + # then reorder back or iterate over the dimensions at the right + # side of lon AND lat. + # If the input array has more than 4 dimensions, it is needed to + # run CDO on each sub-array of 4 dimensions because it can handle + # only up to 4 dimensions. The shortest dimensions are chosen to + # iterate over. + is_irregular <- FALSE + if (length(dim(lats)) > 1 && length(dim(lons)) > 1) { + is_irregular <- TRUE + } + attribute_backup <- attributes(data_array) + other_dims <- which(!(names(dim(data_array)) %in% c(lon_dim, lat_dim))) + permutation <- NULL + unlimited_dim <- NULL + dims_to_iterate <- NULL + total_slices <- 1 + other_dims_per_chunk <- ifelse(is_irregular, 1, 2) # 4 (the maximum accepted by CDO) - 2 (lon, lat) = 2. + if (length(other_dims) > 1 || (length(other_dims) > 0 && (is_irregular))) { + if (!(length(dim(data_array)) %in% other_dims)) { + if (avoid_writes || is_irregular) { + dims_mod <- dim(data_array) + dims_mod[which(names(dim(data_array)) %in% + c(lon_dim, lat_dim))] <- 0 + dim_to_move <- which.max(dims_mod) + permutation <- (1:length(dim(data_array)))[-dim_to_move] + permutation <- c(permutation, dim_to_move) + permutation_back <- sort(permutation, index.return = TRUE)$ix + dim_backup <- dim(data_array) + data_array <- aperm(data_array, permutation) + dim(data_array) <- dim_backup[permutation] + other_dims <- which(!(names(dim(data_array)) %in% c(lon_dim, lat_dim))) + } else { + # We allow only lon, lat and 1 more dimension per chunk, so + # CDO has no restrictions in the order. + other_dims_per_chunk <- 1 + } + } + other_dims_ordered_by_size <- other_dims[sort(dim(data_array)[other_dims], index.return = TRUE)$ix] + dims_to_iterate <- sort(head(other_dims_ordered_by_size, length(other_dims) - other_dims_per_chunk)) + if (length(dims_to_iterate) == 0) { + dims_to_iterate <- NULL + } else { + slices_to_iterate <- array(1:prod(dim(data_array)[dims_to_iterate]), + dim(data_array)[dims_to_iterate]) + total_slices <- prod(dim(slices_to_iterate)) + } + if ((other_dims_per_chunk > 1) || (other_dims_per_chunk > 0 && is_irregular)) { + unlimited_dim <- tail(sort(tail(other_dims_ordered_by_size, other_dims_per_chunk)), 1) + #unlimited_dim <- tail(other_dims) + } + } + + result_array <- NULL + lon_pos <- which(names(dim(data_array)) == lon_dim) + lat_pos <- which(names(dim(data_array)) == lat_dim) + dim_backup <- dim(data_array) + attributes(data_array) <- NULL + dim(data_array) <- dim_backup + names(dim(data_array)) <- paste0('dim', 1:length(dim(data_array))) + names(dim(data_array))[c(lon_pos, lat_pos)] <- c(lon_dim, lat_dim) + if (!is.null(unlimited_dim)) { + # This will make ArrayToNc create this dim as unlimited. 
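+      # (Note, tying in with the constraints listed above: CDO accepts a 4th
+      # dimension only if it is unlimited, and naming the chosen dimension
+      # 'time' is what makes easyNCDF::ArrayToNc write it as the unlimited
+      # record dimension of the intermediate file.)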
+ names(dim(data_array))[unlimited_dim] <- 'time' + } + if (length(dim(lons)) == 1) { + names(dim(lons)) <- lon_dim + } + if (length(dim(lats)) == 1) { + names(dim(lats)) <- lat_dim + } + if (length(dim(lons)) > 1) { + lon_var_name <- paste0(lon_dim, '_var') + } else { + lon_var_name <- lon_dim + } + if (length(dim(lats)) > 1) { + lat_var_name <- paste0(lat_dim, '_var') + } else { + lat_var_name <- lat_dim + } + if (is_irregular) { + metadata <- list(list(coordinates = paste(lon_var_name, lat_var_name))) + names(metadata) <- 'var' + attr(data_array, 'variables') <- metadata + } + names(attr(lons, 'variables')) <- lon_var_name + names(attr(lats, 'variables')) <- lat_var_name + if (!is.null(attr(lons, 'variables')[[1]][['dim']])) { + attr(lons, 'variables')[[1]][['dim']] <- NULL + } + if (!is.null(attr(lats, 'variables')[[1]][['dim']])) { + attr(lats, 'variables')[[1]][['dim']] <- NULL + } + lons_lats_taken <- FALSE + for (i in 1:total_slices) { + tmp_file <- tempfile('R_CDORemap_', write_dir, fileext = '.nc') + tmp_file2 <- tempfile('R_CDORemap_', write_dir, fileext = '.nc') + if (!is.null(dims_to_iterate)) { + slice_indices <- which(slices_to_iterate == i, arr.ind = TRUE) + subset <- Subset(data_array, dims_to_iterate, as.list(slice_indices), drop = 'selected') + # Fix issue 259, curvilinear grid, the order of the dimensions in slices and + # coordinates needs to match + if (is_irregular) { + pos_lon <- which(names(dim(subset)) == lon_dim) + pos_lat <- which(names(dim(subset)) == lat_dim) + pos_lon_dim_in_lons <- which(names(dim(lons)) == lon_dim) + pos_lat_dim_in_lons <- which(names(dim(lons)) == lat_dim) + if ((pos_lon > pos_lat && pos_lon_dim_in_lons < pos_lat_dim_in_lons) || + (pos_lon < pos_lat && pos_lon_dim_in_lons > pos_lat_dim_in_lons)) { + new_pos <- 1:length(dim(subset)) + new_pos[pos_lon] <- pos_lat + new_pos[pos_lat] <- pos_lon + subset <- .aperm2(subset, new_pos) + } + # The unlimited dimension should be placed in the last position + if ('time' %in% names(dim(subset)) && + which(names(dim(subset)) == 'time') != length(dim(subset))) { + new_pos <- 2:length(dim(subset)) + new_pos[length(dim(subset))] <- 1 + subset <- .aperm2(subset, new_pos) + } + } +# dims_before_crop <- dim(subset) + # Make sure subset goes along with metadata + easyNCDF::ArrayToNc(setNames(list(subset, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } else { + if (is_irregular) { + pos_lon <- which(names(dim(data_array)) == lon_dim) + pos_lat <- which(names(dim(data_array)) == lat_dim) + pos_lon_dim_in_lons <- which(names(dim(lons)) == lon_dim) + pos_lat_dim_in_lons <- which(names(dim(lons)) == lat_dim) + if ((pos_lon > pos_lat && pos_lon_dim_in_lons < pos_lat_dim_in_lons) || + (pos_lon < pos_lat && pos_lon_dim_in_lons > pos_lat_dim_in_lons)) { + new_pos <- 1:length(dim(data_array)) + new_pos[pos_lon] <- pos_lat + new_pos[pos_lat] <- pos_lon + data_array <- .aperm2(data_array, new_pos) + } + } +# dims_before_crop <- dim(data_array) + easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } + sellonlatbox <- '' + if (crop) { + sellonlatbox <- paste0('sellonlatbox,', format(lon_extremes[1], scientific = FALSE), + ',', format(lon_extremes[2], scientific = FALSE), + ',', format(lat_extremes[1], scientific = FALSE), + ',', format(lat_extremes[2], scientific = FALSE), ' -') + } + err <- try({ + system(paste0("cdo -s ", sellonlatbox, "remap", method, ",", grid, " ", tmp_file, " ", tmp_file2)) + }) + file.remove(tmp_file) + if (('try-error' %in% 
class(err)) || err > 0) { + stop("CDO remap failed.") + } + ncdf_remapped <- nc_open(tmp_file2) + if (!lons_lats_taken) { + found_dim_names <- sapply(ncdf_remapped$var$var$dim, '[[', 'name') + found_lon_dim <- found_dim_names[which(found_dim_names %in% .KnownLonNames())[1]] + found_lat_dim <- found_dim_names[which(found_dim_names %in% .KnownLatNames())[1]] + found_lon_dim_size <- length(ncdf_remapped$dim[[found_lon_dim]]$vals) + found_lat_dim_size <- length(ncdf_remapped$dim[[found_lat_dim]]$vals) + found_var_names <- names(ncdf_remapped$var) + found_lon_var_name <- which(found_var_names %in% .KnownLonNames()) + found_lat_var_name <- which(found_var_names %in% .KnownLatNames()) + if (length(found_lon_var_name) > 0) { + found_lon_var_name <- found_var_names[found_lon_var_name[1]] + } else { + found_lon_var_name <- NULL + } + if (length(found_lat_var_name) > 0) { + found_lat_var_name <- found_var_names[found_lat_var_name[1]] + } else { + found_lat_var_name <- NULL + } + if (length(found_lon_var_name) > 0) { + found_lons <- ncvar_get(ncdf_remapped, found_lon_var_name, + collapse_degen = FALSE) + } else { + found_lons <- ncdf_remapped$dim[[found_lon_dim]]$vals + dim(found_lons) <- found_lon_dim_size + } + if (length(found_lat_var_name) > 0) { + found_lats <- ncvar_get(ncdf_remapped, found_lat_var_name, + collapse_degen = FALSE) + } else { + found_lats <- ncdf_remapped$dim[[found_lat_dim]]$vals + dim(found_lats) <- found_lat_dim_size + } + if (length(dim(lons)) == length(dim(found_lons))) { + new_lon_name <- lon_dim + } else { + new_lon_name <- found_lon_dim + } + if (length(dim(lats)) == length(dim(found_lats))) { + new_lat_name <- lat_dim + } else { + new_lat_name <- found_lat_dim + } + if (length(dim(found_lons)) > 1) { + if (which(sapply(ncdf_remapped$var$lon$dim, '[[', 'name') == found_lon_dim) < + which(sapply(ncdf_remapped$var$lon$dim, '[[', 'name') == found_lat_dim)) { + names(dim(found_lons)) <- c(new_lon_name, new_lat_name) + } else { + names(dim(found_lons)) <- c(new_lat_name, new_lon_name) + } + } else { + names(dim(found_lons)) <- new_lon_name + } + if (length(dim(found_lats)) > 1) { + if (which(sapply(ncdf_remapped$var$lat$dim, '[[', 'name') == found_lon_dim) < + which(sapply(ncdf_remapped$var$lat$dim, '[[', 'name') == found_lat_dim)) { + names(dim(found_lats)) <- c(new_lon_name, new_lat_name) + } else { + names(dim(found_lats)) <- c(new_lat_name, new_lon_name) + } + } else { + names(dim(found_lats)) <- new_lat_name + } + lons_lats_taken <- TRUE + } + if (!is.null(dims_to_iterate)) { + if (is.null(result_array)) { + if (return_array) { + new_dims <- dim(data_array) + new_dims[c(lon_dim, lat_dim)] <- c(found_lon_dim_size, found_lat_dim_size) + lon_pos <- which(names(new_dims) == lon_dim) + lat_pos <- which(names(new_dims) == lat_dim) + # Fix issue 259, expected order from CDO output is lon lat + # If is irregular, lat and lon position need to be checked: + if (is_irregular) { + if (lon_pos > lat_pos) { + new_pos <- 1:length(new_dims) + new_pos[lon_pos] <- lat_pos + new_pos[lat_pos] <- lon_pos + new_dims <- new_dims[new_pos] + } + } + result_array <- array(dim = new_dims) + store_indices <- as.list(rep(TRUE, length(dim(result_array)))) + } + } + if (return_array) { + store_indices[dims_to_iterate] <- as.list(slice_indices) + # If is irregular, the order of dimenesions in result_array and file may be different and need to be checked before reading the temporal file: + if (is_irregular) { + test_dims <- dim(ncvar_get(ncdf_remapped, 'var', + collapse_degen = FALSE)) + test_dims <- 
test_dims[which(test_dims > 1)] + pos_test_dims <- match(dim(result_array), test_dims) + if (is.unsorted(pos_test_dims, na.rm = TRUE)) { + # pos_new_dims is used later in the code. Don't overwrite + pos_new_dims <- 1:length(dim(result_array)) + pos_new_dims[which(!is.na(pos_test_dims))] <- + match(test_dims, dim(result_array)) + backup_result_array_dims <- dim(result_array) + dim(result_array) <- dim(result_array)[pos_new_dims] + } + } + result_array <- do.call('[<-', c(list(x = result_array), store_indices, + list(value = ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE)))) + } + } else { + new_dims <- dim(data_array) + new_dims[c(lon_dim, lat_dim)] <- c(found_lon_dim_size, found_lat_dim_size) + + result_array <- ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE) + dim(result_array) <- new_dims + } + nc_close(ncdf_remapped) + file.remove(tmp_file2) + } + # If is irregular, the order of dimension may need to be recovered after reading all the file: + if (is_irregular & (!is.null(dims_to_iterate))) { + if (exists('pos_new_dims')) { + pos_new_dims <- 1:length(dim(result_array)) + dims_to_change <- match(backup_result_array_dims, dim(result_array)) + pos_new_dims[which(dims_to_change != 1)] <- + dims_to_change[which(dims_to_change != 1)] + result_array <- .aperm2(result_array, pos_new_dims) + } + } + + if (!is.null(permutation)) { + dim_backup <- dim(result_array) + result_array <- aperm(result_array, permutation_back) + dim(result_array) <- dim_backup[permutation_back] + } + # Now restore the metadata + result_is_irregular <- FALSE + if (length(dim(found_lats)) > 1 && length(dim(found_lons)) > 1) { + result_is_irregular <- TRUE + } + attribute_backup[['dim']][which(names(dim(result_array)) == lon_dim)] <- dim(result_array)[lon_dim] + attribute_backup[['dim']][which(names(dim(result_array)) == lat_dim)] <- dim(result_array)[lat_dim] + names(attribute_backup[['dim']])[which(names(dim(result_array)) == lon_dim)] <- new_lon_name + names(attribute_backup[['dim']])[which(names(dim(result_array)) == lat_dim)] <- new_lat_name + if (!is.null(attribute_backup[['variables']]) && (length(attribute_backup[['variables']]) > 0)) { + for (var in 1:length(attribute_backup[['variables']])) { + if (length(attribute_backup[['variables']][[var]][['dim']]) > 0) { + for (dim in 1:length(attribute_backup[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% names(attribute_backup[['variables']][[var]][['dim']][[dim]])) { + dim_name <- attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name + } else { + attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name + } + } + } else if (!is.null(names(attribute_backup[['variables']][[var]][['dim']]))) { + dim_name <- names(attribute_backup[['variables']][[var]][['dim']])[dim] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + } else { + names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + } + } + } + if (!is.null(dim_name)) { + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + new_vals <- found_lons[TRUE] + } else if (dim_name == lat_dim) { + new_vals <- found_lats[TRUE] + } + if 
(!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['len']])) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + } + if (!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']])) { + if (!result_is_irregular) { + attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals + } else { + attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + } + } + } + } + } + } + if (!is_irregular && result_is_irregular) { + attribute_backup[['coordinates']] <- paste(lon_var_name, lat_var_name) + } else if (is_irregular && !result_is_irregular) { + attribute_backup[['coordinates']] <- NULL + } + } + } + attributes(result_array) <- attribute_backup + lons_attr_bk[['dim']] <- dim(found_lons) + if (!is.null(lons_attr_bk[['variables']]) && (length(lons_attr_bk[['variables']]) > 0)) { + for (var in 1:length(lons_attr_bk[['variables']])) { + if (length(lons_attr_bk[['variables']][[var]][['dim']]) > 0) { + dims_to_remove <- NULL + for (dim in 1:length(lons_attr_bk[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% names(lons_attr_bk[['variables']][[var]][['dim']][[dim]])) { + dim_name <- lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name + } else { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name + } + } + } else if (!is.null(names(lons_attr_bk[['variables']][[var]][['dim']]))) { + dim_name <- names(lons_attr_bk[['variables']][[var]][['dim']])[dim] + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + } else { + names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + } + } + } + if (!is.null(dim_name)) { + if (dim_name %in% c(lon_dim, lat_dim)) { + if (dim_name == lon_dim) { + new_vals <- found_lons[TRUE] + } else if (dim_name == lat_dim) { + new_vals <- found_lats[TRUE] + if (!result_is_irregular) { + dims_to_remove <- c(dims_to_remove, dim) + } + } + if (!is.null(lons_attr_bk[['variables']][[var]][['dim']][[dim]][['len']])) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + } + if (!is.null(lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']])) { + if (!result_is_irregular) { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals + } else { + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + } + } + } + } + } + if (length(dims_to_remove) > 1) { + lons_attr_bk[['variables']][[var]][['dim']] <- lons_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] + } + } + } + names(lons_attr_bk[['variables']])[1] <- lon_var_name + lons_attr_bk[['variables']][[1]][['units']] <- 'degrees_east' + } + attributes(found_lons) <- lons_attr_bk + lats_attr_bk[['dim']] <- dim(found_lats) + if (!is.null(lats_attr_bk[['variables']]) && (length(lats_attr_bk[['variables']]) > 0)) { + for (var in 1:length(lats_attr_bk[['variables']])) { + if (length(lats_attr_bk[['variables']][[var]][['dim']]) > 0) { + dims_to_remove <- NULL + for (dim in 1:length(lats_attr_bk[['variables']][[var]][['dim']])) { + dim_name <- NULL + if ('name' %in% 
names(lats_attr_bk[['variables']][[var]][['dim']][[dim]])) {
+            dim_name <- lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']]
+            if (dim_name %in% c(lon_dim, lat_dim)) {
+              if (dim_name == lon_dim) {
+                lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lon_name
+              } else {
+                lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] <- new_lat_name
+              }
+            }
+          } else if (!is.null(names(lats_attr_bk[['variables']][[var]][['dim']]))) {
+            dim_name <- names(lats_attr_bk[['variables']][[var]][['dim']])[dim]
+            if (dim_name %in% c(lon_dim, lat_dim)) {
+              if (dim_name == lon_dim) {
+                names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name
+              } else {
+                names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name
+              }
+            }
+          }
+          if (!is.null(dim_name)) {
+            if (dim_name %in% c(lon_dim, lat_dim)) {
+              if (dim_name == lon_dim) {
+                new_vals <- found_lons[TRUE]
+                if (!result_is_irregular) {
+                  dims_to_remove <- c(dims_to_remove, dim)
+                }
+              } else if (dim_name == lat_dim) {
+                new_vals <- found_lats[TRUE]
+              }
+              if (!is.null(lats_attr_bk[['variables']][[var]][['dim']][[dim]][['len']])) {
+                lats_attr_bk[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals)
+              }
+              if (!is.null(lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']])) {
+                if (!result_is_irregular) {
+                  lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals
+                } else {
+                  lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals)
+                }
+              }
+            }
+          }
+        }
+        if (length(dims_to_remove) > 1) {
+          lats_attr_bk[['variables']][[var]][['dim']] <- lats_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]]
+        }
+      }
+    }
+    names(lats_attr_bk[['variables']])[1] <- lat_var_name
+    lats_attr_bk[['variables']][[1]][['units']] <- 'degrees_north'
+  }
+  attributes(found_lats) <- lats_attr_bk
+  }
+  list(data_array = if (return_array) {
+                      if (interpolation_needed) {
+                        result_array
+                      } else {
+                        data_array
+                      }
+                    } else {
+                      NULL
+                    },
+       lons = found_lons, lats = found_lats)
+}
+
diff --git a/R/Clim.R b/R/Clim.R
index d879fc4f4fdba572efeb4308ded28ea1ce41b799..21f97b67ebcd586efb284f6654523b51f53eadd7 100644
--- a/R/Clim.R
+++ b/R/Clim.R
@@ -136,7 +136,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
         length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
@@ -191,7 +191,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
                   fun = .Clim,
                   method = method, time_dim = time_dim,
                   dat_dim = dat_dim, memb_dim = memb_dim,
-                  memb = memb, na.rm = na.rm,
+                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                   ncores = ncores)
     # Add member dimension name back
     if (memb) {
@@ -207,7 +207,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
                   fun = .Clim,
                   method = method, time_dim = time_dim,
                   dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
-                  memb = memb, na.rm = na.rm,
+                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                   ncores = ncores)
 
   } else if (method == 'NDV') {
@@ -216,7 +216,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
                   fun = .Clim,
                   method = method, time_dim = time_dim,
                   dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
-                  memb = memb, na.rm = na.rm,
+                  memb = memb, na.rm = na.rm, ncores_input = ncores,
                   ncores = ncores)
   }
@@ -227,7 +227,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
 
 .Clim <- function(exp, obs, method = 'clim',
                   time_dim = 'sdate', dat_dim = c('dataset', 'member'),
                   ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE,
-                  na.rm = TRUE) {
+                  na.rm = TRUE, ncores_input = NULL) {
 
   if (method == 'clim') {
     # exp: [sdate, dat_dim_exp]
@@ -269,9 +269,9 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
     # exp clim
     ##--- NEW trend ---##
     tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1,
-                     polydeg = 1, conf = FALSE)$trend
+                     polydeg = 1, conf = FALSE, ncores = ncores_input)$trend
     tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1,
-                     polydeg = 1, conf = FALSE)$trend
+                     polydeg = 1, conf = FALSE, ncores = ncores_input)$trend
     # tmp_exp: [stats, dat_dim)]
     tmp_obs_mean <- apply(tmp_obs, 1, mean)  #average out dat_dim (dat and member)
@@ -337,10 +337,10 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
     #ini_: [sdate, dat_dim, ftime]
     tmp_exp <- Regression(datay = exp, datax = ini_exp, reg_dim = time_dim,
                           na.action = na.omit,
-                          pval = FALSE, conf = FALSE)$regression
+                          pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
     tmp_obs <- Regression(datay = obs, datax = ini_obs, reg_dim = time_dim,
                           na.action = na.omit,
-                          pval = FALSE, conf = FALSE)$regression
+                          pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
     #tmp_: [stats = 2, dat_dim, ftime]
     tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean)  #average out dat_dim (dat and member)
     #tmp_obs_mean: [stats = 2, ftime]
diff --git a/R/Cluster.R b/R/Cluster.R
new file mode 100644
index 0000000000000000000000000000000000000000..33527aea9b0e8f43bcce78be874cd07d5a02629a
--- /dev/null
+++ b/R/Cluster.R
@@ -0,0 +1,267 @@
+#'K-means Clustering
+#'
+#'Compute cluster centers and their time series of occurrences, with the
+#'K-means clustering method using Euclidean distance, of an array of input data
+#'with any number of dimensions that at least contain time_dim.
+#'Specifically, it partitions the array along the time axis into K groups or
+#'clusters such that each space vector/array belongs to (i.e., is a member of)
+#'the cluster with the nearest center or centroid. This function is a wrapper
+#'of kmeans() and relies on the NbClust package (Charrad et al., 2014 JSS) to
+#'determine the optimal number of clusters used for K-means clustering if it is
+#'not provided by users.
+#'
+#'@param data A numeric array with named dimensions that at least have
+#'  'time_dim' corresponding to time and 'space_dim' (optional) corresponding
+#'  to either area-averages over a series of domains or the grid points for
+#'  any spatial grid structure.
+#'@param weights A numeric array with named dimensions of multiplicative weights
+#'  based on the areas covering each domain/region or grid-cell of 'data'. The
+#'  dimensions must be equal to the 'space_dim' in 'data'. The default value is
+#'  NULL, which means no weighting is applied.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  in 'data'. The default value is 'sdate'.
+#'@param space_dim A character vector indicating the names of spatial dimensions
+#'  in 'data'. The default value is NULL.
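+#'  As a hypothetical illustration (these names and numbers are not from the
+#'  original documentation): for a field with spatial dimensions
+#'  \code{c(lat = 25, lon = 50)}, one could set \code{space_dim = c('lat', 'lon')}
+#'  and pass latitude-area weights built as
+#'  \code{array(rep(cos(lat * pi / 180), 50), dim = c(lat = 25, lon = 50))},
+#'  where 'lat' is the vector of the 25 grid-cell latitudes.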
+#'@param nclusters A positive integer K that must be bigger than 1 indicating
+#'  the number of clusters to be computed, or K initial cluster centers to be
+#'  used in the method. The default value is NULL, which means that the number
+#'  of clusters will be determined by NbClust(). The parameter 'index'
+#'  therefore needs to be specified for NbClust() to find the optimal number of
+#'  clusters to be used for K-means clustering calculation.
+#'@param index A character string of the validity index from NbClust package
+#'  that can be used to determine optimal K if K is not specified with
+#'  'nclusters'. The default value is 'sdindex' (Halkidi et al. 2001, JIIS).
+#'  Other indices available in NbClust are "kl", "ch", "hartigan", "ccc",
+#'  "scott", "marriot", "trcovw", "tracew", "friedman", "rubin", "cindex", "db",
+#'  "silhouette", "duda", "pseudot2", "beale", "ratkowsky", "ball",
+#'  "ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn",
+#'  "hubert", "sdindex", and "sdbw".
+#'  One can also use all of them with the option 'alllong' or almost all indices
+#'  except gap, gamma, gplus and tau with 'all', when the optimal number of
+#'  clusters K is determined by the majority rule (the maximum of the histogram
+#'  of the results of all indices with finite solutions). Use of some indices on
+#'  a big and/or unstructured dataset can be computationally intense and/or
+#'  could lead to numerical singularity.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing:
+#'\item{$cluster}{
+#'  An integer array of the occurrence of a cluster along time, i.e., when
+#'  certain data member in time is allocated to a specific cluster. The
+#'  dimensions are the same as 'data' without 'space_dim'.
+#'}
+#'\item{$centers}{
+#'  A numeric array of cluster centers or centroids (e.g. [1:K, 1:spatial
+#'  degrees of freedom]). The rest of the dimensions are the same as 'data'
+#'  except 'time_dim' and 'space_dim'.
+#'}
+#'\item{$totss}{
+#'  A numeric array of the total sum of squares. The dimensions are the same
+#'  as 'data' except 'time_dim' and 'space_dim'.
+#'}
+#'\item{$withinss}{
+#'  A numeric array of within-cluster sum of squares, one component per cluster.
+#'  The first dimension is the number of clusters, and the rest of the
+#'  dimensions are the same as 'data' except 'time_dim' and 'space_dim'.
+#'}
+#'\item{$tot.withinss}{
+#'  A numeric array of the total within-cluster sum of squares, i.e.,
+#'  sum(withinss). The dimensions are the same as 'data' except 'time_dim' and
+#'  'space_dim'.
+#'}
+#'\item{$betweenss}{
+#'  A numeric array of the between-cluster sum of squares, i.e.
+#'  totss - tot.withinss. The dimensions are the same as 'data' except
+#'  'time_dim' and 'space_dim'.
+#'}
+#'\item{$size}{
+#'  A numeric array of the number of points in each cluster. The first
+#'  dimension is the number of clusters, and the rest of the dimensions are
+#'  the same as 'data' except 'time_dim' and 'space_dim'.
+#'}
+#'\item{$iter}{
+#'  A numeric array of the number of (outer) iterations. The dimensions are
+#'  the same as 'data' except 'time_dim' and 'space_dim'.
+#'}
+#'\item{$ifault}{
+#'  A numeric array of an indicator of a possible algorithm problem. The
+#'  dimensions are the same as 'data' except 'time_dim' and 'space_dim'.
+#'}
+#'
+#'@references
+#'Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevier, pp 676.
+#'Charrad, M., N. Ghazzali, V. Boiteau and A. Niknafs, 2014, NbClust: An R Package for
+#'Determining the Relevant Number of Clusters in a Data Set, Journal of Statistical
+#'Software, 61(6), 1-36.
+#' +#'@examples +#'# Generating synthetic data +#'a1 <- array(dim = c(200, 4)) +#'mean1 <- 0 +#'sd1 <- 0.3 +#' +#'c0 <- seq(1, 200) +#'c1 <- sort(sample(x = 1:200, size = sample(x = 50:150, size = 1), replace = FALSE)) +#'x1 <- c(1, 1, 1, 1) +#'for (i1 in c1) { +#' a1[i1, ] <- x1 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'c1p5 <- c0[!(c0 %in% c1)] +#'c2 <- c1p5[seq(1, length(c1p5), 2)] +#'x2 <- c(2, 2, 4, 4) +#'for (i2 in c2) { +#' a1[i2, ] <- x2 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'c3 <- c1p5[seq(2, length(c1p5), 2)] +#'x3 <- c(3, 3, 1, 1) +#'for (i3 in c3) { +#' a1[i3, ] <- x3 + rnorm(4, mean = mean1, sd = sd1) +#'} +#' +#'# Computing the clusters +#'names(dim(a1)) <- c('sdate', 'space') +#'res1 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2]), nclusters = 3) +#'res2 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2])) +#' +#'@import NbClust multiApply +#'@importFrom abind abind +#'@importFrom stats kmeans +#'@importFrom grDevices pdf dev.off +#'@export +Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, + nclusters = NULL, index = 'sdindex', ncores = NULL) { + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + + ## weights + if (!is.null(weights)) { + if (!is.numeric(weights)) { + stop("Parameter 'weights' must be a numeric array.") + } + if (is.null(dim(weights))) { #is vector + dim(weights) <- c(length(weights)) + } + if (any(is.null(names(dim(weights))))| any(nchar(names(dim(weights))) == 0)) { + stop("Parameter 'weights' must have dimension names.") + } + if (any(!names(dim(weights)) %in% names(dim(data)) | + !dim(weights) %in% dim(data))) { + stop("Parameter 'weights' must have dimensions that can be found in 'data' dimensions.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## space_dim + if (!is.null(space_dim)) { + if (!is.character(space_dim)) { + stop("Parameter 'space_dim' must be a character vector.") + } + if (any(!space_dim %in% names(dim(data)))) { + stop("Parameter 'space_dim' is not found in 'data' dimensions.") + } + if (!is.null(weights)) { + if (!(length(space_dim) == length(dim(weights)) & all(space_dim %in% names(dim(weights))))) { + stop("Parameter 'weights' must have dimension names the same as 'space_dim'.") + } + if (space_dim != names(dim(weights))) { + space_dim <- names(dim(weights)) + } + } + } + if (is.null(space_dim) & !is.null(weights)) { + space_dim <- names(dim(weights)) + .warning(paste0("Parameter 'weights' is assigned but not 'space_dim'. 
+    .warning(paste0("Parameter 'weights' is assigned but not 'space_dim'. ",
+                    "'space_dim' is set as the dimension names of 'weights'."))
+  }
+  ## nclusters
+  if (!is.null(nclusters)) {
+    if (!is.numeric(nclusters) | length(nclusters) != 1) {
+      stop("Parameter 'nclusters' must be an integer bigger than 1.")
+    } else if (nclusters <= 1) {
+      stop("Parameter 'nclusters' must be an integer bigger than 1.")
+    }
+  }
+
+  ## index
+  if (!is.character(index) | length(index) > 1) {
+    stop("Parameter 'index' must be a character string accepted as 'index' by the function NbClust::NbClust.")
+  }
+
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Cluster
+
+  output <- Apply(list(data),
+                  target_dims = c(time_dim, space_dim),
+                  fun = .Cluster,
+                  weights = weights, nclusters = nclusters, index = index,
+                  ncores = ncores)
+
+  return(output)
+}
+
+.Cluster <- function(data, weights = NULL, nclusters = NULL, index = 'sdindex') {
+  # data: [time, (lat, lon)]
+  dat_dim <- dim(data)
+
+  if (length(dim(data)) != 1) {
+    # Reshape data into two dims
+    dim(data) <- c(dat_dim[1], prod(dat_dim[-1]))
+
+    # weights
+    if (!is.null(weights)) {
+      dim(weights) <- prod(dim(weights))  # a vector
+      data_list <- lapply(1:dat_dim[1],
+                          function(x) { data[x, ] * weights })
+      data <- do.call(abind::abind, c(data_list, along = 0))
+    }
+  }
+
+  if (!is.null(nclusters)) {
+    kmeans.results <- kmeans(data, centers = nclusters, iter.max = 300,
+                             nstart = 30)
+  } else {
+    # NbClust() produces plots; open a null PDF device to discard them
+    pdf(file = NULL)
+    nbclust.results <- NbClust::NbClust(data, distance = 'euclidean',
+                                        min.nc = 2, max.nc = 20,
+                                        method = 'kmeans', index = index)
+    dev.off()
+
+    if (index == 'all' || index == 'alllong') {
+      # Majority rule: the most frequent "best number of clusters" across indices
+      kmc <- hist(nbclust.results$Best.nc[1, ], breaks = seq(0, 20),
+                  plot = FALSE)$counts
+      kmc1 <- which(kmc == max(kmc))
+    } else {
+      kmc1 <- nbclust.results$Best.nc[1]
+    }
+
+    kmeans.results <- kmeans(data, centers = kmc1, iter.max = 300,
+                             nstart = 30)
+  }
+  invisible(kmeans.results)
+}
diff --git a/R/Composite.R b/R/Composite.R
index ebab24750a9bdf98e977ab914c134ac2f6fb0ac9..be03ac9e71b37672a4eb2d84c061611d8921ed57 100644
--- a/R/Composite.R
+++ b/R/Composite.R
@@ -157,7 +157,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
@@ -181,7 +181,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
                 fun = .Composite,
                 output_dims = output_dims,
                 occ = occ, time_dim = time_dim, space_dim = space_dim,
-                K = K, lag = lag, eno = eno,
+                K = K, lag = lag, eno = eno, ncores_input = ncores,
                 ncores = ncores)
 
   if (!is.null(fileout)) {
@@ -192,7 +192,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
 }
 
 .Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
-                       K = NULL, lag = 0, eno = FALSE) {
+                       K = NULL, lag = 0, eno = FALSE, ncores_input = NULL) {
   # data: [lon, lat, time]
   # occ: [time]
   if (is.null(K)) {
@@ -204,7 +204,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
   pval <- array(dim = c(dim(data)[1:2], composite = K))
 
   if (eno == TRUE) {
-    n_tot <- Eno(data, time_dim = time_dim)
+    n_tot <- Eno(data, time_dim = time_dim, ncores = ncores_input)
   } else {
     n_tot <- length(occ)
   }
@@ -224,7 +224,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'),
   if (eno == TRUE) {
     data_tmp <- data[, , indices]
     names(dim(data_tmp)) <- names(dim(data))
-    n_k <- Eno(data_tmp, time_dim = time_dim)
+    n_k <- Eno(data_tmp, time_dim = time_dim, ncores = ncores_input)
   } else {
     n_k <- length(indices)
   }
diff --git a/R/Consist_Trend.R b/R/Consist_Trend.R
new file mode 100644
index 0000000000000000000000000000000000000000..b02aa5fe10cf64b6c574fb2314dd3ad91ab38029
--- /dev/null
+++ b/R/Consist_Trend.R
@@ -0,0 +1,201 @@
+#'Compute trend using only model data for which observations are available
+#'
+#'Compute the linear trend for a time series by least-squares fitting together
+#'with the associated error interval for both the observational and model data.
+#'The 95\% confidence interval and the detrended observational and model data are
+#'also provided.\cr
+#'The function doesn't do the ensemble mean, so if the input data have the
+#'member dimension, the ensemble mean needs to be computed beforehand.
+#'
+#'@param exp A named numeric array of experimental data, with at least two
+#'  dimensions 'time_dim' and 'dat_dim'.
+#'@param obs A named numeric array of observational data, same dimensions as
+#'  parameter 'exp' except along 'dat_dim'.
+#'@param dat_dim A character string indicating the name of the dataset
+#'  dimension. If data at some point of 'time_dim' are not complete along
+#'  'dat_dim' in both 'exp' and 'obs', this point in all 'dat_dim' will be
+#'  discarded. The default value is 'dataset'.
+#'@param time_dim A character string indicating the name of the dimension along
+#'  which the trend is computed. The default value is 'sdate'.
+#'@param interval A positive numeric indicating the unit length between two
+#'  points along the 'time_dim' dimension. The default value is 1.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing:
+#'\item{$trend}{
+#'  A numeric array of the trend coefficients of model and observational data
+#'  with dimensions c(stats = 2, nexp + nobs, the rest dimensions of 'exp' and
+#'  'obs' except time_dim), where 'nexp' is the length of 'dat_dim' in 'exp'
+#'  and 'nobs' is the length of 'dat_dim' in 'obs'. The 'stats' dimension
+#'  contains the intercept and the slope.
+#'}
+#'\item{$conf.lower}{
+#'  A numeric array of the lower limit of the 95\% confidence interval with
+#'  dimensions same as $trend. The 'stats' dimension contains the lower
+#'  confidence level of the intercept and the slope.
+#'}
+#'\item{$conf.upper}{
+#'  A numeric array of the upper limit of the 95\% confidence interval with
+#'  dimensions same as $trend. The 'stats' dimension contains the upper
+#'  confidence level of the intercept and the slope.
+#'}
+#'\item{$detrended_exp}{
+#'  A numeric array of the detrended model data with the same dimensions as
+#'  'exp'.
+#'}
+#'\item{$detrended_obs}{
+#'  A numeric array of the detrended observational data with the same
+#'  dimensions as 'obs'.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'runmean_months <- 12
+#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+#'dim_to_mean <- 'member'  # average along members
+#'years_between_startdates <- 5
+#'trend <- Consist_Trend(MeanDims(smooth_ano_exp, dim_to_mean, na.rm = TRUE),
+#'                       MeanDims(smooth_ano_obs, dim_to_mean, na.rm = TRUE),
+#'                       interval = years_between_startdates)
+#'# Bind data for plotting
+#'trend_bind <- abind::abind(trend$conf.lower[2, , ], trend$trend[2, , ],
+#'                           trend$conf.upper[2, , ], trend$trend[1, , ], along = 0)
+#'trend_bind <- Reorder(trend_bind, c(2, 1, 3))
+#'\donttest{
+#'PlotVsLTime(trend_bind, toptitle = "trend", ytitle = "K/(5 years)",
+#'            monini = 11, limits = c(-0.8, 0.8), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, hlines = c(0))
+#'PlotAno(InsertDim(trend$detrended_exp, 2, 1), InsertDim(trend$detrended_obs, 2, 1),
+#'        startDates, "Detrended tos anomalies", ytitle = 'K',
+#'        legends = 'ERSST', biglab = FALSE)
+#'}
+#'
+#'@import multiApply
+#'@export
+Consist_Trend <- function(exp, obs, dat_dim = 'dataset', time_dim = 'sdate', interval = 1,
+                          ncores = NULL) {
+  # Check inputs
+  ## exp and obs
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be numeric arrays.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must have at least two dimensions ",
                "containing time_dim and dat_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (!all(names(dim(exp)) %in% names(dim(obs))) |
+      !all(names(dim(obs)) %in% names(dim(exp)))) {
+    stop("Parameter 'exp' and 'obs' must have the same dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## dat_dim
+  if (!is.character(dat_dim)) {
+    stop("Parameter 'dat_dim' must be a character string.")
+  }
+  if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) {
+    stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  for (i in 1:length(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim[i])]
+    name_obs <- name_obs[-which(name_obs == dat_dim[i])]
+  }
+  if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have the same length of ",
+                "all dimensions except 'dat_dim'."))
+  }
+  ## interval
+  if (!is.numeric(interval) | interval <= 0 | length(interval) > 1) {
+    stop("Parameter 'interval' must be a positive number.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Consist_Trend
+
+  output
<- Apply(list(exp, obs), + target_dims = list(c(dat_dim, time_dim), + c(dat_dim, time_dim)), + fun = .Consist_Trend, + output_dims = list(trend = c('stats', dat_dim), + conf.lower = c('stats', dat_dim), + conf.upper = c('stats', dat_dim), + detrended_exp = c(dat_dim, time_dim), + detrended_obs = c(dat_dim, time_dim)), + interval = interval, + ncores = ncores) + + return(output) +} + +.Consist_Trend <- function(exp, obs, interval = 1) { + # exp: [nexp, sdate] + # obs: [nobs, sdate] + + # Find common points + nan <- apply(exp, 2, mean, na.rm = FALSE) + apply(obs, 2, mean, na.rm = FALSE) # [sdate] + exp[, is.na(nan)] <- NA + obs[, is.na(nan)] <- NA + + # Compute trends + res_exp <- apply(exp, 1, .Trend, interval = interval, polydeg = 1) + res_obs <- apply(obs, 1, .Trend, interval = interval, polydeg = 1) + exp_trend <- lapply(res_exp, '[[', 'trend') + exp_trend <- do.call(abind::abind, c(exp_trend, along = 2)) # [stats = 2, dat] + obs_trend <- lapply(res_obs, '[[', 'trend') + obs_trend <- do.call(abind::abind, c(obs_trend, along = 2)) + # bind along 'dat' + res_trend <- abind::abind(exp_trend, obs_trend, along = 2) # [stats = 2, dat = (nexp + nobs)] + + # Compute conf.lower + exp_conf.lower <- lapply(res_exp, '[[', 'conf.lower') + exp_conf.lower <- do.call(abind::abind, c(exp_conf.lower, along = 2)) # [stats = 2, dat] + obs_conf.lower <- lapply(res_obs, '[[', 'conf.lower') + obs_conf.lower <- do.call(abind::abind, c(obs_conf.lower, along = 2)) + res_conf.lower <- abind::abind(exp_conf.lower, obs_conf.lower, along = 2) + + # Compute conf.upper + exp_conf.upper <- lapply(res_exp, '[[', 'conf.upper') + exp_conf.upper <- do.call(abind::abind, c(exp_conf.upper, along = 2)) # [stats = 2, dat] + obs_conf.upper <- lapply(res_obs, '[[', 'conf.upper') + obs_conf.upper <- do.call(abind::abind, c(obs_conf.upper, along = 2)) + res_conf.upper <- abind::abind(exp_conf.upper, obs_conf.upper, along = 2) + + # Compute detrended + exp_detrended <- lapply(res_exp, '[[', 'detrended') + exp_detrended <- do.call(abind::abind, c(exp_detrended, along = 0)) + obs_detrended <- lapply(res_obs, '[[', 'detrended') + obs_detrended <- do.call(abind::abind, c(obs_detrended, along = 0)) + + return(invisible(list(trend = res_trend, + conf.lower = res_conf.lower, conf.upper = res_conf.upper, + detrended_exp = exp_detrended, detrended_obs = obs_detrended))) +} diff --git a/R/Corr.R b/R/Corr.R index a74725f14be1990f9bb5352201f5eb3aa8d936f3..0382a393266a7b021a7aab39e69f9e7382e1155c 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -19,7 +19,7 @@ #'@param exp A named numeric array of experimental data, with at least two #' dimensions 'time_dim' and 'dat_dim'. #'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along dat_dim. +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. #'@param time_dim A character string indicating the name of dimension along #' which the correlations are computed. The default value is 'sdate'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) @@ -31,6 +31,12 @@ #' be completed. The default is c(1, length(comp_dim dimension)). #'@param method A character string indicating the type of correlation: #' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is NULL. 
+#'@param memb A logical value indicating whether to keep the 'memb_dim'
+#'  dimension (TRUE) or do the ensemble mean over 'memb_dim' (FALSE). Only
+#'  functional when 'memb_dim' is not NULL. The default value is TRUE.
 #'@param pval A logical value indicating whether to compute or not the p-value
 #'  of the test Ho: Corr = 0. The default value is TRUE.
 #'@param conf A logical value indicating whether to retrieve the confidence
@@ -42,9 +48,12 @@
 #'
 #'@return
 #'A list containing the numeric arrays with dimension:\cr
-#'  c(nexp, nobs, all other dimensions of exp except time_dim).\cr
-#'nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the
-#'number of observation (i.e., dat_dim in obs).\cr
+#'  c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
+#'  time_dim and memb_dim).\cr
+#'nexp is the number of experiments (i.e., 'dat_dim' in exp), and nobs is the
+#'number of observations (i.e., 'dat_dim' in obs). exp_memb is the number of
+#'members in the experiment (i.e., 'memb_dim' in exp) and obs_memb is the number
+#'of members in the observation (i.e., 'memb_dim' in obs).\cr\cr
 #'\item{$corr}{
 #'  The correlation coefficient.
 #'}
@@ -59,20 +68,37 @@
 #'}
 #'
 #'@examples
-#'# Load sample data as in Load() example:
+#'# Case 1: Load sample data as in Load() example:
 #'example(Load)
 #'clim <- Clim(sampleData$mod, sampleData$obs)
-#'corr <- Corr(clim$clim_exp, clim$clim_obs, time_dim = 'ftime', dat_dim = 'member')
-#'# Renew the example when Ano and Smoothing is ready
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'runmean_months <- 12
+#'
+#'# Smooth along lead-times
+#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+#'required_complete_row <- 3  # Discard start dates which contain any NA lead-times
+#'leadtimes_per_startdate <- 60
+#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
+#'             MeanDims(smooth_ano_obs, 'member'),
+#'             comp_dim = 'ftime',
+#'             limits = c(ceiling((runmean_months + 1) / 2),
+#'                        leadtimes_per_startdate - floor(runmean_months / 2)))
+#'
+#'# Case 2: Keep member dimension
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member')
+#'# ensemble mean
+#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE)
 #'
-#'@rdname Corr
 #'@import multiApply
 #'@importFrom ClimProjDiags Subset
 #'@importFrom stats cor pt qnorm
 #'@export
 Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
-                 comp_dim = NULL, limits = NULL,
-                 method = 'pearson', pval = TRUE, conf = TRUE,
+                 comp_dim = NULL, limits = NULL, method = 'pearson',
+                 memb_dim = NULL, memb = TRUE,
+                 pval = TRUE, conf = TRUE,
                  conf.lev = 0.95, ncores = NULL) {
 
   # Check inputs
@@ -133,6 +159,19 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
   if (!(method %in% c("kendall", "spearman", "pearson"))) {
     stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.")
   }
+  ## memb_dim
+  if (!is.null(memb_dim)) {
+    if (!is.character(memb_dim) | length(memb_dim) > 1) {
+      stop("Parameter 'memb_dim' must be a character string.")
+    }
+    if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
+      stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb
+  if (!is.logical(memb) | length(memb) > 1) {
+    stop("Parameter 'memb' must be one logical value.")
+  }
   ## pval
   if (!is.logical(pval) | length(pval) > 1) {
     stop("Parameter 'pval' must be one logical value.")
@@ -147,7 +186,7 @@
Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } @@ -157,9 +196,13 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', name_obs <- sort(names(dim(obs))) name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension expect 'dat_dim'.")) + "all dimension expect 'dat_dim' and 'memb_dim'.")) } if (dim(exp)[time_dim] < 3) { stop("The length of time_dim must be at least 3 to compute correlation.") @@ -179,53 +222,168 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', # Remove data along comp_dim dim if there is at least one NA between limits if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) if (is.null(limits)) { - limits <- c(1, dim(obs)[comp_dim]) + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) } - pos <- which(names(dim(obs)) == comp_dim) - obs_sub <- Subset(obs, pos, list(limits[1]:limits[2])) outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + + if (is.null(memb_dim)) { + # Define output_dims + if (conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + p.val = c('nexp', 'nobs'), + conf.lower = c('nexp', 'nobs'), + conf.upper = c('nexp', 'nobs')) + } else if (conf & !pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + conf.lower = c('nexp', 'nobs'), + conf.upper = c('nexp', 'nobs')) + } else if (!conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + p.val = c('nexp', 'nobs')) + } else { + output_dims <- list(corr = c('nexp', 'nobs')) + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + output_dims = output_dims, + fun = .Corr, + time_dim = time_dim, method = method, + pval = pval, conf = conf, conf.lev = conf.lev, + ncores = ncores) + + } else { + if (!memb) { #ensemble mean + name_exp <- names(dim(exp)) + margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] + exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here + obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + + # Define output_dims + if (conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + p.val = c('nexp', 'nobs'), + conf.lower = c('nexp', 'nobs'), + conf.upper = c('nexp', 'nobs')) + } else if (conf & !pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + conf.lower = c('nexp', 'nobs'), + conf.upper = c('nexp', 'nobs')) + } else if (!conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs'), + p.val = c('nexp', 'nobs')) + } else { + output_dims <- list(corr = c('nexp', 'nobs')) + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + output_dims = output_dims, + fun = .Corr, + time_dim = time_dim, method = method, + pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores, + ncores = ncores) + + } else { # correlation for each member + + # Define 
output_dims + if (conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + p.val = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + conf.lower = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + conf.upper = c('nexp', 'nobs', 'exp_memb', 'obs_memb')) + } else if (conf & !pval) { + output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + conf.lower = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + conf.upper = c('nexp', 'nobs', 'exp_memb', 'obs_memb')) + } else if (!conf & pval) { + output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb'), + p.val = c('nexp', 'nobs', 'exp_memb', 'obs_memb')) + } else { + output_dims <- list(corr = c('nexp', 'nobs', 'exp_memb', 'obs_memb')) + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + output_dims = output_dims, + fun = .Corr, + time_dim = time_dim, method = method, + pval = pval, conf = conf, conf.lev = conf.lev, ncores_input = ncores, + ncores = ncores) + } } - res <- Apply(list(exp, obs), - target_dims = list(c(time_dim, dat_dim), - c(time_dim, dat_dim)), - fun = .Corr, - time_dim = time_dim, method = method, - pval = pval, conf = conf, conf.lev = conf.lev, - ncores = ncores) return(res) } .Corr <- function(exp, obs, time_dim = 'sdate', method = 'pearson', - conf = TRUE, pval = TRUE, conf.lev = 0.95) { + conf = TRUE, pval = TRUE, conf.lev = 0.95, ncores_input = NULL) { + if (length(dim(exp)) == 2) { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) - CORR <- array(dim = c(nexp = nexp, nobs = nobs)) - eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) - p.val <- array(dim = c(nexp = nexp, nobs = nobs)) - - # ens_mean - for (i in 1:nobs) { - - CORR[, i] <- sapply(1:nexp, - function(x) { - if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { -cor(exp[, x], obs[, i], - use = "pairwise.complete.obs", - method = method) -} else { - CORR[, i] <- NA -} -}) +# NOTE: Use sapply to replace the for loop + CORR <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { #NOTE: Is this necessary? 
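+                # Descriptive note on the guard above: the correlation is
+                # computed only when 'exp' has at least one non-NA value and
+                # 'obs' has more than two, since cor() with
+                # use = "pairwise.complete.obs" needs enough complete pairs
+                # to be meaningful.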
+                  cor(exp[, x], obs[, i],
+                      use = "pairwise.complete.obs",
+                      method = method)
+                } else {
+                  NA  #CORR[, i] <- NA
+                }
+              })
+            })
+    if (is.null(dim(CORR))) {
+      CORR <- array(CORR, dim = c(1, 1))
+    }
+
+  } else {  # member
+
+    # exp: [sdate, dat_exp, memb_exp]
+    # obs: [sdate, dat_obs, memb_obs]
+    nexp <- as.numeric(dim(exp)[2])
+    nobs <- as.numeric(dim(obs)[2])
+    exp_memb <- as.numeric(dim(exp)[3])
+    obs_memb <- as.numeric(dim(obs)[3])
+
+    CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb))
+
+    for (j in 1:obs_memb) {
+      for (y in 1:exp_memb) {
+        CORR[, , y, j] <- sapply(1:nobs, function(i) {
+                            sapply(1:nexp, function(x) {
+                              if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) {
+                                cor(exp[, x, y], obs[, i, j],
+                                    use = "pairwise.complete.obs",
+                                    method = method)
+                              } else {
+                                NA  #CORR[, i] <- NA
+                              }
+                            })
+                          })
+
+      }
+    }
+
+  }
+
 #  if (pval) {
 #    for (i in 1:nobs) {
 #      p.val[, i] <- try(sapply(1:nexp,
@@ -240,16 +398,29 @@ cor(exp[, x], obs[, i],
   if (pval | conf) {
     if (method == "kendall" | method == "spearman") {
-      tmp <- apply(obs, 2, rank)
+      tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank)  # for memb_dim = NULL, 2; for memb_dim, c(2, 3)
       names(dim(tmp))[1] <- time_dim
-      eno <- Eno(tmp, time_dim)
+      eno <- Eno(tmp, time_dim, ncores = ncores_input)
     } else if (method == "pearson") {
-      eno <- Eno(obs, time_dim)
+      eno <- Eno(obs, time_dim, ncores = ncores_input)
     }
-    for (i in 1:nexp) {
-      eno_expand[i, ] <- eno
+
+    if (length(dim(exp)) == 2) {
+      eno_expand <- array(dim = c(nexp = nexp, nobs = nobs))
+      for (i in 1:nexp) {
+        eno_expand[i, ] <- eno
+      }
+    } else {  # member
+      eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb))
+      for (i in 1:nexp) {
+        for (j in 1:exp_memb) {
+          eno_expand[i, , j, ] <- eno
+        }
+      }
     }
+
   }
+
 #############old#################
 #This doesn't return error but it's diff from cor.test() when method is spearman and kendall
   if (pval) {
diff --git a/R/EOF.R b/R/EOF.R
new file mode 100644
index 0000000000000000000000000000000000000000..8f8d6403f96876039c0087e340866101f4eddeb1
--- /dev/null
+++ b/R/EOF.R
@@ -0,0 +1,292 @@
+#'Area-weighted empirical orthogonal function analysis using SVD
+#'
+#'Perform an area-weighted EOF analysis using singular value decomposition
+#'(SVD) based on a covariance matrix, or on a correlation matrix if parameter
+#''corr' is set to TRUE.
+#'
+#'@param ano A numerical array of anomalies with named dimensions to calculate
+#'  EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs are
+#'  allowed, but they must be consistent along time_dim. That is, if one grid
+#'  point has NAs, all the time steps at this point should be NAs.
+#'@param lat A vector of the latitudes of 'ano'.
+#'@param lon A vector of the longitudes of 'ano'.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#'  name of latitude of 'ano' and the second is the dimension name of longitude
+#'  of 'ano'. The default value is c('lat', 'lon').
+#'@param neofs A positive integer of the modes to be kept. The default value is
+#'  15. If the time length or the product of the lengths of space_dim is smaller
+#'  than neofs, neofs will be changed to the minimum of the three values.
+#'@param corr A logical value indicating whether to base the decomposition on a
+#'  correlation matrix (TRUE) or on a covariance matrix (FALSE). If TRUE, each
+#'  grid-point series is standardized by its standard deviation before the SVD.
+#'  The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. +#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. 
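+#'# A self-contained synthetic alternative (illustrative only; the values and
+#'# dimension sizes below are arbitrary assumptions, not package data):
+#'# ano <- array(rnorm(5 * 8 * 15), dim = c(sdate = 5, lat = 8, lon = 15))
+#'# eof <- EOF(ano, lat = seq(10, 80, length.out = 8),
+#'#            lon = seq(-20, 50, length.out = 15))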
+#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.")) + } + if (any(lon > 360 | lon < -360)) { + warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. 
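+  # A worked restatement of the comment above (an illustration, not package
+  # code): if W is the diagonal matrix of cell-area weights, the weighted
+  # covariance t(ano) %*% W %*% ano equals t(A) %*% A with A = sqrt(W) %*% ano,
+  # so multiplying the anomalies by sqrt(wght) before the SVD yields the
+  # area-weighted covariance matrix.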
+  wght <- sqrt(wght)
+
+  # neofs is bounded
+  if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) {
+    neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)
+    .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ",
+                    "the length of time_dim, the product of the lengths of space_dim, ",
+                    "and neofs."))
+  }
+
+  res <- Apply(ano,
+               target_dims = c(time_dim, space_dim),
+               output_dims = list(EOFs = c('mode', space_dim),
+                                  PCs = c(time_dim, 'mode'),
+                                  var = 'mode',
+                                  tot_var = NULL,
+                                  mask = space_dim),
+               fun = .EOF,
+               corr = corr, neofs = neofs,
+               wght = wght,
+               ncores = ncores)
+
+  return(c(res, wght = list(wght)))
+
+}
+
+.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) {
+  # ano: [time, lat, lon]
+
+  # Dimensions
+  nt <- dim(ano)[1]
+  ny <- dim(ano)[2]
+  nx <- dim(ano)[3]
+
+  # Check if all the time steps at one grid point are NA-consistent.
+  # The grid point should have all NAs or no NA along the time dimension.
+  if (any(is.na(ano))) {
+    ano_latlon <- array(ano, dim = c(nt, ny * nx))  # [time, lat*lon]
+    na_ind <- which(is.na(ano_latlon), arr.ind = TRUE)
+    if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) {
+      stop("Some grid points have NAs that are not consistent across the time ",
+           "dimension. If one grid point has NA, it should have NA at all time steps.")
+    }
+  }
+
+  # Build the mask
+  mask <- ano[1, , ]
+  mask[!is.finite(mask)] <- NA
+  mask[is.finite(mask)] <- 1
+  dim(mask) <- c(ny, nx)
+
+  # Replace mask of NAs with 0s for EOF analysis.
+  ano[!is.finite(ano)] <- 0
+
+  ano <- ano * InsertDim(wght, 1, nt)
+
+  # The use of the correlation matrix is done under the option corr.
+  if (corr == TRUE) {
+    stdv <- apply(ano, c(2, 3), sd, na.rm = TRUE)
+    ano <- ano / InsertDim(stdv, 1, nt)
+  }
+
+  # Time/space matrix for SVD
+  dim(ano) <- c(nt, ny * nx)
+  dim.dat <- dim(ano)
+
+  # 'transpose' means the array needs to be transposed before
+  # calling La.svd for computational efficiency, because the
+  # spatial dimension is larger than the time dimension. This
+  # goes with transposing the outputs of La.svd also.
+  if (dim.dat[2] > dim.dat[1]) {
+    transpose <- TRUE
+  } else {
+    transpose <- FALSE
+  }
+  if (transpose) {
+    pca <- La.svd(t(ano))
+  } else {
+    pca <- La.svd(ano)
+  }
+
+  # La.svd conventions: decomposition X = U D t(V). La.svd$u returns U,
+  # La.svd$d returns the diagonal values of D, and La.svd$v returns t(V).
+  # The usual convention is PC = U and EOF = V.
+  # If La.svd is called for ano (transpose = FALSE case): EOFs = $v, PCs = $u.
+  # If La.svd is called for t(ano) (transpose = TRUE case): EOFs = t($u), PCs = t($v).
+  if (transpose) {
+    pca.EOFs <- t(pca$u)
+    pca.PCs <- t(pca$v)
+  } else {
+    pca.EOFs <- pca$v
+    pca.PCs <- pca$u
+  }
+
+  # The number of modes retained is limited to neofs
+  PC <- pca.PCs[, 1:neofs]
+  EOF <- pca.EOFs[1:neofs, ]
+  dim(EOF) <- c(neofs, ny, nx)
+
+  # To sort out the crash when neofs = 1.
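+  # (Descriptive note: with neofs = 1, pca.PCs[, 1:neofs] above drops to a
+  # plain vector, so InsertDim() below restores the 'mode' dimension and keeps
+  # PC a [time, mode] matrix.)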
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs.
+    # Divide patterns by area weights so that EOF * PC gives unweighted (original) data.
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the corresponding singular value,
+    # so that the reconstruction is only EOF * PC.
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
diff --git a/R/Eno.R b/R/Eno.R
index 9375b78bb1c7f7789ea0a71f24c9b7918374d964..8c8d16bfdf8aa4998df634b8f18b54942856ab3d 100644
--- a/R/Eno.R
+++ b/R/Eno.R
@@ -65,7 +65,7 @@ Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) {
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
diff --git a/R/EuroAtlanticTC.R b/R/EuroAtlanticTC.R
new file mode 100644
index 0000000000000000000000000000000000000000..2860a5336695698ecd62eb43e6bef81e13d68b57
--- /dev/null
+++ b/R/EuroAtlanticTC.R
@@ -0,0 +1,208 @@
+#'Teleconnection indices in the European Atlantic Ocean region
+#'
+#'Calculate the four main teleconnection indices in the European Atlantic Ocean
+#'region: North Atlantic oscillation (NAO), East Atlantic Pattern (EA), East
+#'Atlantic/Western Russia (EAWR), and Scandinavian pattern (SCA). The function
+#'\code{REOF()} is used for the calculation, and the first four modes are
+#'returned.
+#'
+#'@param ano A numerical array of anomalies with named dimensions to calculate
+#'  the REOFs and then the four teleconnections. The dimensions must have at
+#'  least 'time_dim' and 'space_dim', and the data should cover the European
+#'  Atlantic Ocean area (20N-80N, 90W-60E).
+#'@param lat A vector of the latitudes of 'ano'. It should be 20N-80N.
+#'@param lon A vector of the longitudes of 'ano'. It should be 90W-60E.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#'  name of latitude of 'ano' and the second is the dimension name of longitude
+#'  of 'ano'. The default value is c('lat', 'lon').
+#'@param ntrunc A positive integer of the modes to be kept. The default value
+#'  is 30. If the time length or the product of the latitude and longitude
+#'  lengths is less than ntrunc, ntrunc is set to the minimum of the three
+#'  values.
+#'@param corr A logical value indicating whether to base the decomposition on a
+#'  correlation matrix (TRUE) or on a covariance matrix (FALSE). The default
+#'  value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing:
+#'\item{patterns}{
+#'  An array of the first four REOF patterns normalized to 1 (unitless) with
+#'  dimensions (modes = 4, the rest of the dimensions of 'ano' except
+#'  'time_dim'). The modes represent NAO, EA, EAWR, and SCA, whose order and
+#'  sign change depending on the dataset and period employed, so manual
+#'  reordering may be needed. Multiplying 'patterns' by 'indices' gives the
+#'  original reconstructed field.
+#'} +#'\item{indices}{ +#' An array of the first four principal components with the units of the +#' original field to the power of 2, with dimensions (time_dim, modes = 4, the +#' rest of the dimensions of 'ano' except 'space_dim'). +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode. The dimensions are (modes = ntrunc, the rest of the +#' dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by the square root of cosine of 'lat' and used to compute the fraction of +#' variance explained by each REOFs. +#'} +#'@examples +#'# Use synthetic data +#'set.seed(1) +#'dat <- array(rnorm(800), dim = c(dat = 2, sdate = 5, lat = 8, lon = 15)) +#'lat <- seq(10, 90, length.out = 8) +#'lon <- seq(-100, 70, length.out = 15) +#'res <- EuroAtlanticTC(dat, lat = lat, lon = lon) +#' +#'@seealso REOF NAO +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +EuroAtlanticTC <- function(ano, lat, lon, ntrunc = 30, time_dim = 'sdate', + space_dim = c('lat', 'lon'), corr = FALSE, + ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (any(!space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat and lon + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.")) + } + if (any(lat > 90 | lat < -90)) { + stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop(paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.")) + } + if (all(lon >= 0)) { + if (any(lon > 360 | lon < 0)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } else { + if (any(lon < -180 | lon > 180)) { + stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].") + } + } + stop_needed <- FALSE + # A preset region for computing EuroAtlantic teleconnections + lat.min <- 20 + lat.max <- 80 + lon.min <- -90 # Write this as a negative number please! + lon.max <- 60 + + # Choose lats and lons inside the Euroatlantic region. 
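+  # (Illustration of the conversion below: lon = c(0, 90, 270, 350) becomes
+  # c(0, 90, -90, -10), so western-hemisphere longitudes compare correctly
+  # with lon.min = -90.)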
+  # Change lon to [-180, 180] if it isn't
+  lon <- ifelse(lon < 180, lon, lon - 360)
+  ind_lat <- which(lat >= lat.min & lat <= lat.max)
+  ind_lon <- which(lon >= lon.min & lon <= lon.max)
+
+  # Subset
+  lat <- lat[ind_lat]
+  lon <- lon[ind_lon]
+
+  # Lat should be [20, 80] (5deg tolerance)
+  if (max(lat) < (lat.max - 5) | min(lat) > (lat.min + 5)) {
+    stop_needed <- TRUE
+  }
+  # Lon should be [-90, 60] (5deg tolerance)
+  if (!(min(lon) < (lon.min + 5) & max(lon) > (lon.max - 5))) {
+    stop_needed <- TRUE
+  }
+  if (stop_needed) {
+    stop("The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).")
+  }
+  ## ntrunc
+  if (!is.numeric(ntrunc) | ntrunc %% 1 != 0 | ntrunc <= 0 | length(ntrunc) > 1) {
+    stop("Parameter 'ntrunc' must be a positive integer.")
+  }
+  ## corr
+  if (!is.logical(corr) | length(corr) > 1) {
+    stop("Parameter 'corr' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+
+  ###############################
+  # Calculate indices
+
+  ano <- ClimProjDiags::Subset(ano, space_dim, list(ind_lat, ind_lon), drop = FALSE)
+
+  # ntrunc is bounded
+  if (ntrunc != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)) {
+    ntrunc <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)
+    .warning(paste0("Parameter 'ntrunc' is changed to ", ntrunc, ", the minimum among ",
+                    "the length of time_dim, the product of the lengths of space_dim, ",
+                    "and ntrunc."))
+  }
+  if (ntrunc < 4) {
+    .warning(paste0("Parameter 'ntrunc' is ", ntrunc, " so only the first ", ntrunc,
+                    " modes will be calculated."))
+  }
+
+  # Area weighting is needed to compute the fraction of variance explained by
+  # each mode
+  space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a))
+  wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind])
+
+  # We want the covariance matrix to be weighted by the grid
+  # cell area so the anomaly field is weighted by its square
+  # root since the covariance matrix equals transpose(ano)
+  # times ano.
+  wght <- sqrt(wght)
+
+  reofs <- Apply(ano,
+                 target_dims = c(time_dim, space_dim),
+                 output_dims = list(REOFs = c('mode', space_dim),
+                                    RPCs = c(time_dim, 'mode'),
+                                    var = 'mode'),
+                 fun = .REOF,
+                 corr = corr, ntrunc = ntrunc, wght = wght,
+                 ncores = ncores)
+
+  if (ntrunc >= 4) {
+    TCP <- ClimProjDiags::Subset(reofs$REOFs, 'mode', 1:4, drop = FALSE)
+    TCI <- ClimProjDiags::Subset(reofs$RPCs, 'mode', 1:4, drop = FALSE)
+  } else {
+    TCP <- reofs$REOFs
+    TCI <- reofs$RPCs
+  }
+
+  return(list(patterns = TCP, indices = TCI, var = reofs$var, wght = wght))
+}
+
diff --git a/R/Filter.R b/R/Filter.R
new file mode 100644
index 0000000000000000000000000000000000000000..c4e76bf20faba914f7b287c2b0b6d6b902df8ec0
--- /dev/null
+++ b/R/Filter.R
@@ -0,0 +1,121 @@
+#'Filter frequency peaks from an array
+#'
+#'Filter out the selected frequency from a time series. The filtering is
+#'performed by dichotomy, seeking a frequency around the parameter 'freq'
+#'and the phase that maximizes the signal to subtract from the time series.
+#'The maximization of the signal to subtract relies on a minimization of the
+#'mean square differences between the time series ('data') and a cosine of
+#'the specified frequency and phase.
+#'
+#'@param data A numeric vector or array of the data to be filtered.
+#'  If it's a vector, it should be a time series.
If it's an array, +#' the dimensions must have at least 'time_dim'. +#'@param freq A number of the frequency to filter. +#'@param time_dim A character string indicating the dimension along which to +#' compute the filtering. The default value is 'ftime'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numeric vector or array of the filtered data with the dimensions +#' the same as 'data'. +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'ensmod <- MeanDims(sampleData$mod, 2) +#'spectrum <- Spectrum(ensmod) +#' +#'for (jsdate in 1:dim(spectrum)['sdate']) { +#' for (jlen in 1:dim(spectrum)['ftime']) { +#' if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { +#' ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) +#' } +#' } +#'} +#' \donttest{ +#'PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) +#' } +#' +#'@import multiApply +#'@importFrom stats lm +#'@export +Filter <- function(data, freq, time_dim = 'ftime', ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## freq + if (is.null(freq)) { + stop("Parameter 'freq' cannot be NULL.") + } + if (!is.numeric(freq) | length(freq) != 1) { + stop("Parameter 'freq' must be a number.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Filter + output <- Apply(list(data), + target_dims = time_dim, + fun = .Filter, + freq = freq, + output_dims = time_dim, + ncores = ncores)$output1 + + return(output) +} + +.Filter <- function(data, freq) { + # data: [ftime] + + fac1 <- 1 + fac2 <- 1 + ndat <- length(data) + ndat2 <- length(which(!is.na(data))) + maxi <- 0 + endphase <- 0 + + for (jfreq in seq(freq - 0.5 / ndat2, freq + 0.5 / ndat2, 0.1 / (ndat2 * fac1))) { + for (phase in seq(0, pi, (pi / (10 * fac2)))) { + xtest <- cos(phase + c(1:ndat) * jfreq * 2 * pi) + test <- lm(data[is.na(data) == FALSE] ~ xtest[ + is.na(data) == FALSE])$fitted.values + if (sum(test ^ 2) > maxi) { + endphase <- phase + endfreq <- jfreq + } + maxi <- max(sum(test ^ 2), maxi) + } + } + xend <- cos(endphase + c(1:ndat) * endfreq * 2 * pi) + data[is.na(data) == FALSE] <- data[is.na(data) == FALSE] - lm( + data[is.na(data) == FALSE] ~ xend[is.na(data) == FALSE] + )$fitted.values + + return(invisible(data)) +} diff --git a/R/GMST.R b/R/GMST.R index c922eaec4ada4ba132f7a6793d52655f2dfbd551..0d6c49e8aa3a827d69d5dc9905fab23c4197efd7 100644 --- a/R/GMST.R +++ b/R/GMST.R @@ -2,24 +2,23 @@ #' #'The Global Mean Surface Temperature (GMST) anomalies are computed as the #'weighted-averaged surface air temperature anomalies over land and sea surface -#'temperature anomalies over the ocean. 
+#'temperature anomalies over the ocean. If different members and/or datasets are provided,
+#'the climatology (used to calculate the anomalies) is computed individually for all of them.
 #'
-#'@param data_tas A numerical array indicating the surface air temperature data
-#' to be used for the index computation with the dimensions: 1) latitude,
-#' longitude, start date, forecast month, and member (in case of decadal
-#' predictions), 2) latitude, longitude, year, month and member (in case of
-#' historical simulations), or 3) latitude, longitude, year and month (in case
-#' of observations or reanalyses). This data has to be provided, at least,
-#' over the whole region needed to compute the index. The dimensions must be
-#' identical to those of data_tos.
-#'@param data_tos A numerical array indicating the sea surface temperature data
-#' to be used for the index computation with the dimensions: 1) latitude,
-#' longitude, start date, forecast month, and member (in case of decadal
-#' predictions), 2) latitude, longitude, year, month and member (in case of
-#' historical simulations), or 3) latitude, longitude, year and month (in case
-#' of observations or reanalyses). This data has to be provided, at least,
-#' over the whole region needed to compute the index. The dimensions must be
-#' identical to those of data_tas.
+#'@param data_tas A numerical array with the surface air temperature data
+#' to be used for the index computation with, at least, the
+#' dimensions: 1) latitude, longitude, start date and forecast month
+#' (in case of decadal predictions), 2) latitude, longitude, year and month
+#' (in case of historical simulations or observations). This data has to be
+#' provided, at least, over the whole region needed to compute the index.
+#' The dimensions must be identical to those of data_tos.
+#'@param data_tos A numerical array with the sea surface temperature data
+#' to be used for the index computation with, at least, the
+#' dimensions: 1) latitude, longitude, start date and forecast month
+#' (in case of decadal predictions), 2) latitude, longitude, year and month
+#' (in case of historical simulations or observations). This data has to be
+#' provided, at least, over the whole region needed to compute the index.
+#' The dimensions must be identical to those of data_tas.
 #'@param data_lats A numeric vector indicating the latitudes of the data.
 #'@param data_lons A numeric vector indicating the longitudes of the data.
 #'@param mask_sea_land An array with dimensions [lat_dim = data_lats, lon_dim =
@@ -55,21 +54,22 @@
 #' anomalies, set it to FALSE. The default value is NULL.\cr
 #' In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 #' to the first forecast year, and the climatology is automatically computed
-#' over the actual common period for the different forecast years.
+#' over the common calendar period for the different forecast years.
 #'@param year_dim A character string indicating the name of the year dimension.
 #' The default value is 'year'. Only used if parameter 'type' is 'hist' or
 #' 'obs'.
 #'@param month_dim A character string indicating the name of the month
 #' dimension. The default value is 'month'. Only used if parameter 'type' is
 #' 'hist' or 'obs'.
-#'@param member_dim A character string indicating the name of the member
-#' dimension. The default value is 'member'. Only used if parameter 'type' is
-#' 'dcpp' or 'hist'.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#' value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
 #'
-#'@return A numerical array of the GMST anomalies with the dimensions of:
-#' 1) sdate, forecast year, and member (in case of decadal predictions);
-#' 2) year and member (in case of historical simulations); or
-#' 3) year (in case of observations or reanalyses).
+#'@return A numerical array of the GMST anomalies with the same dimensions as data_tas except
+#' the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+#' (historical simulations or observations). In case of decadal predictions, a new dimension
+#' 'fyear' is added.
 #'
 #'@examples
 #' ## Observations or reanalyses
@@ -111,7 +111,7 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land,
                  sea_value, type, mask = NULL, lat_dim = 'lat', lon_dim = 'lon',
                  monini = 11, fmonth_dim = 'fmonth', sdate_dim = 'sdate',
                  indices_for_clim = NULL,
-                 year_dim = 'year', month_dim = 'month', member_dim = 'member') {
+                 year_dim = 'year', month_dim = 'month', na.rm = TRUE, ncores = NULL) {
 
   ## Input Checks
   # data_tas and data_tos
@@ -234,13 +234,15 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va
       stop("Parameter 'month_dim' is not found in 'data_tas' or 'data_tos' dimension.")
     }
   }
-  # member_dim
-  if (type == 'hist' | type == 'dcpp') {
-    if (!(is.character(member_dim) & length(member_dim) == 1)) {
-      stop("Parameter 'member_dim' must be a character string.")
-    }
-    if (!member_dim %in% names(dim(data_tas)) | !member_dim %in% names(dim(data_tos))) {
-      stop("Parameter 'member_dim' is not found in 'data_tas' or 'data_tos' dimension.")
+  # na.rm
+  if (!na.rm %in% c(TRUE, FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE.")
+  }
+  # ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+      length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
     }
   }
 
@@ -254,7 +256,7 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va
     data <- multiApply::Apply(data = list(data_tas, data_tos),
                               target_dims = c(lat_dim, lon_dim),
                               fun = mask_tas_tos, mask_sea_land = mask_sea_land,
-                              sea_value = sea_value)$output1
+                              sea_value = sea_value, ncores = ncores)$output1
     data <- drop(data)
 
     rm(data_tas, data_tos)
@@ -266,7 +268,7 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va
       return(data)
     }
     data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim),
-                              fun = fun_mask, mask = mask)$output1
+                              fun = fun_mask, mask = mask, ncores = ncores)$output1
   }
 
   data <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats,
@@ -274,9 +276,16 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va
                                       londim = which(names(dim(data)) == lon_dim),
                                       latdim = which(names(dim(data)) == lat_dim))
 
-  INDEX <- .Indices(data = data, type = type, monini = monini,
-                    indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim,
-                    sdate_dim = sdate_dim, year_dim = year_dim,
-                    month_dim = month_dim, member_dim = member_dim)
+  if (type == 'dcpp') {
+    target_dims <- c(sdate_dim, fmonth_dim)
+  } else if (type %in% c('hist', 'obs')) {
+    target_dims <- c(year_dim, month_dim)
+  }
+
+  INDEX <- multiApply::Apply(data = data, target_dims = target_dims, fun = .Indices,
+                             type = type, monini = monini, indices_for_clim = indices_for_clim,
+                             fmonth_dim = fmonth_dim, sdate_dim = sdate_dim,
+                             year_dim = year_dim, month_dim = month_dim,
+                             na.rm = na.rm, ncores = ncores)$output1
 
   return(INDEX)
 }
diff --git a/R/GSAT.R b/R/GSAT.R
index d76484320a9dd952f70115c94d7a24d28c0dd2e5..1774bd684533081137b75961907dac3750ad9a21 100644
--- a/R/GSAT.R
+++ b/R/GSAT.R
@@ -1,13 +1,14 @@
 #'Compute the Global Surface Air Temperature (GSAT) anomalies
 #'
 #'The Global Surface Air Temperature (GSAT) anomalies are computed as the
-#'weighted-averaged surface air temperature anomalies over the global region.
+#'weighted-averaged surface air temperature anomalies over the global region.
+#'If different members and/or datasets are provided, the climatology (used to
+#'calculate the anomalies) is computed individually for all of them.
 #'
-#'@param data A numerical array to be used for the index computation with the
-#' dimensions: 1) latitude, longitude, start date, forecast month, and member
-#' (in case of decadal predictions), 2) latitude, longitude, year, month and
-#' member (in case of historical simulations), or 3) latitude, longitude, year
-#' and month (in case of observations or reanalyses). This data has to be
+#'@param data A numerical array to be used for the index computation with, at least, the
+#' dimensions: 1) latitude, longitude, start date and forecast month
+#' (in case of decadal predictions), 2) latitude, longitude, year and month
+#' (in case of historical simulations or observations). This data has to be
 #' provided, at least, over the whole region needed to compute the index.
 #'@param data_lats A numeric vector indicating the latitudes of the data.
 #'@param data_lons A numeric vector indicating the longitudes of the data.
@@ -40,21 +41,22 @@
 #' anomalies, set it to FALSE. The default value is NULL.\cr
 #' In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 #' to the first forecast year, and the climatology is automatically computed
-#' over the actual common period for the different forecast years.
+#' over the common calendar period for the different forecast years.
 #'@param year_dim A character string indicating the name of the year dimension.
 #' The default value is 'year'. Only used if parameter 'type' is 'hist' or
 #' 'obs'.
 #'@param month_dim A character string indicating the name of the month
 #' dimension. The default value is 'month'. Only used if parameter 'type' is
 #' 'hist' or 'obs'.
-#'@param member_dim A character string indicating the name of the member
-#' dimension. The default value is 'member'. Only used if parameter 'type' is
-#' 'dcpp' or 'hist'.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#' value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
 #'
-#'@return A numerical array of the GSAT anomalies with the dimensions of:
-#' 1) sdate, forecast year, and member (in case of decadal predictions);
-#' 2) year and member (in case of historical simulations); or
-#' 3) year (in case of observations or reanalyses).
+#'@return A numerical array of the GSAT anomalies with the same dimensions as data except
+#' the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+#' (historical simulations or observations). In case of decadal predictions, a new dimension
+#' 'fyear' is added.
 #'
 #'@examples
 #' ## Observations or reanalyses
@@ -81,7 +83,7 @@
 GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lon',
                  mask = NULL, monini = 11, fmonth_dim = 'fmonth',
                  sdate_dim = 'sdate', indices_for_clim = NULL,
-                 year_dim = 'year', month_dim = 'month', member_dim = 'member') {
+                 year_dim = 'year', month_dim = 'month', na.rm = TRUE, ncores = NULL) {
 
   ## Input Checks
   # data
@@ -125,6 +127,13 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l
     stop(paste0("The longitude dimension of parameter 'data' must be the same",
                 " length of parameter 'data_lons'."))
   }
+  # ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
   # mask
   if (!is.null(mask)) {
     if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) &
@@ -136,7 +145,7 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l
       return(data)
     }
     data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim),
-                              fun = fun_mask, mask = mask)$output1
+                              fun = fun_mask, mask = mask, ncores = ncores)$output1
   } else {
     stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ",
                 "with c(lat_dim, lon_dim) dimensions and 0 in those grid ",
@@ -195,14 +204,9 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l
       stop("Parameter 'month_dim' is not found in 'data' dimension.")
     }
   }
-  # member_dim
-  if (type == 'hist' | type == 'dcpp') {
-    if (!(is.character(member_dim) & length(member_dim) == 1)) {
-      stop("Parameter 'member_dim' must be a character string.")
-    }
-    if (!member_dim %in% names(dim(data))) {
-      stop("Parameter 'member_dim' is not found in 'data' dimension.")
-    }
+  # na.rm
+  if (!na.rm %in% c(TRUE,FALSE)) {
+    stop("Parameter 'na.rm' must be TRUE or FALSE")
   }
 
   data <- ClimProjDiags::WeightedMean(data = data, lon = data_lons, lat = data_lats,
@@ -210,9 +214,16 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l
                                      londim = which(names(dim(data)) == lon_dim),
                                      latdim = which(names(dim(data)) == lat_dim))
 
-  INDEX <- .Indices(data = data, type = type, monini = monini,
-                    indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim,
-                    sdate_dim = sdate_dim, year_dim = year_dim,
-                    month_dim = month_dim, member_dim = member_dim)
+  if (type == 'dcpp'){
+    target_dims <- c(sdate_dim, fmonth_dim)
+  } else if (type %in% c('hist','obs')){
+    target_dims <- c(year_dim, month_dim)
+  }
+
+  INDEX <- multiApply::Apply(data = data, target_dims = target_dims, fun = .Indices,
+                             type = type, monini = monini, indices_for_clim = indices_for_clim,
+                             fmonth_dim = fmonth_dim, sdate_dim = sdate_dim,
+                             year_dim = year_dim, month_dim = month_dim,
+                             na.rm = na.rm, ncores = ncores)$output1
   return(INDEX)
 }
diff --git a/R/Histo2Hindcast.R b/R/Histo2Hindcast.R
new file mode 100644
index 0000000000000000000000000000000000000000..860f56b96bae59a9b06e0eff937a24d5df79668d
--- /dev/null
+++ b/R/Histo2Hindcast.R
@@ -0,0 +1,161 @@
+#'Chunk long simulations for comparison with hindcasts
+#'
+#'Reorganize a long run (typically historical) with only one start date into
+#'chunks corresponding to a set of start dates. The time frequency of the data
+#'should be monthly.
+#'
+#'@param data A numeric array of model or observational data with dimensions
+#' at least sdate_dim and ftime_dim.
+#'@param sdatesin A character string of the start date of 'data'. The format
+#' should be 'YYYYMMDD' or 'YYYYMM'.
+#'@param sdatesout A vector of character strings indicating the expected start
+#' dates of the output. The format should be 'YYYYMMDD' or 'YYYYMM'.
+#'@param nleadtimesout A positive integer indicating the length of leadtimes of
+#' the output.
+#'@param sdate_dim A character string indicating the name of the start date
+#' dimension of 'data'. The default value is 'sdate'.
+#'@param ftime_dim A character string indicating the name of the lead time
+#' dimension of 'data'. The default value is 'ftime'.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return A numeric array with the same dimensions as data, except that the length
+#' of sdate_dim equals the length of 'sdatesout' and the length of ftime_dim is
+#' nleadtimesout.
+#'
+#'@examples
+#' \dontshow{
+#'startDates <- c('19901101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 60,
+#'                                     output = 'areave',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#' }
+#'
+#'sdates_out <- c('19901101', '19911101', '19921101', '19931101', '19941101')
+#'leadtimes_per_startdate <- 12
+#'exp_data <- Histo2Hindcast(sampleData$mod, startDates,
+#'                           sdates_out, leadtimes_per_startdate)
+#'obs_data <- Histo2Hindcast(sampleData$obs, startDates,
+#'                           sdates_out, leadtimes_per_startdate)
+#' \dontrun{
+#'exp_data <- Reorder(exp_data, c(3, 4, 1, 2))
+#'obs_data <- Reorder(obs_data, c(3, 4, 1, 2))
+#'PlotAno(exp_data, obs_data, sdates_out,
+#'        toptitle = paste('Anomalies reorganized into shorter chunks'),
+#'        ytitle = 'K', fileout = NULL)
+#' }
+#'
+#'@import multiApply
+#'@export
+Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout,
+                           sdate_dim = 'sdate', ftime_dim = 'ftime',
+                           ncores = NULL) {
+
+  ## Input Checks
+  # data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  # sdatesin
+  if (is.null(sdatesin)) {
+    stop("Parameter 'sdatesin' cannot be NULL.")
+  }
+  if (!is.character(sdatesin) | length(sdatesin) > 1) {
+    stop(paste0("Parameter 'sdatesin' must be a character string in the format",
+                " 'YYYYMMDD' or 'YYYYMM'."))
+  } else if (!nchar(sdatesin) %in% c(6, 8) | is.na(as.numeric(sdatesin))) {
+    stop(paste0("Parameter 'sdatesin' must be a character string in the format",
+                " 'YYYYMMDD' or 'YYYYMM'."))
+  }
+  # sdatesout
+  if (is.null(sdatesout)) {
+    stop("Parameter 'sdatesout' cannot be NULL.")
+  }
+  if (!is.character(sdatesout) | !is.vector(sdatesout)) {
+    stop(paste0("Parameter 'sdatesout' must be a vector of character strings in the ",
+                "format 'YYYYMMDD' or 'YYYYMM'."))
+  } else if (!all(nchar(sdatesout) %in% c(6, 8)) | any(is.na(as.numeric(sdatesout)))) {
+    stop(paste0("Parameter 'sdatesout' must be a vector of character strings in the ",
+                "format 'YYYYMMDD' or 'YYYYMM'."))
+  }
+  # nleadtimesout
+  if (is.null(nleadtimesout)) {
+    stop("Parameter 'nleadtimesout' cannot be NULL.")
+  }
+  if (!is.numeric(nleadtimesout) | nleadtimesout %% 1 != 0 |
+      nleadtimesout <= 0 | length(nleadtimesout) > 1) {
+    stop("Parameter 'nleadtimesout' must be a positive integer.")
+  }
+  # sdate_dim
+  if (!is.character(sdate_dim) | length(sdate_dim) > 1) {
+    stop("Parameter 'sdate_dim' must be a character string.")
+  }
+  if (!sdate_dim %in% names(dim(data))) {
+    stop("Parameter 'sdate_dim' is not found in 'data' dimension.")
+  }
+  if (dim(data)[sdate_dim] > 1) {
+    stop("The dimension length of sdate_dim of 'data' 
must be 1.") + } + # ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!ftime_dim %in% names(dim(data))) { + stop("Parameter 'ftime_dim' is not found in 'data' dimension.") + } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + yrin <- as.numeric(substr(sdatesin, 1, 4)) + yrout <- as.numeric(substr(sdatesout, 1, 4)) + mthin <- as.numeric(substr(sdatesin, 5, 6)) + if (mthin > 12) { + stop(paste0("Parameter 'sdatesin' must be in the format 'YYYYMMDD' or ", + "'YYYYMM'. Found the month is over 12.")) + } + mthout <- as.numeric(substr(sdatesout, 5, 6)) + if (any(mthout > 12)) { + stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'. Found certain month is over 12.")) + } + + res <- Apply(data, + target_dims = c(sdate_dim, ftime_dim), + output_dims = c(sdate_dim, ftime_dim), + fun = .Histo2Hindcast, + yrin = yrin, yrout = yrout, + mthin = mthin, mthout = mthout, + nleadtimesout = nleadtimesout, + ncores = ncores)$output1 + + return(res) + +} + +.Histo2Hindcast <- function(data, yrin = yrin, yrout = yrout, mthin = mthin, mthout = mthout, nleadtimesout) { + # data: [sdate = 1, ftime] + + res <- array(dim = c(sdate = length(yrout), ftime = nleadtimesout)) + + diff_mth <- (yrout - yrin) * 12 + (mthout - mthin) + for (i in 1:length(diff_mth)) { + if (diff_mth[i] < dim(data)[2]) { + ftime_ind <- max(1 + diff_mth[i], 1):min(nleadtimesout + diff_mth[i], dim(data)[2]) + res[i, 1:length(ftime_ind)] <- data[1, ftime_ind] + } + } + + return(res) +} diff --git a/R/InsertDim.R b/R/InsertDim.R index 195b8066ceeb79a29bc14a338eef5806cb314149..36ce2f87e4512e99577dd92aadfdef1a1fb97b60 100644 --- a/R/InsertDim.R +++ b/R/InsertDim.R @@ -9,7 +9,7 @@ #'@param name A character string indicating the name for the new dimension. #' The default value is NULL. #'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. +#' computation. The default value is NULL. This parameter is deprecated now. #' #'@return An array as parameter 'data' but with the added named dimension. #' @@ -62,54 +62,26 @@ InsertDim <- function(data, posdim, lendim, name = NULL, ncores = NULL) { } } ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores)) { - stop("Parameter 'ncores' must be a positive integer.") - } else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { - stop("Parameter 'ncores' must be a positive integer.") - } - } + if (!missing("ncores")) + warning("Argument 'ncores' is deprecated.") ############################### # Calculate InsertDim + names(lendim) <- name - ## create output dimension - if (posdim == 1) { # first dim - outdim <- c(lendim, dim(data)) - } else { - if (posdim > length(dim(data))) { # last dim - outdim <- c(dim(data), lendim) - } else { # middle dim - outdim <- c(dim(data)[1:(posdim - 1)], lendim, dim(data)[posdim:length(dim(data))]) - } - } - - ## create output array - outvar <- array(dim = c(outdim)) - ## give temporary names for Apply(). 
The name will be replaced by data in the end - names(dim(outvar)) <- paste0('D', 1:length(outdim)) - names(dim(outvar))[posdim] <- name + ## Put the new dim at the end first + data <- array(data, dim = c(dim(data), lendim)) - res <- Apply(list(outvar), - margins = name, - fun = .InsertDim, - val = data, - ncores = ncores)$output1 - - if (posdim != 1) { - if (posdim < length(outdim)) { - res <- Reorder(res, c(1:(posdim - 1), length(outdim), posdim:(length(outdim) - 1))) - } else { #posdim = length(outdim) - res <- Reorder(res, c(1:(posdim - 1), length(outdim))) - } - } else { - res <- Reorder(res, c(length(outdim), 1:(length(outdim) - 1))) + ## Reorder dimension + if (posdim == 1) { + order <- c(length(dim(data)), 1:(length(dim(data)) - 1)) + data <- Reorder(data, order) + } else if (posdim == length(dim(data))) { # last dim + + } else { # middle dim + order <- c(1:(posdim - 1), length(dim(data)), posdim:(length(dim(data)) - 1)) + data <- Reorder(data, order) } - return(res) -} - -.InsertDim <- function(x, val) { - x <- val - return(x) + return(data) } diff --git a/R/Load.R b/R/Load.R index 0392c747980022a45f36c9c2585176b8eae34a01..955c8942acabdb74a28c38568afcb644a6d0ac1f 100644 --- a/R/Load.R +++ b/R/Load.R @@ -1376,7 +1376,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, dims2define <- TRUE is_file_per_member_exp <- rep(nmod, FALSE) exp_work_pieces <- list() - first_time_step_list <- NULL jmod <- 1 while (jmod <= nmod) { first_dataset_file_found <- FALSE @@ -1422,21 +1421,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, if (is_file_per_member_exp[jmod]) { replace_values[["MEMBER_NUMBER"]] <- '*' } - if (jsdate == 1) { - work_piecetime <- list(dataset_type = dataset_type, - filename = .ConfigReplaceVariablesInString(quasi_final_path, - replace_values), - namevar = namevar, grid = grid, remap = remap, - remapcells = remapcells, - is_file_per_member = is_file_per_member_exp[jmod], - is_file_per_dataset = FALSE, - lon_limits = c(lonmin, lonmax), - lat_limits = c(latmin, latmax), dimnames = exp[[jmod]][['dimnames']], - single_dataset = single_dataset) - looking_time <- .LoadDataFile(work_piecetime, explore_dims = TRUE, - silent = silent) - first_time_step_list <- c(first_time_step_list, list(looking_time$time_dim)) - } # If the dimensions of the output matrices are still to define, we try to read # the metadata of the data file that corresponds to the current iteration if (dims2define) { @@ -1541,7 +1525,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, jsdate <- jsdate + 1 } replace_values[extra_vars] <- NULL - #first_dataset_file_found <- FALSE jmod <- jmod + 1 } if (dims2define && length(exp) > 0) { @@ -1582,52 +1565,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } leadtimes <- seq(leadtimemin, leadtimemax, sampleperiod) } - # If there are differences in the first time stamp in exp files: - if (!is.null(exp)) { - in_date <- lapply(first_time_step_list, function(x) { - origin <- as.POSIXct( - paste(strsplit(x$time_units, " ")[[1]][c(3,4)], - collapse = " "), tz = 'UTC') - units <- strsplit(x$time_units, " ")[[1]][1] - if (units == 'hours') { - exp_first_time_step <- as.POSIXct( - x$first_time_step_in_file * - 3600, origin = origin, tz = 'UTC') - } else if (units == 'days') { - exp_first_time_step <- as.POSIXct( - x$first_time_step_in_file * - 86400, origin = origin, tz = 'UTC') - } - day <- as.numeric(format(exp_first_time_step, "%d")) - return(day) - }) - exp_first_time_step <- 
min(unlist(in_date)) - if (max(unlist(in_date)) > 1) { - leadtimes <- seq(exp_first_time_step, leadtimemax + max(unlist(in_date)) - 1, - sampleperiod) - } - if (leadtimemin > 1 & length(in_date) > 1) { - lags <- lapply(in_date, function(x) {x - in_date[[1]]}) - new_leadtimemin <- lapply(lags, function(x) {leadtimemin - x}) - new_leadtimemax <- lapply(lags, function(x) {leadtimemax - x}) - jmod <- 2 - npieces <- length(exp_work_pieces)/nmod - while (jmod <= nmod) { - jpiece <- 1 - while (jpiece <= npieces) { - exp_work_pieces[[npieces * (jmod - 1) + jpiece]]$leadtimes <- - seq(new_leadtimemin[[jmod]], new_leadtimemax[[jmod]], sampleperiod) - jpiece <- jpiece + 1 - } - jmod <- jmod + 1 - } - } - lag <- 1 - in_date[[1]] - leadtimes <- seq(leadtimemin - lag, leadtimemax #+ max(unlist(in_date)) + lag, - - lag, - sampleperiod) - exp_first_time_step <- leadtimemin - lag - } # Now we start iterating over observations. We try to find the output matrix # dimensions and we build anyway the work pieces corresponding to the observational # data that time-corresponds the experimental data or the time-steps until the @@ -1691,7 +1628,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, found_data <- .LoadDataFile(work_piece, explore_dims = TRUE, silent = silent) found_dims <- found_data$dims var_long_name <- found_data$var_long_name - first_time_step_list <- c(first_time_step_list, list(found_data$time_dim)) units <- found_data$units if (!is.null(found_dims)) { is_2d_var <- found_data$is_2d_var @@ -1789,18 +1725,8 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, ## This condition must be fulfilled to put all the month time steps ## in the dimension of length nleadtimes. Otherwise it must be cut: #(length(leadtimes) - 1)*sampleperiod + 1 - (jleadtime - 1)*sampleperiod >= days_in_month - day + 1 - - ## The first time step in exp could be different from sdate: - if (jleadtime == 1 & !is.null(exp)) { - if (is.null(first_time_step_list[[1]])) { - stop("Check 'time' variable in the experimental files ", - "since not units or first time step have been found.") - } else { - day <- leadtimes[1] - } - } - obs_file_indices <- seq(day, min(days_in_month, - (length(leadtimes) - jleadtime) * sampleperiod + day), sampleperiod) + obs_file_indices <- seq(day, min(days_in_month, (length(leadtimes) - jleadtime) * sampleperiod + day), sampleperiod) + } else { obs_file_indices <- 1 } @@ -1896,8 +1822,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } if (storefreq == 'daily') { - startdate <- startdate + 86400 * sampleperiod * - max(obs_file_indices) + startdate <- startdate + 86400 * sampleperiod * length(obs_file_indices) year <- as.integer(substr(startdate, 1, 4)) month <- as.integer(substr(startdate, 6, 7)) day <- as.integer(substr(startdate, 9, 10)) @@ -2300,24 +2225,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, # Start is a list with as many components as start dates. 
  # Each component is a vector of the initial POSIXct date of each
  # forecast time step
-    if (!is.null(exp)) {
-      if (storefreq == 'daily' & leadtimes[[1]] > 1) {
-        origin <- leadtimes[[1]] - 1
-        leadtimemin <- 1
-      } else {
-        origin <- 0
-      }
-      dates[["start"]] <- do.call(c, lapply(sdates,
-                     function(x) {
-                       do.call(c, lapply((origin:(origin + number_ftime - 1)) * sampleperiod,
-                         function(y) {
-                           addTime(as.POSIXct(x, format = "%Y%m%d", tz = "UTC"),
-                                   store_period, y + leadtimemin - 1)
-                         }))
-                     }))
-  } else {
-    origin <- 0
-    dates[["start"]] <- do.call(c, lapply(sdates,
+  dates[["start"]] <- do.call(c, lapply(sdates,
                    function(x) {
                      do.call(c, lapply((0:(number_ftime - 1)) * sampleperiod,
                        function(y) {
@@ -2325,7 +2233,6 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL,
                          addTime(as.POSIXct(x, format = "%Y%m%d", tz = "UTC"),
                                  store_period, y + leadtimemin - 1)
                        }))
                    }))
-  }
   attr(dates[["start"]], "tzone") <- "UTC"
   # end is similar to start, but contains the end dates of each forecast
   # time step
diff --git a/R/MeanDims.R b/R/MeanDims.R
index 2da3144c55259551d7f7cb30fef36f26776d0e17..7d3cb445487f60aefb8020f2cb598c28d2777fd2 100644
--- a/R/MeanDims.R
+++ b/R/MeanDims.R
@@ -3,24 +3,20 @@
 #'This function returns the mean of an array along a set of dimensions and
 #'preserves the dimension names if the array has them.
 #'
-#'@details It is recommended to use \code{'apply(x, dim, mean)'} to improve the
-#' efficiency when the dimension to be averaged is only one.
-#'
 #'@param data An array to be averaged.
 #'@param dims A vector of numeric or character strings, indicating along which
 #' dimensions to average.
 #'@param na.rm A logical value indicating whether to ignore NA values (TRUE) or
-#' not (FALSE). The default value is FALSE.
-#'
+#' not (FALSE).
 #'@return An array with the same dimensions as parameter 'data' except the
 #' 'dims' dimensions removed.
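The slimmed-down MeanDims() below relies on base apply(), which has one subtlety worth noting: named dimensions survive when two or more dimensions remain, but a single remaining dimension comes back as a bare vector. A sketch with a made-up named array:

```r
a <- array(rnorm(24), dim = c(lat = 2, lon = 3, time = 4))
dim(MeanDims(a, 'time'))           # named dims kept: c(lat = 2, lon = 3)
dim(MeanDims(a, c('lat', 'lon')))  # one dim left: apply() returns a bare vector, so NULL
```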
#' #'@examples -#'a <- array(rnorm(24), dim = c(a = 2, b= 3, c = 4)) -#'print(dim(MeanDims(a, 2))) -#'print(dim(MeanDims(a, c(2, 3)))) -#'print(dim(MeanDims(a, c('a', 'b')))) +#'a <- array(rnorm(24), dim = c(2, 3, 4)) +#'MeanDims(a, 2) +#'MeanDims(a, c(2, 3)) +#'@import multiApply #'@export MeanDims <- function(data, dims, na.rm = FALSE) { @@ -59,32 +55,17 @@ MeanDims <- function(data, dims, na.rm = FALSE) { stop("Parameter 'na.rm' must be one logical value.") } - - ############################### # Calculate MeanDims - - ## Change character dims into indices - if (is.character(dims)) { - tmp <- rep(0, length(dims)) - for (i in 1:length(dims)) { - tmp[i] <- which(names(dim(data)) == dims[i]) - } - dims <- tmp - } - - if (length(dim(data)) == 1) { - res <- mean(data, na.rm = na.rm) + if (length(dims) == length(dim(data))) { + data <- mean(data, na.rm = na.rm) } else { - - margins <- setdiff(c(1:length(dim(data))), dims) - res <- as.array(apply(data, margins, mean, na.rm = na.rm)) - if (!is.null(names(dim(data))[margins]) & is.null(names(dim(res)))) { - names(dim(res)) <- names(dim(data))[margins] + if (is.character(dims)) { + dims <- which(names(dim(data)) %in% dims) } + pos <- (1:length(dim(data)))[-dims] + data <- apply(data, pos, mean, na.rm = na.rm) } - - return(res) - + return(data) } diff --git a/R/NAO.R b/R/NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..af4893ad0c5b904d0f62893f986282b4883d401d --- /dev/null +++ b/R/NAO.R @@ -0,0 +1,424 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for forecast (exp) and observations +#'(obs) based on the leading EOF pattern. +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of experimental data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param lat A vector of the latitudes of 'exp' and 'obs'. +#'@param lon A vector of the longitudes of 'exp' and 'obs'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'exp' and 'obs'. The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length +#' must be 1. The default value is 'member'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. 
The default value is c('lat', 'lon'). +#'@param ftime_dim A character string indicating the name of the forecast time +#' dimension of 'exp' and 'obs'. The default value is 'ftime'. +#'@param ftime_avg A numeric vector of the forecast time steps to average +#' across the target period. The default value is 2:4, i.e., from 2nd to 4th +#' forecast time steps. +#'@param obsproj A logical value indicating whether to compute the NAO index by +#' projecting the forecast anomalies onto the leading EOF of observational +#' reference (TRUE) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +#' year you are evaluating out), and then projecting forecast anomalies onto +#' this EOF (FALSE). The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list which contains: +#'\item{exp}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp' except space_dim and ftime_dim. +#' } +#'\item{obs}{ +#' A numeric array of observed NAO index in verification format with the same +#' dimensions as 'obs' except space_dim and ftime_dim. +#'} +#' +#'@references +#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of +#' multi-model seasonal forecasts of the wintertime North Atlantic +#' Oscillation. Climate Dynamics, 21, 501-514. +#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. 
X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop(paste0("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop(paste0("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.")) + } + if(any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } else { + add_member_back <- FALSE + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (any(!space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (any(!space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## exp and obs (2) + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop(paste0("Parameter 'exp' and 'obs' must 
have the same names and lengths ",
+                  "of all the dimensions except 'memb_dim'."))
+    }
+  }
+  ## ftime_avg
+  if (!is.vector(ftime_avg) | !is.numeric(ftime_avg) | any(ftime_avg %% 1 != 0)) {
+    stop("Parameter 'ftime_avg' must be an integer vector.")
+  }
+  if (!is.null(exp)) {
+    if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) {
+      stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.")
+    }
+  } else {
+    if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) {
+      stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.")
+    }
+  }
+  ## sdate >= 2
+  if (!is.null(exp)) {
+    if (dim(exp)[time_dim] < 2) {
+      stop("The length of time_dim must be at least 2.")
+    }
+  } else {
+    if (dim(obs)[time_dim] < 2) {
+      stop("The length of time_dim must be at least 2.")
+    }
+  }
+  ## lat and lon
+  if (!is.null(exp)) {
+    if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) {
+      stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
+                  "length as the latitude dimension of 'exp' and 'obs'."))
+    }
+    if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) {
+      stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
+                  "length as the longitude dimension of 'exp' and 'obs'."))
+    }
+  } else {
+    if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) {
+      stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
+                  "length as the latitude dimension of 'exp' and 'obs'."))
+    }
+    if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) {
+      stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
+                  "length as the longitude dimension of 'exp' and 'obs'."))
+    }
+  }
+  stop_needed <- FALSE
+  if (max(lat) > 80 | min(lat) < 20) {
+    stop_needed <- TRUE
+  }
+  #NOTE: different from s2dverification
+  # lon is not actually used in the calculation. EOF only uses lat for the
+  # weighting, so we just need to ensure the data is in this region,
+  # regardless of the order.
+  if (any(lon < 0)) {  #[-180, 180]
+    if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) {
+      stop_needed <- TRUE
+    }
+  } else {  #[0, 360]
+    if (any(lon >= 50 & lon <= 270)) {
+      stop_needed <- TRUE
+    } else {
+      lon_E <- lon[which(lon < 50)]
+      lon_W <- lon[-which(lon < 50)]
+      if (max(lon_E) < 30 | min(lon_W) > 290) {
+        stop_needed <- TRUE
+      }
+    }
+  }
+  if (stop_needed) {
+    stop(paste0("The typical domain used to compute the NAO is 20N-80N, ",
+                "80W-40E. 'lat' or 'lon' is out of range."))
+  }
+  ## obsproj
+  if (!is.logical(obsproj) | length(obsproj) > 1) {
+    stop("Parameter 'obsproj' must be either TRUE or FALSE.")
+  }
+  if (obsproj) {
+    if (is.null(obs)) {
+      stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.")
+    }
+    if (is.null(exp)) {
+      .warning("Parameter 'obsproj' set to TRUE but no 'exp' provided.")
+    }
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  # Average ftime
+  if (!is.null(exp)) {
+    exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE)
+    exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE)
+    ## Cross-validated PCs. Fabian. This should be extended to
+    ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + + # wght + wght <- array(sqrt(cos(lat * pi/180)), dim = c(length(lat), length(lon))) + + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + + if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) + if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + + for (tt in 1:ntime) { #sdate + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[c(1:ntime)[-tt], , , drop = FALSE] + obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + # dim(obs_EOF$EOFs): [mode, lat, lon] + if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + obs_EOF$EOFs <- obs_EOF$EOFs * (-1) +# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + NAOO.ver[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, c(1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + + ## Correct polarity of pattern. + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + exp_EOF$EOFs <- exp_EOF$EOFs * (-1) +# exp_EOF$PCs <- exp_EOF$PCs * sign # not used + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). 
Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = exp_EOF$EOFs[1, , ], wght = wght) # [sdate, memb] + NAOF.ver[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] + NAOF.ver[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + # add_member_back + if (add_member_back) { + suppressWarnings( + NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) + ) + } + + #NOTE: EOFs_obs is not returned because it's only the result of the last sdate + # (It is returned in s2dverification.) + if (!is.null(exp) & !is.null(obs)) { + return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) + } else if (!is.null(exp)) { + return(list(exp = NAOF.ver)) + } else if (!is.null(obs)) { + return(list(obs = NAOO.ver)) + } +} diff --git a/R/Persistence.R b/R/Persistence.R index 3aa0636caf7b502e913927cd8d14bd4dd5efbd18..5a53857f797c47db5e99bb912c2be44d52e8b001 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -8,14 +8,16 @@ #' including the time dimension along which the autoregression is computed. #' The data should start at least 40 time steps (years or days) before #' 'start'. -#'@param dates A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -#' indicating the dates available in the observations. +#'@param dates A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +#' in class 'Date' indicating the dates available in the observations. #'@param time_dim A character string indicating the dimension along which to #' compute the autoregression. The default value is 'time'. -#'@param start A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the first start date of the persistence forecast. -#'@param end A 4-digit integer (YYYY) or a date in the ISOdate format -#' (YYYY-MM-DD) indicating the last start date of the persistence forecast. +#'@param start A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the first start date of the persistence forecast. It must be +#' between 1850 and 2020. +#'@param end A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +#' indicating the last start date of the persistence forecast. It must be +#' between 1850 and 2020. #'@param ft_start An integer indicating the forecast time for which the #' persistence forecast should be calculated, or the first forecast time of #' the average forecast times for which persistence should be calculated. 
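The leave-one-out projection in .NAO() above can be pictured with a self-contained toy. Here prcomp() stands in for the package's internal .EOF()/.ProjectField(), and latitude weighting is ignored, so this only illustrates the cross-validation pattern, not the exact computation:

```r
set.seed(1)
nyear <- 10
field <- matrix(rnorm(nyear * 54), nrow = nyear)  # [sdate, flattened grid]
pc1 <- numeric(nyear)
for (tt in seq_len(nyear)) {
  # leading EOF computed without year tt (the year being verified)
  eof1 <- prcomp(field[-tt, ], center = FALSE)$rotation[, 1]
  # project the left-out year onto that pattern
  pc1[tt] <- drop(field[tt, ] %*% eof1)
}
```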
@@ -69,13 +71,22 @@ #'} #' #'@examples -#'#Building an example dataset with yearly start dates from 1920 to 2009 +#'# Case 1: year +#'# Building an example dataset with yearly start dates from 1920 to 2009 #'set.seed(1) -#'obs1 <- rnorm(1 * 70 * 6 * 7) -#'dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -#'dates <- seq(1940, 2009, 1) -#'persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, -#' nmemb = 40) +#'obs1 <- rnorm(1 * 70 * 2 * 2) +#'dim(obs1) <- c(member = 1, time = 70, lat = 2, lon = 2) +#'dates <- seq(1920, 1989, 1) +#'res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, +#' nmemb = 2) +#'# Case 2: day +#'dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +#'start <- as.Date(ISOdate(1990, 2, 15)) +#'end <- as.Date(ISOdate(1990, 4, 1)) +#'set.seed(1) +#'data <- rnorm(1 * length(dates)) +#'dim(data) <- c(member = 1, time = length(dates)) +#'res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) #' #'@import multiApply #'@export @@ -95,7 +106,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if(any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -106,27 +117,79 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## dates + if (is.numeric(dates)) { #(YYYY) + if (any(nchar(dates) != 4) | any(dates %% 1 != 0) | any(dates <= 0)) { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } + } else if (class(dates) == 'Date') { #(YYYY-MM-DD) + + } else { + stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.")) + } if (length(dates) != dim(data)[time_dim]) { stop("Parameter 'dates' must have the same length as in 'time_dim'.") } + ## dates, start, and end + if (!all(sapply(list(class(dates), class(start)), function(x) x == class(end)))) { + stop("Parameter 'dates', 'start', and 'end' should be the same format.") + } ## start -# if (!is.numeric(start) | start %% 1 != 0 | start < 0 | -# length(start) > 1 | start < 1850 | start > 2020) { -# stop("Parameter 'start' must be an integer between 1850 and 2020.") -# } -# if (start < dates[1] + 40) { -# stop("Parameter 'start' must start at least 40 time steps after the -# first start date of 'data'.") -# } + if (is.numeric(start)) { #(YYYY) + if (length(start) > 1 | any(start %% 1 != 0) | any(start < 1850) | any(start > 2020)) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < dates[1] + 40) { + stop(paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.")) + } + } else if (class(start) == 'Date') { + if (length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) | + any(start > as.Date(ISOdate(2021, 1, 1)))) { + stop(paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.")) + } + if (is.na(match(start, dates))) { + stop("Parameter 'start' must be one of the values of 'dates'.") + } + if (start < 
dates[1] + 40) {
+      stop(paste0("Parameter 'start' must start at least 40 time steps after ",
+                  "the first 'dates'."))
+    }
+  } else {
+    stop(paste0("Parameter 'start' must be an integer or a string in class ",
+                "'Date' between 1850 and 2020."))
+  }
+
   ## end
-#  if (!is.numeric(end) | end %% 1 != 0 | end < 0 |
-#      length(end) > 1 | end < 1850 | end > 2020) {
-#    stop("Parameter 'end' must be an integer between 1850 and 2020.")
-#  }
-#  if (end > dates[length(dates)] + 1) {
-#    stop("Parameter 'end' must end at most 1 time step after the
-#          last start date of 'data'.")
-#  }
+  if (is.numeric(end)) { #(YYYY)
+    if (length(end) > 1 | any(end %% 1 != 0) | any(end < 1850) | any(end > 2020)) {
+      stop(paste0("Parameter 'end' must be an integer or a string in class ",
+                  "'Date' between 1850 and 2020."))
+    }
+    if (end > dates[length(dates)] + 1) {
+      stop(paste0("Parameter 'end' must end at most 1 time step after ",
+                  "the last 'dates'."))
+    }
+  } else if (class(end) == 'Date') {
+    if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) |
+        any(end > as.Date(ISOdate(2020, 12, 31)))) {
+      stop(paste0("Parameter 'end' must be an integer or a string in class ",
+                  "'Date' between 1850 and 2020."))
+    }
+    if (end > dates[length(dates)] + 1) {
+      stop(paste0("Parameter 'end' must end at most 1 time step after ",
+                  "the last 'dates'."))
+    }
+  } else {
+    stop(paste0("Parameter 'end' must be an integer or a string in class ",
+                "'Date' between 1850 and 2020."))
+  }
   ## ft_start
   if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 |
       length(ft_start) > 1) {
@@ -195,7 +258,6 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
 # ft_start/ft_end are indices
 .Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1,
                          ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) {
-
   tm <- end - start + 1
   max_date <- match(start, dates)
   interval <- ft_end - ft_start
@@ -204,7 +266,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
   persistence <- matrix(NA, nrow = nmemb, ncol = tm)
   names(dim(persistence)) <- c('realization', time_dim)
 
-  for (sdate in tm:1){
+  for (sdate in tm:1) {
     min_y = max_ft + ft_start
     max_y = max_date + sdate - 2
     min_x = max_ft  # for extreme case: ex. forecast years 1-10, interval = 9
@@ -243,9 +305,9 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start,
     persistence.predint[sdate] <- stdev_reg * sqrt(1 + 1 / n + X_sq / S_sq)
     AR.slope[sdate] <- a
     AR.intercept[sdate] <- b
-    AR.lowCI[sdate] <- reg$regression[1]
-    AR.highCI[sdate] <- reg$regression[3]
-    persistence[ ,sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate],
+    AR.lowCI[sdate] <- reg$conf.lower[2]
+    AR.highCI[sdate] <- reg$conf.upper[2]
+    persistence[ , sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate],
                                   sd = persistence.predint[sdate])
   }
diff --git a/R/Plot2VarsVsLTime.R b/R/Plot2VarsVsLTime.R
new file mode 100644
index 0000000000000000000000000000000000000000..1c784dd5fb430090d00378f04b180ac0e6349111
--- /dev/null
+++ b/R/Plot2VarsVsLTime.R
@@ -0,0 +1,256 @@
+#'Plot two scores with confidence intervals in a common plot
+#'
+#'Plot two input variables that have the same dimensions in a common plot.
+#'One plot for all experiments.
+#'The input variables should have dimensions (nexp/nmod, 3, nltime).
+#'
+#'@param var1 Array of dimensions (nexp/nmod, 3, nltime), where the second
+#' dimension holds the lower confidence bound, the score, and the upper bound.
+#'@param var2 Array of dimensions (nexp/nmod, 3, nltime), as 'var1'.
+#'@param toptitle Main title, optional.
+#'@param ytitle Title of Y-axis, optional.
+#'@param monini Starting month between 1 and 12. Default = 1. +#'@param freq 1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12. +#'@param nticks Number of ticks and labels on the x-axis, optional. +#'@param limits c(lower limit, upper limit): limits of the Y-axis, optional. +#'@param listexp List of experiment names, up to three, optional. +#'@param listvars List of names of input variables, optional. +#'@param biglab TRUE/FALSE for presentation/paper plot. Default = FALSE. +#'@param hlines c(a, b, ...) Add horizontal black lines at Y-positions a, b, +#' ... The default value is NULL. +#'@param leg TRUE/FALSE if legend should be added or not to the plot. +#' Default = TRUE. +#'@param siglev TRUE/FALSE if significance level should replace confidence +#' interval.\cr +#' Default = FALSE. +#'@param sizetit Multiplicative factor to change title size, optional. +#'@param show_conf TRUE/FALSE to show/not confidence intervals for input +#' variables. +#'@param fileout Name of output file. Extensions allowed: eps/ps, jpeg, png, +#' pdf, bmp and tiff. The default value is NULL. +#'@param width File width, in the units specified in the parameter size_units +#' (inches by default). Takes 8 by default. +#'@param height File height, in the units specified in the parameter +#' size_units (inches by default). Takes 5 by default. +#'@param size_units Units of the size of the device (file or window) to plot +#' in. Inches ('in') by default. See ?Devices and the creator function of the +#' corresponding device. +#'@param res Resolution of the device (file or window) to plot in. See +#' ?Devices and the creator function of the corresponding device. +#'@param ... Arguments to be passed to the method. Only accepts the following +#' graphical parameters:\cr +#' adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt +#' csi cxy err family fg fig font font.axis font.lab font.main font.sub lend +#' lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt +#' smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +#' For more information about the parameters see `par`. 
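Given the (nexp/nmod, 3, nltime) layout described above, a synthetic call can be assembled as follows. This is a sketch with invented numbers; the middle dimension holds lower bound, value, upper bound:

```r
nltime <- 12
vals <- seq(1, 2, length.out = nltime)
band <- rbind(vals - 0.2, vals, vals + 0.2)   # [3, nltime]: lower, value, upper
var1 <- array(band, dim = c(1, 3, nltime))    # one experiment
var2 <- array(band + 0.1, dim = c(1, 3, nltime))
Plot2VarsVsLTime(var1, var2, toptitle = 'Synthetic example', monini = 11,
                 freq = 12, listexp = c('exp A'), listvars = c('metric 1', 'metric 2'))
```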
+#' +#'@details +#'Examples of input:\cr +#'------------------\cr\cr +#'RMSE error for a number of experiments and along lead-time: (nexp, nltime) +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#'smooth_ano_exp <- Smoothing(data = ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(data = ano_obs, runmeanlen = runmean_months) +#'dim_to_mean <- 'member' # mean along members +#'required_complete_row <- 'ftime' # discard startdates for which there are NA leadtimes +#'leadtimes_per_startdate <- 60 +#'rms <- RMS(MeanDims(smooth_ano_exp, dim_to_mean), +#' MeanDims(smooth_ano_obs, dim_to_mean), +#' comp_dim = required_complete_row, +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', +#' na.rm = TRUE), +#' posdim = 3, +#' lendim = dim(smooth_ano_exp)['member'], +#' name = 'member') +#'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) +#'#Combine rms outputs into one array +#'rms_combine <- abind::abind(rms$conf.lower, rms$rms, rms$conf.upper, along = 0) +#'rms_combine <- Reorder(rms_combine, c(2, 3, 1, 4)) +#' \donttest{ +#'Plot2VarsVsLTime(InsertDim(rms_combine[, , , ], 1, 1), Reorder(spread$sd, c(1, 3, 2)), +#' toptitle = 'RMSE and spread', monini = 11, freq = 12, +#' listexp = c('CMIP5 IC3'), listvar = c('RMSE', 'spread')) +#' } +#' +#'@importFrom grDevices png jpeg postscript pdf svg bmp tiff postscript dev.cur dev.new dev.off +#'@importFrom stats ts +#'@export +Plot2VarsVsLTime <- function(var1, var2, toptitle = '', ytitle = '', monini = 1, + freq = 12, nticks = NULL, limits = NULL, listexp = + c('exp1', 'exp2', 'exp3'), listvars = c('var1', + 'var2'), biglab = FALSE, hlines = NULL, leg = TRUE, + siglev = FALSE, sizetit = 1, show_conf = TRUE, + fileout = NULL, + width = 8, height = 5, size_units = 'in', res = 100, ...) { + # Process the user graphical parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "fin", "lab", "las", "lty", "lwd", "mai", "mgp", "new", "pin", "ps", "pty") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) 
+ + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # + nvars <- 2 + + if (length(dim(var1)) != length(dim(var2))) { + print("the two input variables should have the same dimensions") + stop() + } + if (length(dim(var1)) >= 4) { + print("dimensions of input variables should be 3") + stop() + } + nleadtime <- dim(var1)[3] + nexp <- dim(var1)[1] + var <- array(dim = c(nvars, nexp, 3, nleadtime)) + for (jvar in 1:nvars) { + varname <- paste("var", as.character(jvar), sep = "") + var[jvar, , , ] <- get(varname) + rm(varname) + } + + if (is.null(limits) == TRUE) { + ll <- min(var1, na.rm = TRUE) + ul <- max(var1, na.rm = TRUE) + if (biglab) { + ul <- ul + 0.4 * (ul - ll) + } else { + ul <- ul + 0.3 * (ul - ll) + } + } else { + ll <- limits[1] + ul <- limits[2] + } + lastyear <- (monini + (nleadtime - 1) * 12 / freq - 1) %/% 12 + lastmonth <- (monini + (nleadtime - 1) * 12 / freq - 1) %% 12 + 1 + empty_ts <- ts(start = c(0000, (monini - 1) %/% (12 / freq) + 1), + end = c(lastyear, (lastmonth - 1) %/% (12 / freq) + 1), + frequency = freq) + empty <- array(dim = length(empty_ts)) + # + # Define some plot parameters + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + if (is.null(nticks)) { + if (biglab) { + nticks <- 5 + } else { + nticks <- 10 + } + } + labind <- seq(1, nleadtime, max(nleadtime %/% nticks, 1)) + months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", + "Oct", "Nov", "Dec") + labyear <- ((labind - 1) * 12 / freq + monini - 1) %/% 12 + labmonth <- months[((labind - 1) * 12 / freq + monini - 1) %% 12 + 1] + for (jx in 1:length(labmonth)) { + y2o3dig <- paste("0", as.character(labyear[jx]), sep = "") + labmonth[jx] <- paste(labmonth[jx], "\nYr ", substr(y2o3dig, nchar(y2o3dig) + - 1, nchar(y2o3dig)), sep = "") + } + color <- c("red1", "dodgerblue1", "green1", "orange1", "lightblue1", + "deeppink1", "mediumpurple1", "lightgoldenrod1", "olivedrab1", + "mediumorchid1") + type <- c(1, 3) + if (siglev == TRUE) { + lines <- c("n", "l", "n") + } + else{ + lines <- c("l", "l", "l") + } + thickness <- array(dim = c(3)) + thickness[1] <- c(1) + thickness[2] <- c(8) + thickness[3] <- thickness[1] + + # + # Define plot layout + # ~~~~~~~~~~~~~~~~~~~~ + # + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + + # Load the user parameters + par(userArgs) + + if (biglab) { + par(mai = c(1.25, 1.4, 0.5, 1), mgp = c(4, 2.5, 0)) + par(cex = 1.3, cex.lab = 2, cex.axis = 1.8) + cexmain <- 2.2 + legsize <- 1.5 + } else { + par(mai = c(1, 1.1, 0.5, 0), mgp = c(3, 1.8, 0)) + par(cex = 1.3, cex.lab = 1.5, cex.axis = 1.1) + cexmain <- 1.5 + legsize <- 1 + } + plot(empty, ylim = c(ll, ul), xlab = "Time (months)", ylab = ytitle, + main = toptitle, cex.main = cexmain * sizetit, axes = FALSE) + axis(1, at = labind, labels = labmonth) + axis(2) + box() + if (is.null(hlines) != TRUE) { + for (jy in 1:length(hlines)) { + par(new = TRUE) + abline(h = hlines[jy]) + } + } + # + # Loop on experimental data + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # + legendnames <- array(dim = nexp * nvars) + legendthick <- array(dim = nexp * nvars) + legendsty <- array(dim = nexp * nvars) + legendcol <- array(dim = nexp * 
nvars)
+  if (show_conf == TRUE) {
+    start_line <- 3
+    end_line <- 1
+  } else {
+    start_line <- 2
+    end_line <- 2
+  }
+  for (jint in seq(start_line, end_line, -1)) {
+    ind <- 1
+    for (jexp in 1:nexp) {
+      for (jvar in 1:nvars) {
+        par(new = TRUE)
+        plot(var[jvar, jexp, jint, ], type = lines[jint], ylim = c(ll, ul),
+             col = color[jexp], lty = type[jvar], lwd = thickness[jint],
+             ylab = "", xlab = "", axes = FALSE)
+        legendnames[ind] <- paste(listexp[jexp], listvars[jvar])
+        legendthick[ind] <- 2
+        legendsty[ind] <- type[jvar]
+        legendcol[ind] <- color[jexp]
+        ind <- ind + 1
+      }
+    }
+  }
+  if (leg) {
+    legend(1, ul, legendnames, lty = legendsty, lwd = legendthick,
+           col = legendcol, cex = legsize)
+  }
+
+  # If the graphic was saved to file, close the connection with the device
+  if(!is.null(fileout)) dev.off()
+}
diff --git a/R/PlotACC.R b/R/PlotACC.R
new file mode 100644
index 0000000000000000000000000000000000000000..3bffa68f279234a7d6881eb1848830df6f7ad3d9
--- /dev/null
+++ b/R/PlotACC.R
@@ -0,0 +1,238 @@
+#'Plot Plumes/Timeseries Of Anomaly Correlation Coefficients
+#'
+#'Plots plumes/timeseries of ACC from an array with dimensions
+#'(output from \code{ACC()}): \cr
+#'c(nexp, nobs, nsdates, nltime, 4)\cr
+#'where the fourth dimension is of length 4 and contains the lower limit of
+#'the 95\% confidence interval, the ACC, the upper limit of the 95\%
+#'confidence interval and the 95\% significance level given by a one-sided
+#'T-test.
+#'
+#'@param ACC An ACC array with dimensions:\cr
+#' c(nexp, nobs, nsdates, nltime, 4)\cr
+#' with the fourth dimension of length 4 containing the lower limit of the
+#' 95\% confidence interval, the ACC, the upper limit of the 95\% confidence
+#' interval and the 95\% significance level.
+#'@param sdates A character vector of startdates: c('YYYYMMDD','YYYYMMDD').
+#'@param toptitle A character string of the main title, optional.
+#'@param sizetit A multiplicative factor to scale title size, optional.
+#'@param ytitle A character string of the title of Y-axis for each experiment:
+#' c('', ''), optional.
+#'@param limits A numeric vector c(lower limit, upper limit): limits of the
+#' Y-axis, optional.
+#'@param legends A character vector of flags to be written in the legend,
+#' optional.
+#'@param freq An integer: 1 = yearly, 12 = monthly, 4 = seasonal, ... Default: 12.
+#'@param biglab A logical value for presentation/paper plot. Default = FALSE.
+#'@param fill A logical value indicating whether to fill the confidence
+#' interval. Default = FALSE.
+#'@param linezero A logical value indicating whether to add a line at y=0.
+#' Default = FALSE.
+#'@param points A logical value indicating whether to use points instead of
+#' lines. Default = TRUE.\cr
+#' Must be TRUE if only 1 leadtime.
+#'@param vlines A vector of x locations at which to add vertical black lines,
+#' optional.
+#'@param fileout A character string of the output file name. Extensions allowed:
+#' eps/ps, jpeg, png, pdf, bmp and tiff. Default is NULL.
+#'@param width A numeric of the file width, in the units specified in the
+#' parameter size_units (inches by default). Takes 8 by default.
+#'@param height A numeric of the file height, in the units specified in the parameter
+#' size_units (inches by default). Takes 5 by default.
+#'@param size_units A character string of the units of the size of the device
+#' (file or window) to plot in. Inches ('in') by default. See ?Devices and the
+#' creator function of the corresponding device.
+#'@param res Resolution of the device (file or window) to plot in. See
+#' ?Devices and the creator function of the corresponding device.
+#'@param \dots Arguments to be passed to the method. Only accepts the following
+#' graphical parameters:\cr
+#' adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt
+#' csi cxy err family fg fig fin font font.axis font.lab font.main font.sub
+#' lend lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page
+#' plt smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog\cr
+#' For more information about the parameters see `par`.
+#'
+#'@examples
+#' \dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#' }
+#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2)
+#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'acc <- ACC(ano_exp, ano_obs)
+#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap')
+#'# Combine acc results for PlotACC
+#'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val),
+#'             dim = c(dim(acc$acc), 4))
+#'res_bootstrap <- array(c(acc_bootstrap$acc_conf.lower, acc_bootstrap$acc,
+#'                         acc_bootstrap$acc_conf.upper, acc_bootstrap$p.val),
+#'                       dim = c(dim(acc_bootstrap$acc), 4))
+#' \donttest{
+#'PlotACC(res, startDates)
+#'PlotACC(res_bootstrap, startDates)
+#' }
+#'@importFrom grDevices dev.cur dev.new dev.off
+#'@importFrom stats ts
+#'@export
+PlotACC <- function(ACC, sdates, toptitle = "", sizetit = 1, ytitle = "",
+                    limits = NULL, legends = NULL, freq = 12, biglab = FALSE,
+                    fill = FALSE, linezero = FALSE, points = TRUE, vlines = NULL,
+                    fileout = NULL,
+                    width = 8, height = 5, size_units = 'in', res = 100, ...) {
+  # Process the user graphical parameters that may be passed in the call
+  ## Graphical parameters to exclude
+  excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "lab", "las", "lty", "lwd", "mai", "mgp", "new", "pch", "pin", "ps", "pty")
+  userArgs <- .FilterUserGraphicArgs(excludedArgs, ...)
+ + # If there is any filenames to store the graphics, process them + # to select the right device + if (!is.null(fileout)) { + deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res) + saveToFile <- deviceInfo$fun + fileout <- deviceInfo$files + } + + # + if (length(dim(ACC)) != 5 | dim(ACC)[5] != 4) { + stop("5 dim needed : c(nexp, nobs, nsdates, nltime, 4)") + } + nexp <- dim(ACC)[1] + nobs <- dim(ACC)[2] + nleadtime <- dim(ACC)[4] + nsdates <- dim(ACC)[3] + if (is.null(limits) == TRUE) { + ll <- min(ACC, na.rm = TRUE) + ul <- max(ACC, na.rm = TRUE) + if (biglab) { + ul <- ul + 0.3 * (ul - ll) + } else { + ul <- ul + 0.2 * (ul - ll) + } + } else { + ll <- limits[1] + ul <- limits[2] + } + yearinit <- as.integer(substr(sdates[1], 1, 4)) + moninit <- as.integer(substr(sdates[1], 5, 6)) + lastyear <- as.integer(substr(sdates[nsdates], 1, 4)) + (moninit + ( + nleadtime - 1) * 12 / freq - 1) %/% 12 + lastmonth <- (moninit + (nleadtime - 1) * (12 / freq) - 1) %% 12 + 1 + empty_ts <- ts(start = c(yearinit, (moninit - 1) %/% (12 / freq) + 1), + end = c(lastyear, (lastmonth - 1) %/% (12 / freq) + 1), + frequency = freq) + color <- c("red4", "dodgerblue4", "lightgoldenrod4", "deeppink4", + "mediumpurple4", "green4", "orange4", "lightblue4", "mediumorchid4", + "olivedrab4") + colorblock <- c("red1", "dodgerblue1", "lightgoldenrod1", "deeppink1", + "mediumpurple1", "green1", "orange1", "lightblue1", + "mediumorchid1", "olivedrab1") + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + + # Load the user parameters + par(userArgs) + + if (biglab) { + par(mai = c(1, 1.1, 0.5, 0), mgp = c(2.8, 0.9, 0)) + par(cex = 1.3, cex.lab = 2, cex.axis = 1.8) + cexmain <- 2.2 + legsize <- 1.5 + } else { + par(mai = c(0.8, 0.8, 0.5, 0.1), mgp = c(2, 0.5, 0)) + par(cex = 1.3, cex.lab = 1.5, cex.axis = 1.1) + cexmain <- 1.5 + legsize <- 1 + } + plot(empty_ts, ylim = c(ll, ul), xlab = "Time (years)", ylab = ytitle, + main = toptitle, cex.main = cexmain * sizetit) + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + numcol <- jobs + (jexp - 1) * nobs + for (jdate in 1:nsdates) { + year0 <- as.integer(substr(sdates[jdate], 1, 4)) + mon0 <- as.integer(substr(sdates[jdate], 5, 6)) + start <- (year0 - yearinit) * freq + 1 + end <- start + nleadtime - 1 + var <- array(dim = c(3, length(empty_ts))) + var[, start:end] <- t(ACC[jexp, jobs, jdate, , 1:3]) + if (fill) { + par(new = TRUE) + bordup <- ACC[jexp, jobs, jdate, , 3] + borddown <- ACC[jexp, jobs, jdate, , 1] + tmp <- c(start:end) + xout <- is.na(bordup + borddown) + tmp <- tmp[which(xout == FALSE)] + xx <- c(tmp, rev(tmp)) + bordup <- bordup[which(xout == FALSE)] + borddown <- borddown[which(xout == FALSE)] + yy <- c(bordup, rev(borddown)) + if (jdate == 1) { + matplot(t(var), type = "l", lty = 1, lwd = 1, ylim = c(ll, ul), + col = color[numcol], xlab = "", ylab = "", axes = FALSE) + } + polygon(xx, yy, col = colorblock[numcol], border = NA) + } + if (points) { + par(new = TRUE) + plot(var[2, ], type = "p", lty = 1, lwd = 6, ylim = c(ll, ul), + col = color[numcol], xlab = "", ylab = "", axes = FALSE, + cex = 0.6) + par(new = TRUE) + plot(var[1, ], type = "p", pch = 6, lwd = 3, ylim = c(ll, ul), + col = color[numcol], xlab = "", ylab = "", axes = FALSE, + cex = 0.6) + par(new = TRUE) + plot(var[3, ], type = "p", pch = 2, lwd = 3, ylim = c(ll, ul), + col = 
color[numcol], xlab = "", ylab = "", axes = FALSE,
+               cex = 0.6)
+          par(new = TRUE)
+          for (jind in start:end) {
+            lines(c(jind, jind), var[c(1, 3), jind], lwd = 1,
+                  ylim = c(ll, ul), col = color[numcol], xlab = "",
+                  ylab = "", axes = FALSE)
+          }
+        } else {
+          par(new = TRUE)
+          plot(var[2, ], type = "l", lty = 1, lwd = 4, ylim = c(ll, ul),
+               col = color[numcol], xlab = "", ylab = "", axes = FALSE)
+          par(new = TRUE)
+          plot(var[1, ], type = "l", lty = 1, lwd = 1, ylim = c(ll, ul),
+               col = color[numcol], xlab = "", ylab = "", axes = FALSE)
+          par(new = TRUE)
+          plot(var[3, ], type = "l", lty = 1, lwd = 1, ylim = c(ll, ul),
+               col = color[numcol], xlab = "", ylab = "", axes = FALSE)
+        }
+      }
+    }
+  }
+  if (linezero) {
+    abline(h = 0, col = "black")
+  }
+  if (!is.null(vlines)) {
+    for (x in vlines) {
+      abline(v = x, col = "black")
+    }
+  }
+  if (!is.null(legends)) {
+    if (points) {
+      legend(0, ul, legends[1:(nobs * nexp)], lty = 3, lwd = 10,
+             col = color[1:(nobs * nexp)], cex = legsize)
+    } else {
+      legend(0, ul, legends[1:(nobs * nexp)], lty = 1, lwd = 4,
+             col = color[1:(nobs * nexp)], cex = legsize)
+    }
+  }
+
+  # If the graphic was saved to file, close the connection with the device
+  if (!is.null(fileout)) dev.off()
+}
diff --git a/R/PlotAno.R b/R/PlotAno.R
index 08a9ab88ceff4bfbcd3130203f42e031f0a3ceed..9a4a97141268bde9e6c80c985e7342dbeb9898f2 100644
--- a/R/PlotAno.R
+++ b/R/PlotAno.R
@@ -110,6 +110,11 @@ PlotAno <- function(exp_ano, obs_ano = NULL, sdates, toptitle = rep('', 15),
   } else {
     nobs <- 0
   }
+  # sdate check
+  if (!all(nchar(sdates) == 8)) {
+    stop("The parameter 'sdates' must be formatted as YYYYMMDD.")
+  }
+
   if (is.null(limits) == TRUE) {
     if (memb) {
       ll <- min(min(exp_ano, na.rm = TRUE), min(obs_ano, na.rm = TRUE), na.rm = TRUE)
diff --git a/R/PlotBoxWhisker.R b/R/PlotBoxWhisker.R
new file mode 100644
index 0000000000000000000000000000000000000000..2ddcec0d59a7627432b388db555678dcaf4041e8
--- /dev/null
+++ b/R/PlotBoxWhisker.R
@@ -0,0 +1,242 @@
+#'Box-And-Whisker Plot of Time Series with Ensemble Distribution
+#'
+#'Produce a time series of box-and-whisker plots showing the distribution of
+#'the members of a forecast vs. the observed evolution. The correlation
+#'between forecast and observational data is calculated and displayed. Only
+#'works for n-monthly to n-yearly time series.
+#'
+#'@param exp Forecast array of multi-member time series, e.g., the NAO index
+#'  of one experiment. The expected dimensions are
+#'  c(members, start dates/forecast horizons). A vector with only the time
+#'  dimension can also be provided. Only monthly or lower frequency time
+#'  series are supported. See parameter freq.
+#'@param obs Observational vector or array of time series, e.g., the NAO index
+#'  of the observations that correspond to the forecast data in \code{exp}.
+#'  The expected dimensions are c(start dates/forecast horizons) or
+#'  c(1, start dates/forecast horizons). Only monthly or lower frequency time
+#'  series are supported. See parameter freq.
+#'@param toptitle Character string to be drawn as figure title.
+#'@param ytitle Character string to be drawn as y-axis title.
+#'@param monini Number of the month of the first time step, from 1 to 12.
+#'@param yearini Year of the first time step.
+#'@param freq Frequency of the provided time series: 1 = yearly, 12 = monthly,
+#'  4 = seasonal, ... Default = 1.
+#'@param expname Experimental dataset name.
+#'@param obsname Name of the observational reference dataset.
+#'@param drawleg TRUE/FALSE: whether to draw the legend or not.
+#'@param fileout Name of output file. Extensions allowed: eps/ps, jpeg, png,
+#'  pdf, bmp and tiff. \cr
+#'  Default = NULL.
+#'@param width File width, in the units specified in the parameter size_units
+#'  (inches by default). Takes 8 by default.
+#'@param height File height, in the units specified in the parameter
+#'  size_units (inches by default). Takes 5 by default.
+#'@param size_units Units of the size of the device (file or window) to plot
+#'  in. Inches ('in') by default. See ?Devices and the creator function of the
+#'  corresponding device.
+#'@param res Resolution of the device (file or window) to plot in. See
+#'  ?Devices and the creator function of the corresponding device.
+#'@param ... Arguments to be passed to the method. Only accepts the following
+#'  graphical parameters:\cr
+#'  ann ask bg cex.lab cex.sub cin col.axis col.lab col.main col.sub cra crt
+#'  csi cxy err family fg fig font font.axis font.lab font.main font.sub lend
+#'  lheight ljoin lmitre mex mfcol mfrow mfg mkh oma omd omi page pin plt pty
+#'  smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr
+#'  For more information about the parameters see `par`.
+#'
+#'@return Generates a figure in the active graphic device or, if 'fileout' is
+#'  provided, a file at the specified path.
+#'
+#'@seealso EOF, ProjectField, NAO
+#'@keywords datagen
+#'@author History:\cr
+#'0.1 - 2013-09 (F. Lienert, \email{flienert@@ic3.cat}) - Original code\cr
+#'0.2 - 2015-03 (L. Batte, \email{lauriane.batte@@ic3.cat}) - Removed all
+#'  normalization for sake of clarity.\cr
+#'1.0 - 2016-03 (N. Manubens, \email{nicolau.manubens@@bsc.es}) - Formatting to R CRAN
+#'@examples
+#'# See examples on Load() to understand the first lines in this example
+#' \dontrun{
+#'data_path <- system.file('sample_data', package = 's2dverification')
+#'expA <- list(name = 'experiment', path = file.path(data_path,
+#'             'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly',
+#'             '$VAR_NAME$_$START_DATE$.nc'))
+#'obsX <- list(name = 'observation', path = file.path(data_path,
+#'             '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$',
+#'             '$VAR_NAME$_$YEAR$$MONTH$.nc'))
+#'
+#'# Now we are ready to use Load().
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- Load('tos', list(expA), list(obsX), startDates,
+#'                   leadtimemin = 1, leadtimemax = 4, output = 'lonlat',
+#'                   latmin = 27, latmax = 48, lonmin = -12, lonmax = 40)
+#' }
+#' \dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 20, latmax = 80,
+#'                                     lonmin = -80, lonmax = 40)
+#'# No example data is available over the NAO region, so in this example we
+#'# will tweak the available data. In a real use case, one can Load() the data
+#'# over the NAO region directly.
+#'sampleData$lon[] <- c(40, 280, 340)
+#'sampleData$lat[] <- c(20, 80)
+#' }
+#'# Now ready to compute the EOFs and project on, for example, the first
+#'# variability mode.
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2])
+#'ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2])
+#'nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon)
+#'# Finally plot the NAO index
+#' \dontrun{
+#'nao$exp <- Reorder(nao$exp, c(2, 1))
+#'nao$obs <- Reorder(nao$obs, c(2, 1))
+#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS",
+#'               monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X")
+#' }
+#'
+#'@importFrom grDevices dev.cur dev.new dev.off
+#'@importFrom stats cor
+#'@export
+PlotBoxWhisker <- function(exp, obs, toptitle = '', ytitle = '', monini = 1,
+                           yearini = 0, freq = 1, expname = "exp 1",
+                           obsname = "obs 1", drawleg = TRUE,
+                           fileout = NULL,
+                           width = 8, height = 5, size_units = 'in', res = 100, ...) {
+
+  # Process the user graphical parameters that may be passed in the call
+  ## Graphical parameters to exclude
+  excludedArgs <- c("adj", "bty", "cex", "cex.axis", "cex.main", "col", "din", "fin", "lab", "las", "lty", "lwd", "mai", "mar", "mgp", "new", "pch", "ps")
+  userArgs <- .FilterUserGraphicArgs(excludedArgs, ...)
+
+  # If there are any filenames to store the graphics, process them
+  # to select the right device
+  if (!is.null(fileout)) {
+    deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res)
+    saveToFile <- deviceInfo$fun
+    fileout <- deviceInfo$files
+  }
+
+  # Checking exp
+  if (is.numeric(exp)) {
+    if (is.null(dim(exp)) || length(dim(exp)) == 1) {
+      dim(exp) <- c(1, length(exp))
+    }
+  }
+  if (!is.numeric(exp) || length(dim(exp)) != 2) {
+    stop("Parameter 'exp' must be a numeric vector or array of dimensions c(forecast horizons/start dates) or c(ensemble members, forecast horizons/start dates).")
+  }
+
+  # Checking obs
+  if (is.numeric(obs)) {
+    if (is.null(dim(obs)) || length(dim(obs)) == 1) {
+      dim(obs) <- c(1, length(obs))
+    }
+  }
+  if (!is.numeric(obs) || length(dim(obs)) != 2) {
+    stop("Parameter 'obs' must be a numeric vector or array of dimensions c(forecast horizons/start dates) or c(1, forecast horizons/start dates).")
+  }
+
+  # Checking consistency in exp and obs
+  if (dim(exp)[2] != dim(obs)[2]) {
+    stop("'exp' and 'obs' must have data for the same amount of time steps.")
+  }
+
+  if (!is.character(toptitle) || !is.character(ytitle)) {
+    stop("Parameters 'ytitle' and 'toptitle' must be character strings.")
+  }
+
+  if (!is.numeric(monini)) {
+    stop("'monini' must be a month number, from 1 to 12.")
+  }
+  if (monini < 1 || monini > 12) {
+    stop("'monini' must be >= 1 and <= 12.")
+  }
+
+  if (!is.numeric(yearini)) {
+    stop("'yearini' must be a numeric value.")
+  }
+
+  if (!is.numeric(freq)) {
+    stop("'freq' must be a number <= 12.")
+  }
+
+  if (!is.character(expname) || !is.character(obsname)) {
+    stop("'expname' and 'obsname' must be character strings.")
+  }
+
+  if (!is.logical(drawleg)) {
+    stop("Parameter 'drawleg' must be either TRUE or FALSE.")
+  }
+
+  if (!is.character(fileout) && !is.null(fileout)) {
+    stop("Parameter 'fileout' must be a character string.")
+  }
+
+  ntimesteps <- dim(exp)[2]
+  lastyear <- (monini + (ntimesteps - 1) * 12 / freq - 1) %/% 12 + yearini
+  lastmonth <- (monini + (ntimesteps - 1) * 12 / freq - 1) %% 12 + 1
+  #
+  # Define some plot parameters
+  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #
+  labind <- seq(1, ntimesteps)
+  months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
+              "Oct", "Nov", "Dec")
+  labyear <- ((labind - 1) * 12 / freq + monini - 1) %/% 12
+ yearini + labmonth <- months[((labind - 1) * 12 / freq + monini - 1) %% 12 + 1] + for (jx in 1:length(labmonth)) { + y2o3dig <- paste("0", as.character(labyear[jx]), sep = "") + labmonth[jx] <- paste(labmonth[jx], "\nYr ", substr(y2o3dig, + nchar(y2o3dig) - 1, nchar(y2o3dig)), sep = "") + } + + # Open connection to graphical device + if (!is.null(fileout)) { + saveToFile(fileout) + } else if (names(dev.cur()) == 'null device') { + dev.new(units = size_units, res = res, width = width, height = height) + } + + # Load the user parameters + par(userArgs) + + ## Observed time series. + #pc.o <- ts(obs[1, ], deltat = 1, start = yr1, end = yr2) + pc.o <- obs[1, ] + ## Normalization of obs, forecast members. Fabian + ## Normalization of forecast should be according to ensemble + ## mean, to keep info on ensemble spread, no? Lauriane pc.o <- + ## pc.o/sd(pc.o) sd.fc <- apply(exp,c(1),sd) + ## exp <- exp/sd.fc mn.fc <- + ## apply(exp,2, mean) exp <- + ## exp/sd(mn.fc) Produce plot. + par(mar = c(5, 6, 4, 2)) + boxplot(exp, add = FALSE, main = toptitle, + ylab = "", xlab = "", col = "red", lwd = 2, t = "b", + axes = FALSE, cex.main = 2, ylim = c(-max(abs(c(exp, pc.o))), max(abs(c(exp, pc.o))))) + lines(1:ntimesteps, pc.o, lwd = 3, col = "blue") + abline(h = 0, lty = 1) + if (drawleg) { + legend("bottomleft", c(obsname, expname), lty = c(1, 1), lwd = c(3, + 3), pch = c(NA, NA), col = c("blue", "red"), horiz = FALSE, + bty = "n", inset = 0.05) + } + ##mtext(1, line = 3, text = tar, cex = 1.9) + mtext(3, line = -2, text = paste(" AC =", round(cor(pc.o, + apply(exp, c(2), mean)), 2)), cex = 1.9, adj = 0) + axis(2, cex.axis = 2) + mtext(2, line = 3, text = ytitle, cex = 1.9) + par(mgp = c(0, 4, 0)) + ##axis(1, c(1:ntimesteps), NA, cex.axis = 2) + axis(1, seq(1, ntimesteps, by = 1), labmonth, cex.axis = 2) + box() + + # If the graphic was saved to file, close the connection with the device + if(!is.null(fileout)) dev.off() +} + diff --git a/R/PlotEquiMap.R b/R/PlotEquiMap.R index 37847dcc40166713a346b3859c38a75f85699fcc..f6a85380bedaa0649ae56026c2721a6e308f4f2f 100644 --- a/R/PlotEquiMap.R +++ b/R/PlotEquiMap.R @@ -6,8 +6,9 @@ #'A colour bar (legend) can be plotted and adjusted. It is possible to draw #'superimposed arrows, dots, symbols, contour lines and boxes. A number of #'options is provided to adjust the position, size and colour of the -#'components. This plot function is compatible with figure layouts if colour -#'bar is disabled. +#'components. Some parameters are provided to add and adjust the masks that +#'include continents, oceans, and lakes. This plot function is compatible with +#'figure layouts if colour bar is disabled. #' #'@param var Array with the values at each cell of a grid on a regular #' rectangular or gaussian grid. The array is expected to have two @@ -60,10 +61,15 @@ #'@param filled.continents Colour to fill in drawn projected continents. #' Takes the value gray(0.5) by default or, if 'square = FALSE', takes the #' value FALSE. If set to FALSE, continents are not filled in. +#'@param filled.oceans A logical value or the color name to fill in drawn +#' projected oceans. The default value is FALSE. If it is TRUE, the default +#' colour is "light blue". #'@param coast_color Colour of the coast line of the drawn projected continents. #' Takes the value gray(0.5) by default. #'@param coast_width Line width of the coast line of the drawn projected #' continents. Takes the value 1 by default. +#'@param lake_color Colour of the lake or other water body inside continents. 
+#' The default value is NULL. #'@param contours Array of same dimensions as 'var' to be added to the plot #' and displayed with contours. Parameter 'brks2' is required to define the #' magnitude breaks for each contour curve. Disregarded if 'square = FALSE'. @@ -75,6 +81,8 @@ #' and 'brks2', or if 'square = FALSE'. #'@param contour_lty Line type of the contour curves. Takes 1 (solid) by #' default. See help on 'lty' in par() for other accepted values. +#'@param contour_draw_label A logical value indicating whether to draw the +#' contour labels or not. The default value is TRUE. #'@param contour_label_scale Scale factor for the superimposed labels when #' drawing contour levels. #'@param dots Array of same dimensions as 'var' or with dimensions @@ -111,6 +119,13 @@ #' TRUE by default. #'@param labW Whether to label the longitude axis with a 'W' instead of minus #' for negative values. Defaults to FALSE. +#'@param lab_dist_x A numeric of the distance of the longitude labels to the +#' box borders. The default value is NULL and is automatically adjusted by +#' the function. +#'@param lab_dist_y A numeric of the distance of the latitude labels to the +#' box borders. The default value is NULL and is automatically adjusted by +#' the function. +#'@param degree_sym A logical indicating whether to include degree symbol (30° N) or not (30N; default). #'@param intylat Interval between latitude ticks on y-axis, in degrees. #' Defaults to 20. #'@param intxlon Interval between latitude ticks on x-axis, in degrees. @@ -213,15 +228,17 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL, color_fun = clim.palette(), square = TRUE, filled.continents = NULL, - coast_color = NULL, coast_width = 1, + filled.oceans = FALSE, + coast_color = NULL, coast_width = 1, lake_color = NULL, contours = NULL, brks2 = NULL, contour_lwd = 0.5, contour_color = 'black', contour_lty = 1, - contour_label_scale = 1, + contour_draw_label = TRUE, contour_label_scale = 1, dots = NULL, dot_symbol = 4, dot_size = 1, arr_subsamp = floor(length(lon) / 30), arr_scale = 1, arr_ref_len = 15, arr_units = "m/s", arr_scale_shaft = 1, arr_scale_shaft_angle = 1, axelab = TRUE, labW = FALSE, + lab_dist_x = NULL, lab_dist_y = NULL, degree_sym = FALSE, intylat = 20, intxlon = 20, axes_tick_scale = 1, axes_label_scale = 1, drawleg = TRUE, subsampleg = NULL, @@ -403,10 +420,20 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, } else if (!is.logical(filled.continents)) { continent_color <- filled.continents filled.continents <- TRUE - } else if (filled.continents) { + } else { continent_color <- gray(0.5) } + # Check filled.oceans + if (!.IsColor(filled.oceans) & !is.logical(filled.oceans)) { + stop("Parameter 'filled.oceans' must be logical or a colour identifier.") + } else if (!is.logical(filled.oceans)) { + ocean_color <- filled.oceans + filled.oceans <- TRUE + } else if (filled.oceans) { + ocean_color <- "light blue" + } + # Check coast_color if (is.null(coast_color)) { if (filled.continents) { @@ -424,6 +451,17 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, stop("Parameter 'coast_width' must be numeric.") } + # Check lake_color + if (!is.null(lake_color)) { +# if (filled.continents) { +# lake_color <- 'white' +# } +# } else { + if (!.IsColor(lake_color)) { + stop("Parameter 'lake_color' must be a valid colour identifier.") + } + } + # Check contours if (!is.null(contours)) { if (dim(contours)[1] != dims[1] || 
dim(contours)[2] != dims[2]) { @@ -460,6 +498,11 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, stop("Parameter 'contour_lty' must be either a number or a character string.") } + # Check contour_draw_label + if (!is.logical(contour_draw_label)) { + stop("Parameter 'contour_draw_label' must be logical.") + } + # Check contour_label_scale if (!is.numeric(contour_label_scale)) { stop("Parameter 'contour_label_scale' must be numeric.") @@ -515,6 +558,16 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, if (!is.logical(labW)) { stop("Parameter 'labW' must be logical.") } + if (!is.null(lab_dist_x)) { + if (!is.numeric(lab_dist_x)) { + stop("Parameter 'lab_dist_x' must be numeric.") + } + } + if (!is.null(lab_dist_y)) { + if (!is.numeric(lab_dist_y)) { + stop("Parameter 'lab_dist_y' must be numeric.") + } + } if (!is.numeric(intylat)) { stop("Parameter 'intylat' must be numeric.") } else { @@ -648,6 +701,7 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, margin_scale[1] <- margin_scale[1] - 1 } margins <- rep(0.4, 4) * margin_scale + margins[4] <- margins[4] + 1 cex_title <- 2 * title_scale cex_axes_labels <- 1.3 * axes_label_scale cex_axes_ticks <- -0.5 * axes_tick_scale @@ -656,20 +710,34 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, ypos <- seq(latmin, latmax, intylat) xpos <- seq(lonmin, lonmax, intxlon) letters <- array('', length(ypos)) - letters[ypos < 0] <- 'S' - letters[ypos > 0] <- 'N' + if (degree_sym == FALSE) { + letters[ypos < 0] <- 'S' + letters[ypos > 0] <- 'N' + } else { + letters[ypos < 0] <- paste(intToUtf8(176), 'S') + letters[ypos > 0] <- paste(intToUtf8(176), 'N') + } ylabs <- paste(as.character(abs(ypos)), letters, sep = '') letters <- array('', length(xpos)) if (labW) { xpos2 <- xpos xpos2[xpos2 > 180] <- 360 - xpos2[xpos2 > 180] } - letters[xpos < 0] <- 'W' - letters[xpos > 0] <- 'E' + if (degree_sym == FALSE) { + letters[xpos < 0] <- 'W' + letters[xpos > 0] <- 'E' + } else { + letters[xpos < 0] <- paste(intToUtf8(176), 'W') + letters[xpos > 0] <- paste(intToUtf8(176), 'E') + } if (labW) { letters[xpos == 0] <- ' ' letters[xpos == 180] <- ' ' - letters[xpos > 180] <- 'W' + if (degree_sym == FALSE) { + letters[xpos > 180] <- 'W' + } else { + letters[xpos > 180] <- paste(intToUtf8(176), 'W') + } xlabs <- paste(as.character(abs(xpos2)), letters, sep = '') } else { xlabs <- paste(as.character(abs(xpos)), letters, sep = '') @@ -695,13 +763,31 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, par(userArgs) par(mar = margins, cex.main = cex_title, cex.axis = cex_axes_labels, mgp = c(0, spaceticklab, 0), las = 0) - plot.window(xlim = range(lonb$x, finite = TRUE), ylim = range(latb$x, finite = TRUE), - xaxs = 'i', yaxs = 'i') + + #NOTE: Here creates the window for later plot. If 'usr' for par() is not specified, + # use the lat/lon as the borders. If 'usr' is specified, use the assigned values. + if (is.null(userArgs$usr)) { + #NOTE: The grids are assumed to be equally spaced + xlim_cal <- c(lonb$x[1] - (lonb$x[2] - lonb$x[1]) / 2, + lonb$x[length(lonb$x)] + (lonb$x[2] - lonb$x[1]) / 2) + ylim_cal <- c(latb$x[1] - (latb$x[2] - latb$x[1]) / 2, + latb$x[length(latb$x)] + (latb$x[2] - latb$x[1]) / 2) + plot.window(xlim = xlim_cal, ylim = ylim_cal, xaxs = 'i', yaxs = 'i') +# Below is Old code. The border grids are only half plotted. 
+# plot.window(xlim = range(lonb$x, finite = TRUE), ylim = range(latb$x, finite = TRUE), +# xaxs = 'i', yaxs = 'i') + } else { + plot.window(xlim = par("usr")[1:2], ylim = par("usr")[3:4], xaxs = 'i', yaxs = 'i') + } + if (axelab) { + lab_distance_y <- ifelse(is.null(lab_dist_y), spaceticklab + 0.2, lab_dist_y) + lab_distance_x <- ifelse(is.null(lab_dist_x), spaceticklab + cex_axes_labels / 2 - 0.3, lab_dist_x) + axis(2, at = ypos, labels = ylabs, cex.axis = cex_axes_labels, tcl = cex_axes_ticks, - mgp = c(0, spaceticklab + 0.2, 0)) + mgp = c(0, lab_distance_y, 0)) axis(1, at = xpos, labels = xlabs, cex.axis = cex_axes_labels, tcl = cex_axes_ticks, - mgp = c(0, spaceticklab + cex_axes_labels / 2 - 0.3, 0)) + mgp = c(0, lab_distance_x, 0)) } title(toptitle, cex.main = cex_title) rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = colNA) @@ -717,10 +803,16 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, col = c(col_inf_image, cols, col_sup_image)) } if (!is.null(contours)) { +#NOTE: 'labcex' is the absolute size of contour labels. Parameter 'contour_label_scale' +# is provided in PlotEquiMap() but it was not used. Here, 'cex_axes_labels' was used +# and it was calculated from 'axes_label_scale', the size of lat/lon axis label. +# It is changed to use contour_label_scale*par('cex'). contour(lonb$x, latb$x, contours[lonb$ix, latb$ix], levels = brks2, - method = "edge", add = TRUE, - labcex = cex_axes_labels, lwd = contour_lwd, lty = contour_lty, - col = contour_color) + method = "edge", add = TRUE, +# labcex = cex_axes_labels, + labcex = contour_label_scale * par("cex"), + lwd = contour_lwd, lty = contour_lty, + col = contour_color, drawlabels = contour_draw_label) } # @@ -740,30 +832,77 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, # # Plotting continents # ~~~~~~~~~~~~~~~~~~~~~ - # - coast <- map(continents, interior = FALSE, wrap = TRUE, - fill = filled.continents, add = TRUE, plot = FALSE) - if (filled.continents) { - old_lwd <- par('lwd') - par(lwd = coast_width) - if (min(lon) >= 0) { - ylat <- latmin:latmax - xlon <- lonmin:lonmax - proj <- GEOmap::setPROJ(1, LON0 = mean(xlon), - LAT0 = mean(ylat), LATS = ylat, LONS = xlon) - lakes <- which(coastmap$STROKES$col == "blue") + # + # maps::map has the global map range [0, 360] or [-180, 180]. + # Set xlim so lon = 0 won't show a straight line when lon = [0, 359]. + # NOTE: It works except Antartic area. Don't know why. ylim is also set + # but it doesn't work. 
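+  # (e.g., for a lon vector covering [0, 359], xlim_conti is set below to
+  # c(0.01, 359.99), which keeps map() from drawing a spurious vertical line
+  # along the lon = 0 meridian)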
+ if (continents == 'world') { # [-180, 180] + xlim_conti <- c(-179.99, 179.99) + } else { # [0, 360] + xlim_conti <- c(0.01, 359.99) + } + old_lwd <- par('lwd') + par(lwd = coast_width) + # If [0, 360], use GEOmap; if [-180, 180], use maps + if (min(lon) >= 0) { + ylat <- latmin:latmax + xlon <- lonmin:lonmax + proj <- GEOmap::setPROJ(1, LON0 = mean(xlon), + LAT0 = mean(ylat), LATS = ylat, LONS = xlon) + lakes <- which(coastmap$STROKES$col == "blue") + par(new = TRUE) + if (filled.continents) { coastmap$STROKES$col[which(coastmap$STROKES$col != "blue")] <- continent_color - coastmap$STROKES$col[lakes] <- "white" - par(new = TRUE) + if (is.null(lake_color)) { + coastmap$STROKES$col[lakes] <- continent_color + } else { + coastmap$STROKES$col[lakes] <- lake_color #"white" + } GEOmap::plotGEOmap(coastmap, PROJ = proj, border = coast_color, add = TRUE, lwd = coast_width) } else { - polygon(coast, col = continent_color, border = coast_color, lwd = coast_width) + coastmap$STROKES$col[which(coastmap$STROKES$col != "blue")] <- coast_color + if (is.null(lake_color)) { + coastmap$STROKES$col[lakes] <- coast_color + } else { + coastmap$STROKES$col[lakes] <- lake_color #"white" + } + GEOmap::plotGEOmap(coastmap, PROJ = proj, #MAPcol = coast_color, + add = TRUE, lwd = coast_width, MAPstyle = 2) } - par(lwd = old_lwd) + } else { - lines(coast, col = coast_color, lwd = coast_width) + # [-180, 180] + coast <- map(continents, interior = FALSE, wrap = TRUE, + xlim = xlim_conti, ylim = c(-89.99, 89.99), + fill = filled.continents, add = TRUE, plot = FALSE) + if (filled.continents) { + polygon(coast, col = continent_color, border = coast_color, lwd = coast_width) + } else { + lines(coast, col = coast_color, lwd = coast_width) + } + if (!is.null(lake_color)) { + map('lakes', add = TRUE, fill = filled.continents, col = lake_color) + } } + par(lwd = old_lwd) + + # filled.oceans + if (filled.oceans) { + old_lwd <- par('lwd') + par(lwd = coast_width) + + outline <- map(continents, fill = T, plot = FALSE) # must be fill = T + xbox <- xlim_conti + c(-2, 2) + ybox <- c(-92, 92) + outline$x <- c(outline$x, NA, c(xbox, rev(xbox), xbox[1])) + outline$y <- c(outline$y, NA, rep(ybox, each = 2), ybox[1]) + polypath(outline, col = ocean_color, rule = 'evenodd', border = NA) + + par(lwd = old_lwd) + } + box() # Draw rectangle on the map if (!is.null(boxlim)) { @@ -803,8 +942,8 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, # if (!is.null(varu) && !is.null(varv)) { # Create a two dimention array of longitude and latitude - lontab <- InsertDim(lonb$x, 2, length( latb$x)) - lattab <- InsertDim(latb$x, 1, length( lonb$x)) + lontab <- InsertDim(lonb$x, 2, length(latb$x), name = 'lat') + lattab <- InsertDim(latb$x, 1, length(lonb$x), name = 'lon') varplotu <- varu[lonb$ix, latb$ix] varplotv <- varv[lonb$ix, latb$ix] diff --git a/R/PlotStereoMap.R b/R/PlotStereoMap.R index d4e8e2d243024b18eb0d0cf510772b8f245c486b..4b4fbd22b93110e15e51ffe63071fac24f3b1d41 100644 --- a/R/PlotStereoMap.R +++ b/R/PlotStereoMap.R @@ -4,8 +4,8 @@ #'a polar stereographic world projection with coloured grid cells. Only the #'region within a specified latitude interval is displayed. A colour bar #'(legend) can be plotted and adjusted. It is possible to draw superimposed -#'dots, symbols and boxes. A number of options is provided to adjust the -#'position, size and colour of the components. This plot function is +#'dots, symbols, boxes, contours, and arrows. 
A number of options is provided to +#'adjust the position, size and colour of the components. This plot function is #'compatible with figure layouts if colour bar is disabled. #' #'@param var Array with the values at each cell of a grid on a regular @@ -24,6 +24,10 @@ #'@param lat Numeric vector of latitude locations of the cell centers of the #' grid of 'var', in any order (same as 'var'). Expected to be from a regular #' rectangular or gaussian grid, within the range [-90, 90]. +#'@param varu Array of the zonal component of wind/current/other field with +#' the same dimensions as 'var'. +#'@param varv Array of the meridional component of wind/current/other field +#' with the same dimensions as 'var'. #'@param latlims Latitudinal limits of the figure.\cr #' Example : c(60, 90) for the North Pole\cr #' c(-90,-60) for the South Pole @@ -58,6 +62,24 @@ #' continents. Takes the value gray(0.5) by default. #'@param coast_width Line width of the coast line of the drawn projected #' continents. Takes the value 1 by default. +#'@param contours Array of same dimensions as 'var' to be added to the plot +#' and displayed with contours. Parameter 'brks2' is required to define the +#' magnitude breaks for each contour curve. +#'@param brks2 A numeric value or vector of magnitude breaks where to draw +#' contour curves for the array provided in 'contours'. If it is a number, it +#' represents the number of breaks (n) that defines (n - 1) intervals to +#' classify 'contours'. +#'@param contour_lwd Line width of the contour curves provided via 'contours' +#' and 'brks2'. The default value is 0.5. +#'@param contour_color Line color of the contour curves provided via 'contours' +#' and 'brks2'. +#'@param contour_lty Line type of the contour curves. Takes 1 (solid) by +#' default. See help on 'lty' in par() for other accepted values. +#'@param contour_label_draw A logical value indicating whether to draw the +#' contour labels (TRUE) or not (FALSE) when 'contours' is used. The default +#' value is TRUE. +#'@param contour_label_scale Scale factor for the superimposed labels when +#' drawing contour levels. The default value is 0.6. #'@param dots Array of same dimensions as 'var' or with dimensions #' c(n, dim(var)), where n is the number of dot/symbol layers to add to the #' plot. A value of TRUE at a grid cell will draw a dot/symbol on the @@ -74,6 +96,23 @@ #' layers in 'dots'. Takes 1 by default. #'@param intlat Interval between latitude lines (circles), in degrees. #' Defaults to 10. +#'@param arr_subsamp A number as subsampling factor to select a subset of arrows +#' in 'varu' and 'varv' to be drawn. Only one out of arr_subsamp arrows will +#' be drawn. The default value is 1. +#'@param arr_scale A number as scale factor for drawn arrows from 'varu' and +#' 'varv'. The default value is 1. +#'@param arr_ref_len A number of the length of the refence arrow to be drawn as +#' legend at the bottom of the figure (in same units as 'varu' and 'varv', only +#' affects the legend for the wind or variable in these arrays). The default +#' value is 15. +#'@param arr_units Units of 'varu' and 'varv', to be drawn in the legend. +#' Takes 'm/s' by default. +#'@param arr_scale_shaft A number for the scale of the shaft of the arrows +#' (which also depend on the number of figures and the arr_scale parameter). +#' The default value is 1. +#'@param arr_scale_shaft_angle A number for the scale of the angle of the +#' shaft of the arrows (which also depend on the number of figure and the +#' arr_scale parameter). 
The default value is 1. #'@param drawleg Whether to plot a color bar (legend, key) or not. #' Defaults to TRUE. #'@param boxlim Limits of a box to be added to the plot, in degrees: @@ -137,15 +176,21 @@ #'@importFrom grDevices dev.cur dev.new dev.off gray #'@importFrom stats median #'@export -PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), +PlotStereoMap <- function(var, lon, lat, varu = NULL, varv = NULL, latlims = c(60, 90), toptitle = NULL, sizetit = NULL, units = NULL, brks = NULL, cols = NULL, bar_limits = NULL, triangle_ends = NULL, col_inf = NULL, col_sup = NULL, colNA = NULL, color_fun = clim.palette(), filled.continents = FALSE, coast_color = NULL, coast_width = 1, + contours = NULL, brks2 = NULL, contour_lwd = 0.5, + contour_color = 'black', contour_lty = 1, + contour_label_draw = TRUE, contour_label_scale = 0.6, dots = NULL, dot_symbol = 4, dot_size = 0.8, - intlat = 10, + intlat = 10, + arr_subsamp = floor(length(lon) / 30), arr_scale = 1, + arr_ref_len = 15, arr_units = "m/s", + arr_scale_shaft = 1, arr_scale_shaft_angle = 1, drawleg = TRUE, subsampleg = NULL, bar_extra_labels = NULL, draw_bar_ticks = TRUE, draw_separators = FALSE, triangle_ends_scale = 1, @@ -197,17 +242,40 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), stop("Parameter 'var' must be a numeric array with two dimensions.") } dims <- dim(var) + + # Check varu and varv + if (!is.null(varu) && !is.null(varv)) { + if (!is.array(varu) || !(length(dim(varu)) == 2)) { + stop("Parameter 'varu' must be a numerical array with two dimensions.") + } + if (!is.array(varv) || !(length(dim(varv)) == 2)) { + stop("Parameter 'varv' must be a numerical array with two dimensions.") + } + } else if (!is.null(varu) || !is.null(varv)) { + stop("Only one of the components 'varu' or 'varv' has been provided. Both must be provided.") + } + + if (!is.null(varu) && !is.null(varv)) { + if (dim(varu)[1] != dims[1] || dim(varu)[2] != dims[2]) { + stop("Parameter 'varu' must have same number of longitudes and latitudes as 'var'.") + } + if (dim(varv)[1] != dims[1] || dim(varv)[2] != dims[2]) { + stop("Parameter 'varv' must have same number of longitudes and latitudes as 'var'.") + } + } + # Transpose the input matrices because the base plot functions work directly # with dimensions c(lon, lat). 
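+  # (e.g., a 'var' supplied as c(lat, lon) = c(33, 96) when length(lon) = 96
+  # and length(lat) = 33 is transposed below to c(lon, lat) = c(96, 33),
+  # together with 'varu', 'varv' and 'dots' if provided)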
if (dims[1] != length(lon) || dims[2] != length(lat)) { if (dims[1] == length(lat) && dims[2] == length(lon)) { var <- t(var) + if (!is.null(varu)) varu <- t(varu) + if (!is.null(varv)) varv <- t(varv) if (!is.null(dots)) dots <- aperm(dots, c(1, 3, 2)) dims <- dim(var) } } - # Check lon if (length(lon) != dims[1]) { stop("Parameter 'lon' must have as many elements as the number of cells along longitudes in the input array 'var'.") @@ -218,6 +286,14 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), stop("Parameter 'lat' must have as many elements as the number of cells along longitudes in the input array 'var'.") } + # Prepare sorted lon/lat and other arguments + latb <- sort(lat, index.return = TRUE) + lonb <- sort(lon, index.return = TRUE) + latmin <- floor(min(lat) / 10) * 10 + latmax <- ceiling(max(lat) / 10) * 10 + lonmin <- floor(min(lon) / 10) * 10 + lonmax <- ceiling(max(lon) / 10) * 10 + # Check latlims if (!is.numeric(latlims) || length(latlims) != 2) { stop("Parameter 'latlims' must be a numeric vector with two elements.") @@ -227,12 +303,16 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), if (max(abs(latlims - center_at)) > 90 + 20) { stop("The range specified in 'latlims' is too wide. 110 degrees supported maximum.") } - dlon <- median(lon[2:dims[1]] - lon[1:(dims[1] - 1)]) / 2 - dlat <- median(lat[2:dims[2]] - lat[1:(dims[2] - 1)]) / 2 + dlon <- median(lonb$x[2:dims[1]] - lonb$x[1:(dims[1] - 1)]) / 2 + dlat <- median(latb$x[2:dims[2]] - latb$x[1:(dims[2] - 1)]) / 2 original_last_lat <- latlims[which.min(abs(latlims))] - last_lat <- lat[which.min(abs(lat - original_last_lat))] - dlat * sign(center_at) + last_lat <- latb$x[which.min(abs(latb$x - original_last_lat))] - dlat * sign(center_at) latlims[which.min(abs(latlims))] <- last_lat + # Subset lat by latlims + lat_plot_ind <- which(lat >= latlims[1] & lat <= latlims[2]) + latb_plot_ind <- which(latb$x >= latlims[1] & latb$x <= latlims[2]) + # Check toptitle if (is.null(toptitle) || is.na(toptitle)) { toptitle <- '' @@ -309,6 +389,69 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), if (!is.numeric(coast_width)) { stop("Parameter 'coast_width' must be numeric.") } + # Check contours + if (!is.null(contours)) { + if (!is.array(contours)) { + stop("Parameter 'contours' must be a numeric array.") + } + if (length(dim(contours)) > 2) { + contours <- drop(contours) + dim(contours) <- head(c(dim(contours), 1, 1), 2) + } + if (length(dim(contours)) > 2) { + stop("Parameter 'contours' must be a numeric array with two dimensions.") + } else if (length(dim(contours)) < 2) { + stop("Parameter 'contours' must be a numeric array with two dimensions.") + } + # Transpose the input matrices because the base plot functions work directly + # with dimensions c(lon, lat). 
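+  # (e.g., if dim(var) is c(lon, lat) = c(96, 33) at this point, 'contours'
+  # is expected to arrive as c(lat, lon) = c(33, 96) and is transposed below
+  # to match 'var')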
+ if (dim(contours)[1] == dims[2] & dim(contours)[2] == dims[1]) { + contours <- t(contours) + } else { + stop("Parameter 'contours' must have the same number of longitudes and latitudes as 'var'.") + } + } + + # Check brks2 + if (!is.null(contours)) { + if (is.null(brks2)) { + ll <- signif(min(contours, na.rm = TRUE), 2) + ul <- signif(max(contours, na.rm = TRUE), 2) + brks2 <- unique(signif(seq(ll, ul, length.out = length(brks)), 2)) + + } else if (is.numeric(brks2) & length(brks2) == 1) { + ll <- signif(min(contours, na.rm = TRUE), 2) + ul <- signif(max(contours, na.rm = TRUE), 2) + brks2 <- unique(signif(seq(ll, ul, length.out = brks2), 2)) + } else if (!is.numeric(brks2)) { + stop("Parameter 'brks2' must be a numeric value or vector.") + } + } + + # Check contour_lwd + if (!is.numeric(contour_lwd)) { + stop("Parameter 'contour_lwd' must be numeric.") + } + + # Check contour_color + if (!.IsColor(contour_color)) { + stop("Parameter 'contour_color' must be a valid colour identifier.") + } + + # Check contour_lty + if (!is.numeric(contour_lty) && !is.character(contour_lty)) { + stop("Parameter 'contour_lty' must be either a number or a character string.") + } + + # Check contour_label_draw + if (!is.logical(contour_label_draw)) { + stop("Parameter 'contour_label_draw' must be a logical value.") + } + + # Check contour_label_scale + if (!is.numeric(contour_label_scale)) { + stop("Parameter 'contour_label_scale' must be numeric.") + } # Check dots, dot_symbol and dot_size if (!is.null(dots)) { @@ -338,6 +481,26 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), stop("Parameter 'intlat' must be numeric.") } + # Check arrow parameters + if (!is.numeric(arr_subsamp)) { + stop("Parameter 'arr_subsamp' must be numeric.") + } + if (!is.numeric(arr_scale)) { + stop("Parameter 'arr_scale' must be numeric.") + } + if (!is.numeric(arr_ref_len)) { + stop("Parameter 'arr_ref_len' must be numeric.") + } + if (!is.character(arr_units)) { + stop("Parameter 'arr_units' must be character.") + } + if (!is.numeric(arr_scale_shaft)) { + stop("Parameter 'arr_scale_shaft' must be numeric.") + } + if (!is.numeric(arr_scale_shaft_angle)) { + stop("Parameter 'arr_scale_shaft_angle' must be numeric.") + } + # Check legend parameters if (!is.logical(drawleg)) { stop("Parameter 'drawleg' must be logical.") @@ -397,6 +560,9 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), title_scale <- title_scale * scale margin_scale <- margin_scale * scale dot_size <- dot_size * scale + arr_scale <- arr_scale * scale + contour_label_scale <- contour_label_scale * scale + contour_lwd <- contour_lwd * scale } } @@ -427,6 +593,10 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), bar_extra_margin[1] <- bar_extra_margin[1] + margins[1] bar_extra_margin[3] <- bar_extra_margin[3] + margins[3] + if (!is.null(varu)) { + margins[1] <- margins[1] + 2.2 * units_scale + } + if (drawleg) { layout(matrix(1:2, ncol = 2, nrow = 1), widths = c(8, 2)) } @@ -462,34 +632,92 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), col_sup_image <- ifelse(is.null(col_sup), colNA, col_sup) # Draw the data polygons for (jx in 1:dims[1]) { - for (jy in 1:dims[2]) { - if (lat[jy] >= latlims[1] && latlims[2] >= lat[jy]) { - coord <- mapproj::mapproject(c(lon[jx] - dlon, lon[jx] + dlon, - lon[jx] + dlon, lon[jx] - dlon), - c(lat[jy] - dlat, lat[jy] - dlat, - lat[jy] + dlat, lat[jy] + dlat)) - if (is.na(var[jx, jy] > 0)) { - col <- colNA - } else if (var[jx, jy] <= brks[1]) { - col <- col_inf_image - } else 
if (var[jx, jy] >= tail(brks, 1)) { - col <- col_sup_image - } else { - ind <- which(brks[-1] >= var[jx, jy] & var[jx, jy] > brks[-length(brks)]) - col <- cols[ind] + for (jy in 1:length(lat_plot_ind)) { + coord <- mapproj::mapproject(c(lon[jx] - dlon, lon[jx] + dlon, + lon[jx] + dlon, lon[jx] - dlon), + c(lat[lat_plot_ind][jy] - dlat, lat[lat_plot_ind][jy] - dlat, + lat[lat_plot_ind][jy] + dlat, lat[lat_plot_ind][jy] + dlat)) + if (is.na(var[jx, lat_plot_ind[jy]] > 0)) { + col <- colNA + } else if (var[jx, lat_plot_ind[jy]] <= brks[1]) { + col <- col_inf_image + } else if (var[jx, lat_plot_ind[jy]] >= tail(brks, 1)) { + col <- col_sup_image + } else { + ind <- which(brks[-1] >= var[jx, lat_plot_ind[jy]] & var[jx, lat_plot_ind[jy]] > brks[-length(brks)]) + col <- cols[ind] + } + polygon(coord, col = col, border = NA) + } + } + + # contours + if (!is.null(contours)) { + nbrks2 <- length(brks2) + for (n_brks2 in 1:nbrks2) { + cl <- grDevices::contourLines(x = lonb$x, y = latb$x[latb_plot_ind], + z = contours[lonb$ix, latb$ix[latb_plot_ind]], + levels = brks2[n_brks2]) + if (length(cl) > 0) { + for (i in seq_along(cl)) { + xy <- mapproj::mapproject(cl[[i]]$x, cl[[i]]$y) + xc <- xy$x + yc <- xy$y + nc <- length(xc) + lines(xc, yc, col = contour_color, lwd = contour_lwd, lty = contour_lty) + + # draw label + if (contour_label_draw) { + label_char <- as.character(signif(brks2[n_brks2], 2)) + ## Check if the label has enough space to draw first. + last_slope <- Inf + put_label <- FALSE + for (p1 in 1:nc) { + p2 <- p1 + while (p2 < nc) { + dist <- sqrt((yc[p2] - yc[p1])^2 + (xc[p2] - xc[p1])^2) + if (!is.infinite(dist) & + dist > 1.2 * strwidth(label_char, cex = contour_label_scale)) { + put_label <- TRUE + slope <- (yc[p2] - yc[p1]) / (xc[p2] - xc[p1]) + # flatter is better + if (abs(slope) < abs(last_slope)) { + last_slope <- slope + last_p1 <- p1 + last_p2 <- p2 + } + break # Found a proper space for label. Move to the next p1. + } + p2 <- p2 + 1 # If the dist is not enough, try next p2. + } + } + + ## If label can be put + if (put_label) { + # Label should be at the middle of p1 and p2 + p_label <- (last_p1 + last_p2) / 2 + # string rotation angle is calculated from the slope + srt_label <- atan(last_slope) * 57.2958 # radian to degree + + #NOTE: 'cex' in text() is the scale factor. 
The actual size will be + # contour_label_scale * par("cex") + text(xc[p_label], yc[p_label], label_char, + cex = contour_label_scale, col = contour_color, srt = srt_label) + } + } } - polygon(coord, col = col, border = NA) } } } + # Draw the dots if (!is.null(dots)) { numbfig <- 1 # for compatibility with PlotEquiMap code - dots <- dots[, , which(lat >= latlims[1] & lat <= latlims[2]), drop = FALSE] - data_avail <- !is.na(var[, which(lat >= latlims[1] & lat <= latlims[2]), drop = FALSE]) + dots <- dots[, , lat_plot_ind, drop = FALSE] + data_avail <- !is.na(var[, lat_plot_ind, drop = FALSE]) for (counter in 1:(dim(dots)[1])) { points <- which(dots[counter, , ] & data_avail, arr.ind = TRUE) - points_proj <- mapproj::mapproject(lon[points[, 1]], lat[points[, 2]]) + points_proj <- mapproj::mapproject(lon[points[, 1]], lat[lat_plot_ind][points[, 2]]) points(points_proj$x, points_proj$y, pch = dot_symbol[counter], cex = dot_size[counter] * 3 / sqrt(sqrt(sum(lat >= latlims[which.min(abs(latlims))]) * length(lon))), @@ -535,6 +763,84 @@ PlotStereoMap <- function(var, lon, lat, latlims = c(60, 90), } } + # + # PlotWind + # ~~~~~~~~~~ + # + if (!is.null(varu) && !is.null(varv)) { + # Create a two dimention array of longitude and latitude + lontab <- InsertDim(lonb$x, 2, length(latb$x[latb_plot_ind]), name = 'lat') + lattab <- InsertDim(latb$x[latb_plot_ind], 1, length(lonb$x), name = 'lon') + # Select a subsample of the points to an arrow for each "subsample" grid point + # latmin has the most arrows, and latmax (polar point) has no arrow. + sublon_max <- seq(1, length(lonb$x), arr_subsamp) + sublat_max <- seq(1, length(latb$x[latb_plot_ind]), arr_subsamp) + ## calculate the length of sublon for each lat + arr_num_at_lat <- round(seq(length(sublon_max), 0, length.out = length(lat[lat_plot_ind]))) + ## If south hemisphere, revserse arr_num_at_lat (smaller lat has less arrows) + if (center_at < 0) { + arr_num_at_lat <- rev(arr_num_at_lat) + } + for (n_lat in seq_along(sublat_max)) { + sublat <- sublat_max[n_lat] + if (arr_num_at_lat[sublat] != 0) { + sublon <- round(seq(1, length(lon), length.out = arr_num_at_lat[sublat])) + # end points (start points + varu/varv) + uaux <- lontab[sublon, sublat] + varu[lonb$ix, latb$ix[latb_plot_ind]][sublon, sublat] * 0.5 * arr_scale + vaux <- lattab[sublon, sublat] + varv[lonb$ix, latb$ix[latb_plot_ind]][sublon, sublat] * 0.5 * arr_scale + + # project the start and end points on stereographic + xy0 <- mapproj::mapproject(lontab[sublon, sublat], lattab[sublon, sublat]) + xy1 <- mapproj::mapproject(uaux, vaux) + xc0 <- xy0$x + yc0 <- xy0$y + xc1 <- xy1$x + yc1 <- xy1$y + nc <- length(xc0) + + lenshaft <- 0.18 * arr_scale * arr_scale_shaft + angleshaft <- 12 * arr_scale_shaft_angle + + # Plot Wind + arrows(xc0, yc0, + xc1, yc1, + angle = angleshaft, + length = lenshaft) + } + } + + # Plot an arrow at the bottom of the plot for the legend + # Put arrow at lon = 0, lat = lowest lat (i.e., biggest circle) - (latmax - latmin)/8 + delta_arr_lengend <- (0.5 * arr_scale * arr_ref_len) + posarlon <- c(0 - delta_arr_lengend / 2, 0 + delta_arr_lengend / 2) + posarlat <- rep(min(abs(lat[lat_plot_ind])) - diff(range(lat[lat_plot_ind]))/8, 2) +#NOTE: The following lines put legend at bottom left corner. 
But it's hard to put it horizontally.
+#  delta_arr_lengend <- (0.5 * arr_scale * arr_ref_len)/sqrt(2)
+#  posarlat[1] <- posarlat[1] - delta_arr_lengend / 2
+#  posarlat[2] <- posarlat[2] + delta_arr_lengend / 2
+    ## turn into stereographic
+    arr_lengend <- mapproj::mapproject(posarlon, posarlat)
+
+    arrows(arr_lengend$x[1], arr_lengend$y[1],
+           arr_lengend$x[2], arr_lengend$y[2],
+           length = lenshaft, angle = angleshaft,
+           xpd = TRUE)
+    # save the parameter value
+    xpdsave <- par('xpd')
+    # deactivate xpd to be able to plot in the margin
+    par(xpd = NA)
+    # plot the text
+    mtext(paste(as.character(arr_ref_len), arr_units, sep = ""),
+          line = min(arr_lengend$y) + 1.8 * abs(min(arr_lengend$y)),
+          side = 1,
+          at = mean(arr_lengend$x),
+          cex = units_scale)
+    # come back to the previous xpd value
+    par(xpd = xpdsave)
+
+  }
+
+  #
+  # Colorbar
+  # ~~~~~~~~~~
diff --git a/R/PlotVsLTime.R b/R/PlotVsLTime.R
new file mode 100644
index 0000000000000000000000000000000000000000..94c82e0735c61c4ecf0f8186e09fbe681560a8cb
--- /dev/null
+++ b/R/PlotVsLTime.R
@@ -0,0 +1,265 @@
+#'Plot a score along the forecast time with its confidence interval
+#'
+#'Plot the correlation (\code{Corr()}), the root mean square error
+#'(\code{RMS()}) between the forecast values and their observational
+#'counterpart, the slope of their trend (\code{Trend()}), the
+#'interquartile range, maximum-minimum, standard deviation or median absolute
+#'deviation of the ensemble members (\code{Spread()}), or the ratio between
+#'the ensemble spread and the RMSE of the ensemble mean (\code{RatioSDRMS()})
+#'along the forecast time for all the input experiments on the same figure
+#'with their confidence intervals.
+#'
+#'@param var Matrix containing any prediction score with dimensions:\cr
+#'  (nexp/nmod, 3/4, nltime)\cr
+#'  or (nexp/nmod, nobs, 3/4, nltime).
+#'@param toptitle Main title, optional.
+#'@param ytitle Title of Y-axis, optional.
+#'@param monini Starting month between 1 and 12. Default = 1.
+#'@param freq 1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12.
+#'@param nticks Number of ticks and labels on the x-axis, optional.
+#'@param limits c(lower limit, upper limit): limits of the Y-axis, optional.
+#'@param listexp List of experiment names, optional.
+#'@param listobs List of observation names, optional.
+#'@param biglab TRUE/FALSE for presentation/paper plot. Default = FALSE.
+#'@param hlines c(a, b, ...) Add horizontal black lines at Y-positions a, b, ...\cr
+#'  Default = NULL.
+#'@param leg TRUE/FALSE if legend should be added or not to the plot.
+#'  Default = TRUE.
+#'@param siglev TRUE/FALSE if significance level should replace confidence
+#'  interval.\cr
+#'  Default = FALSE.
+#'@param sizetit Multiplicative factor to change title size, optional.
+#'@param show_conf TRUE/FALSE to show/not confidence intervals for input
+#'  variables.
+#'@param fileout Name of output file. Extensions allowed: eps/ps, jpeg, png,
+#'  pdf, bmp and tiff. The default value is NULL.
+#'@param width File width, in the units specified in the parameter size_units
+#'  (inches by default). Takes 8 by default.
+#'@param height File height, in the units specified in the parameter
+#'  size_units (inches by default). Takes 5 by default.
+#'@param size_units Units of the size of the device (file or window) to plot
+#'  in. Inches ('in') by default. See ?Devices and the creator function of the
+#'  corresponding device.
+#'@param res Resolution of the device (file or window) to plot in. See
+#'  ?Devices and the creator function of the corresponding device.
+#'@param ... Arguments to be passed to the method. Only accepts the following +#' graphical parameters:\cr +#' adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt +#' csi cxy err family fg fig font font.axis font.lab font.main font.sub +#' lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt +#' smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +#' For more information about the parameters see `par`. +#' +#'@details +#'Examples of input:\cr +#'Model and observed output from \code{Load()} then \code{Clim()} then +#'\code{Ano()} then \code{Smoothing()}:\cr +#'(nmod, nmemb, nsdate, nltime) and (nobs, nmemb, nsdate, nltime)\cr +#'then averaged over the members\cr +#'\code{Mean1Dim(var_exp/var_obs, posdim = 2)}:\cr +#'(nmod, nsdate, nltime) and (nobs, nsdate, nltime)\cr +#'then passed through\cr +#' \code{Corr(exp, obs, posloop = 1, poscor = 2)} or\cr +#' \code{RMS(exp, obs, posloop = 1, posRMS = 2)}:\cr +#' (nmod, nobs, 3, nltime)\cr +#'would plot the correlations or RMS between each exp & each obs as a function +#'of the forecast time. +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#'smooth_ano_exp <- Smoothing(data = ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(data = ano_obs, runmeanlen = runmean_months) +#'dim_to_mean <- 'member' # mean along members +#'required_complete_row <- 'ftime' # discard startdates for which there are NA leadtimes +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, dim_to_mean), +#' MeanDims(smooth_ano_obs, dim_to_mean), +#' comp_dim = required_complete_row, +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#'# Combine corr results for plotting +#'corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, along = 0) +#'corr_combine <- Reorder(corr_combine, c(2, 3, 1, 4)) +#'\donttest{ +#'PlotVsLTime(corr_combine, toptitle = "correlations", ytitle = "correlation", +#' monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), +#' listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1)) +#' } +#' +#'@importFrom grDevices dev.cur dev.new dev.off +#'@importFrom stats ts +#'@export +PlotVsLTime <- function(var, toptitle = '', ytitle = '', monini = 1, freq = 12, + nticks = NULL, limits = NULL, + listexp = c('exp1', 'exp2', 'exp3'), + listobs = c('obs1', 'obs2', 'obs3'), biglab = FALSE, hlines = NULL, + leg = TRUE, siglev = FALSE, sizetit = 1, show_conf = TRUE, + fileout = NULL, + width = 8, height = 5, size_units = 'in', res = 100, ...) { + # Process the user graphical parameters that may be passed in the call + ## Graphical parameters to exclude + excludedArgs <- c("cex", "cex.axis", "cex.lab", "cex.main", "col", "fin", "lab", "las", "lend", "lty", "lwd", "mai", "mgp", "new", "pin", "ps", "pty") + userArgs <- .FilterUserGraphicArgs(excludedArgs, ...) 
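+
+  # For reference (illustrative shapes): a Corr() output combined and
+  # reordered as in the example above gives dim(var) = c(nexp, nobs, 4, nltime),
+  # with the third margin holding c(conf.lower, score, conf.upper, p.val).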
+
+  # If there are any filenames to store the graphics, process them
+  # to select the right device
+  if (!is.null(fileout)) {
+    deviceInfo <- .SelectDevice(fileout = fileout, width = width, height = height, units = size_units, res = res)
+    saveToFile <- deviceInfo$fun
+    fileout <- deviceInfo$files
+  }
+
+  #
+  #  Get some arguments
+  # ~~~~~~~~~~~~~~~~~~~~
+  #
+  if (length(dim(var)) == 3) {
+    var <- InsertDim(var, posdim = 2, lendim = 1)
+  } else if (length(dim(var)) != 4) {
+    stop("Parameter 'var' should have 3 or 4 dimensions: c(n. exp[, n. obs], 3/4, n. lead-times)")
+  }
+  nleadtime <- dim(var)[4]
+  nexp <- dim(var)[1]
+  nobs <- dim(var)[2]
+  if (is.null(limits)) {
+    if (all(is.na(var))) {
+      ll <- ul <- 0
+    } else {
+      ll <- min(var, na.rm = TRUE)
+      ul <- max(var, na.rm = TRUE)
+    }
+    if (biglab) {
+      ul <- ul + 0.4 * (ul - ll)
+    } else {
+      ul <- ul + 0.3 * (ul - ll)
+    }
+  } else {
+    ll <- limits[1]
+    ul <- limits[2]
+  }
+  lastyear <- (monini + (nleadtime - 1) * 12 / freq - 1) %/% 12
+  lastmonth <- (monini + (nleadtime - 1) * 12 / freq - 1) %% 12 + 1
+  empty_ts <- ts(start = c(0000, (monini - 1) %/% (12 / freq) + 1),
+                 end = c(lastyear, (lastmonth - 1) %/% (12 / freq) + 1),
+                 frequency = freq)
+  empty <- array(dim = length(empty_ts))
+  #
+  #  Define some plot parameters
+  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #
+  if (is.null(nticks)) {
+    if (biglab) {
+      nticks <- 5
+    } else {
+      nticks <- 10
+    }
+  }
+  labind <- seq(1, nleadtime, max(nleadtime %/% nticks, 1))
+  months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
+              "Oct", "Nov", "Dec")
+  labyear <- ((labind - 1) * 12 / freq + monini - 1) %/% 12
+  labmonth <- months[((labind - 1) * 12 / freq + monini - 1) %% 12 + 1]
+  for (jx in 1:length(labmonth)) {
+    y2o3dig <- paste("0", as.character(labyear[jx]), sep = "")
+    labmonth[jx] <- paste(labmonth[jx], "\nYr ", substr(y2o3dig, nchar(y2o3dig)
+                          - 1, nchar(y2o3dig)), sep = "")
+  }
+  color <- c("red1", "dodgerblue1", "green1", "orange1", "lightblue1",
+             "deeppink1", "mediumpurple1", "lightgoldenrod1", "olivedrab1",
+             "mediumorchid1")
+  type <- c(1, 3, 2, 4)
+  thickness <- array(dim = c(4, 4))
+  thickness[, 1] <- c(1, 2, 1, 1.5)
+  thickness[, 2] <- c(8, 12, 8, 10)
+  thickness[, 3] <- thickness[, 1]
+  thickness[, 4] <- c(4, 6, 4, 5)
+  if (siglev == TRUE) {
+    lines <- c("n", "l", "n", "l")
+  } else {
+    lines <- c("l", "l", "l", "n")
+  }
+  #
+  #  Define plot layout
+  # ~~~~~~~~~~~~~~~~~~~~
+  #
+
+  # Open connection to graphical device
+  if (!is.null(fileout)) {
+    saveToFile(fileout)
+  } else if (names(dev.cur()) == 'null device') {
+    dev.new(units = size_units, res = res, width = width, height = height)
+  }
+
+  # Load the user parameters
+  par(userArgs)
+
+  if (biglab) {
+    par(mai = c(1.25, 1.4, 0.5, 1), mgp = c(4, 2.5, 0))
+    par(cex = 1.3, cex.lab = 2, cex.axis = 1.8)
+    cexmain <- 2.2
+    legsize <- 1.5
+  } else {
+    par(mai = c(1, 1.1, 0.5, 0), mgp = c(3, 1.8, 0))
+    par(cex = 1.3, cex.lab = 1.5, cex.axis = 1.1)
+    cexmain <- 1.5
+    legsize <- 1
+  }
+  plot(empty, ylim = c(ll, ul), xlab = "Time (months)", ylab = ytitle,
+       main = toptitle, cex.main = cexmain * sizetit, axes = FALSE)
+  axis(1, at = labind, labels = labmonth)
+  axis(2)
+  box()
+  if (!is.null(hlines)) {
+    for (jy in 1:length(hlines)) {
+      par(new = TRUE)
+      abline(h = hlines[jy])
+    }
+  }
+  #
+  #  Loop on experimental & observational data
+  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  #
+  legendnames <- array(dim = nobs * nexp)
+  legendthick <- array(dim = nobs * nexp)
+  legendsty <- array(dim = nobs * nexp)
+  legendcol <- array(dim = nobs * nexp)
+  ind <- 1
+  if (show_conf == TRUE) {
+    start_line <- dim(var)[3]
+    end_line <- 1
+  } else {
+    start_line <- 2
+    end_line <- 2
+  }
+  for (jt in seq(start_line, end_line, -1)) {
+    ind <- 1
+    for (jexp in 1:nexp) {
+      for (jobs in 1:nobs) {
+        par(new = TRUE)
+        plot(var[jexp, jobs, jt, ], type = lines[jt], ylim = c(ll, ul),
+             col = color[jexp], lty = type[jobs], lwd = thickness[jobs, jt],
+             ylab = "", xlab = "", axes = FALSE)
+        legendnames[ind] <- paste(listexp[jexp], 'vs', listobs[jobs])
+        legendthick[ind] <- thickness[jobs, 1] * 3
+        legendsty[ind] <- type[jobs]
+        legendcol[ind] <- color[jexp]
+        ind <- ind + 1
+      }
+    }
+  }
+  if (leg) {
+    if (nobs == 1) {
+      legendnames <- listexp[1:nexp]
+    }
+    legend(1, ul, legendnames, lty = legendsty, lwd = legendthick,
+           col = legendcol, cex = legsize)
+  }
+
+  # If the graphic was saved to file, close the connection with the device
+  if (!is.null(fileout)) dev.off()
+}
diff --git a/R/ProbBins.R b/R/ProbBins.R
new file mode 100644
index 0000000000000000000000000000000000000000..327ceb34fafb48b6425373536e2b1bf7d70041dc
--- /dev/null
+++ b/R/ProbBins.R
@@ -0,0 +1,213 @@
+#'Compute probabilistic information of a forecast relative to a threshold or a quantile
+#'
+#'Compute probabilistic bins of a set of forecast years ('fcyr') relative to
+#'the forecast climatology over the whole period of anomalies, optionally excluding
+#'the selected forecast years ('fcyr') or the forecast year for which the
+#'probabilistic bins are being computed (see 'compPeriod').
+#'
+#'@param data A numeric array of anomalies with the dimensions 'time_dim' and
+#'  'memb_dim' at least. It can be generated by \code{Ano()}.
+#'@param thr A numeric vector used as the quantiles (if 'quantile' is TRUE) or
+#'  thresholds (if 'quantile' is FALSE) to bin the anomalies. If 'quantile' is
+#'  TRUE, the values must be within [0, 1].
+#'@param fcyr A numeric vector of the indices of the forecast years (i.e.,
+#'  time_dim) to compute the probabilistic bins for, or 'all' to compute the
+#'  bins for all the years. E.g., c(1:5), c(1, 4), 4, or 'all'. The default
+#'  value is 'all'.
+#'@param time_dim A character string indicating the dimension along which to
+#'  compute the probabilistic bins. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension or the dimension to be merged with 'time_dim' for probabilistic
+#'  calculation. The default value is 'member'.
+#'@param quantile A logical value indicating if the thresholds ('thr') are
+#'  quantiles (TRUE) or the absolute thresholds of the bins (FALSE). The
+#'  default value is TRUE.
+#'@param compPeriod A character string referring to three computation options:\cr
+#'  "Full period": The probabilities are computed based on 'data';\cr
+#'  "Without fcyr": The probabilities are computed based on 'data' with all
+#'  'fcyr' removed;\cr
+#'  "Cross-validation": The probabilities are computed based on leave-one-out
+#'  cross-validation.\cr
+#'  The default value is "Full period".
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
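+#'
+#'@details
+#'A sketch of the binning logic (illustrative values, not from a real dataset):
+#'with 'quantile = TRUE' and 'thr = c(1/3, 2/3)', the terciles of the hindcast
+#'sample are used as thresholds, so length(thr) + 1 = 3 categories are defined
+#'and each forecast value gets a 1 in exactly one position of the 'bin'
+#'dimension:
+#'\preformatted{
+#'# anomaly below the lower tercile   -> bin = c(1, 0, 0)
+#'# anomaly between the two terciles  -> bin = c(0, 1, 0)
+#'# anomaly above the upper tercile   -> bin = c(0, 0, 1)
+#'}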
+#'
+#'@return A numeric array of probabilistic information with dimensions:\cr
+#'  c(bin = length of 'thr' + 1, time_dim = length of 'fcyr', memb_dim, the
+#'  rest of dimensions of 'data')\cr
+#'  The values along the 'bin' dimension take values 0 or 1 depending on which
+#'  of the 'thr' + 1 categories the forecast or observation at the corresponding
+#'  grid point, time step, member and start date belongs to.
+#'
+#'@examples
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'clim <- Clim(sampleMap$mod, sampleMap$obs)
+#'ano_exp <- Ano(sampleMap$mod, clim$clim_exp)
+#'PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), quantile = TRUE)
+#'
+#'@import multiApply
+#'@importFrom abind abind
+#'@export
+ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'member',
+                     quantile = TRUE, compPeriod = "Full period",
+                     ncores = NULL) {
+
+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+#  if (is.null(dim(data))) {  #is vector
+#    dim(data) <- c(length(data))
+#    names(dim(data)) <- time_dim
+#  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## thr
+  if (is.null(thr)) {
+    stop("Parameter 'thr' cannot be NULL.")
+  }
+  if (!is.numeric(thr) | !is.vector(thr)) {
+    stop("Parameter 'thr' must be a numeric vector.")
+  } else if (quantile) {
+    if (!all(thr <= 1 & thr >= 0)) {
+      stop("Parameter 'thr' must be within the range [0, 1] if 'quantile' is TRUE.")
+    }
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimension.")
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(data))) {
+    stop("Parameter 'memb_dim' is not found in 'data' dimension.")
+  }
+  ## fcyr
+  if (!identical(fcyr, 'all')) {
+    if (!is.numeric(fcyr) | !is.vector(fcyr)) {
+      stop("Parameter 'fcyr' must be a numeric vector or 'all'.")
+    } else if (any(fcyr %% 1 != 0) | min(fcyr) < 1 | max(fcyr) > dim(data)[time_dim]) {
+      stop(paste0("Parameter 'fcyr' must be the indices of 'time_dim' within ",
+                  "the range [1, ", dim(data)[time_dim], "]."))
+    }
+  } else {
+    fcyr <- 1:dim(data)[time_dim]
+  }
+  ## quantile
+  if (!is.logical(quantile) | length(quantile) > 1) {
+    stop("Parameter 'quantile' must be one logical value.")
+  }
+  ## compPeriod
+  if (length(compPeriod) != 1 | any(!compPeriod %in% c('Full period', 'Without fcyr', 'Cross-validation'))) {
+    stop("Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', or 'Cross-validation'.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate ProbBins
+
+  res <- Apply(list(data),
+               target_dims = list(c(time_dim, memb_dim)),
+               output_dims = list(c('bin', time_dim, memb_dim)),
+               fun = .ProbBins,
+               thr = thr, fcyr = fcyr, quantile = quantile,
+               compPeriod = compPeriod,
+               ncores = ncores)$output1
+
+  return(res)
+}
+
+.ProbBins <- function(data, thr, fcyr = 'all', quantile, compPeriod = "Full period") {
+
+  # data: [sdate, member]
+
+  if (compPeriod != 'Cross-validation') {
+    # forecast
+    fore <- data[fcyr, ]
+    sample_fore <- as.vector(fore)  # vector: [fcyr + member]
+    # hindcast
+    if (compPeriod == "Full period") {
+      hind <- data
+      sample <- as.vector(hind)  # vector: [sdate + member]
+    } else if (compPeriod == "Without fcyr") {
+      hind <- data[-fcyr, ]
+      sample <- as.vector(hind)  # vector: [sdate - fcyr + member]
+    }
+
+    # quantiles
+    if (quantile) {
+      qum <- quantile(sample, probs = thr, na.rm = TRUE, names = FALSE,
+                      type = 8)  # vector: [length(thr)]
+    } else {
+      qum <- thr
+    }
+
+    # PBF: Probabilistic bins of a forecast.
+    # This array contains 0s and 1s that indicate the category where the forecast is.
+    PBF <- array(counts(c(qum, sample_fore), nbthr = length(thr)),
+                 dim = c(length(thr) + 1, length(fcyr), dim(data)[2]))
+#    names(dim(PBF)) <- c('bin', 'sdate', 'member')
+
+    return(invisible(PBF))
+
+  } else {  # Cross-validation
+
+    result <- NULL
+    for (iyr in fcyr) {
+      if (is.null(result)) {
+        result <- .ProbBins(data, fcyr = iyr, thr = thr, quantile = quantile,
+                            compPeriod = "Without fcyr")  # [bin, sdate, member]
+      } else {
+        result <- abind::abind(result, .ProbBins(data, fcyr = iyr, thr = thr,
+                                                 quantile = quantile,
+                                                 compPeriod = "Without fcyr"),
+                               along = 2)  # along sdate
+      }
+    }
+
+    return(result)
+
+  }
+
+}
+
+# This function assigns each value to the category delimited by the thresholds.
+# It provides binary information.
+counts <- function(dat, nbthr) {
+  thr <- dat[1:nbthr]
+  data <- dat[nbthr + 1:(length(dat) - nbthr)]
+  prob <- array(NA, dim = c(nbthr + 1, length(dat) - nbthr))
+  prob[1, ] <- 1 * (data <= thr[1])
+  if (nbthr != 1) {
+    for (ithr in 2:(nbthr)) {
+      prob[ithr, ] <- 1 * ((data > thr[ithr - 1]) & (data <= thr[ithr]))
+    }
+  }
+  prob[nbthr + 1, ] <- 1 * (data > thr[nbthr])
+  return(prob)
+}
+
diff --git a/R/ProjectField.R b/R/ProjectField.R
new file mode 100644
index 0000000000000000000000000000000000000000..309f3efd731824e3e031a473a22a657e7d7ed845
--- /dev/null
+++ b/R/ProjectField.R
@@ -0,0 +1,268 @@
+#'Project anomalies onto modes of variability
+#'
+#'Project anomalies onto modes of variability to get the temporal evolution of
+#'the selected EOF mode. It returns principal components (PCs) by area-weighted
+#'projection onto the EOF pattern (from \code{EOF()}) or the REOF pattern (from
+#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NAs and
+#'returns NA if the whole spatial pattern is NA.
+#'
+#'@param ano A numerical array of anomalies with named dimensions. The
+#'  dimensions must have at least 'time_dim' and 'space_dim'. It can be
+#'  generated by Ano().
+#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which
+#'  are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and
+#'  'space_dim' at least. 'wght' has dimensions space_dim. It can be generated
+#'  by EOF() or REOF().
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#'  name of latitude of 'ano' and the second is the dimension name of longitude
+#'  of 'ano'. The default value is c('lat', 'lon').
+#'@param mode An integer of the variability mode number in the EOF to be
+#'  projected on. The default value is NULL, which means all the modes of 'eof'
+#'  are calculated.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A numerical array of the principal components in the verification
+#'  format. The dimensions are the same as 'ano' except 'space_dim'.
+#'
+#'@seealso EOF, NAO, PlotBoxWhisker
+#'@examples
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon)
+#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon)
+#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1)
+#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1)
+#'
+#'\dontrun{
+#'  # Plot the forecast and the observation of the first mode for the last year
+#'  # of forecast
+#'  sdate_dim_length <- dim(mode1_obs)['sdate']
+#'  plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1),
+#'       lwd = 2)
+#'  for (i in 1:dim(mode1_exp)['member']) {
+#'    par(new = TRUE)
+#'    plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i],
+#'         ylim = c(-15000, 15000))
+#'  }
+#'}
+#'
+#'@import multiApply
+#'@export
+ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'),
+                         mode = NULL, ncores = NULL) {
+
+  # Check inputs
+  ## ano (1)
+  if (is.null(ano)) {
+    stop("Parameter 'ano' cannot be NULL.")
+  }
+  if (!is.numeric(ano)) {
+    stop("Parameter 'ano' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) {
+    stop("Parameter 'ano' must have dimension names.")
+  }
+  ## eof (1)
+  if (is.null(eof)) {
+    stop("Parameter 'eof' cannot be NULL.")
+  }
+  if (!is.list(eof)) {
+    stop("Parameter 'eof' must be a list generated by EOF() or REOF().")
+  }
+  if ('EOFs' %in% names(eof)) {
+    EOFs <- "EOFs"
+  } else if ('REOFs' %in% names(eof)) {
+    EOFs <- "REOFs"
+  } else if ('patterns' %in% names(eof)) {
+    EOFs <- "patterns"
+  } else {
+    stop(paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ",
+                "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC()."))
+  }
+  if (!'wght' %in% names(eof)) {
+    stop(paste0("Parameter 'eof' must be a list that contains 'wght'. ",
+                "It can be generated by EOF() or REOF()."))
+  }
+  if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) {
+    stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.")
+  }
+  if (!is.numeric(eof$wght) || !is.array(eof$wght)) {
+    stop("The component 'wght' of parameter 'eof' must be a numeric array.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(ano))) {
+    stop("Parameter 'time_dim' is not found in 'ano' dimension.")
+  }
+  ## space_dim
+  if (!is.character(space_dim) | length(space_dim) != 2) {
+    stop("Parameter 'space_dim' must be a character vector of 2.")
+  }
+  if (any(!space_dim %in% names(dim(ano)))) {
+    stop("Parameter 'space_dim' is not found in 'ano' dimension.")
+  }
+  ## ano (2)
+  if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) {
+    stop(paste0("Parameter 'ano' must be an array with dimensions named as ",
+                "parameter 'space_dim' and 'time_dim'."))
+  }
+  ## eof (2)
+  if (!all(space_dim %in% names(dim(eof[[EOFs]]))) |
+      !'mode' %in% names(dim(eof[[EOFs]]))) {
+    stop(paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ",
+                "with dimensions named as parameter 'space_dim' and 'mode'."))
+  }
+  if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) {
+    stop(paste0("The component 'wght' of parameter 'eof' must be an array ",
+                "with dimensions named as parameter 'space_dim'."))
+  }
+  ## mode
+  if (!is.null(mode)) {
+    if (!is.numeric(mode) | mode %% 1 != 0 | mode <= 0 | length(mode) > 1) {
+      stop("Parameter 'mode' must be NULL or a positive integer.")
+    }
+    if (mode > dim(eof[[EOFs]])['mode']) {
+      stop(paste0("Parameter 'mode' is greater than the number of available ",
+                  "modes in 'eof'."))
+    }
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+#-------------------------------------------------------
+
+  # Keep the chosen mode
+  if (!is.null(mode)) {
+    eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected')
+  } else {
+    eof_mode <- eof[[EOFs]]
+  }
+
+  if ('mode' %in% names(dim(eof_mode))) {
+    dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')]
+  } else {
+    dimnames_without_mode <- names(dim(eof_mode))
+  }
+
+  if (all(dimnames_without_mode %in% space_dim)) {  # eof_mode: [lat, lon] or [mode, lat, lon]
+    if ('mode' %in% names(dim(eof_mode))) {
+      eof_mode_target <- c('mode', space_dim)
+      output_dims <- c('mode', time_dim)
+    } else {
+      eof_mode_target <- space_dim
+      output_dims <- time_dim
+    }
+    res <- Apply(list(ano, eof_mode),
+                 target_dims = list(c(time_dim, space_dim),
+                                    eof_mode_target),
+                 output_dims = output_dims,
+                 wght = eof$wght,
+                 fun = .ProjectField,
+                 ncores = ncores)$output1
+
+  } else {
+
+    if (!all(dimnames_without_mode %in% names(dim(ano)))) {
+      stop(paste0("The array 'EOF' in parameter 'eof' has dimensions that are ",
+                  "not in parameter 'ano'. Check if 'ano' and 'eof' are compatible."))
+    }
+
+    common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)]
+    if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] !=
+            dim(eof_mode)[dimnames_without_mode])) {
+      stop(paste0("Found that parameter 'ano' and 'EOF' in parameter 'eof' have ",
+                  "common dimensions with different lengths. Check if 'ano' and ",
+                  "'eof' are compatible."))
+    }
+
+    # Enlarge eof/ano if needed. The margin_dims of Apply() must be consistent
+    # between ano and eof.
+    additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))]
+    additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)]
+    if (length(additional_dims) != 0) {
+      for (i in 1:length(additional_dims)) {
+        eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1),
+                              lendim = additional_dims[i], name = names(additional_dims)[i])
+      }
+    }
+    if ('mode' %in% names(dim(eof_mode))) {
+      eof_mode_target <- c('mode', space_dim)
+      output_dims <- c('mode', time_dim)
+    } else {
+      eof_mode_target <- space_dim
+      output_dims <- time_dim
+    }
+    res <- Apply(list(ano, eof_mode),
+                 target_dims = list(c(time_dim, space_dim),
+                                    eof_mode_target),
+                 output_dims = output_dims,
+                 wght = eof$wght,
+                 fun = .ProjectField,
+                 ncores = ncores)$output1
+  }
+
+  return(res)
+}
+
+
+.ProjectField <- function(ano, eof_mode, wght) {
+  # ano: [sdate, lat, lon]
+  # eof_mode: [lat, lon] or [mode, lat, lon]
+  # wght: [lat, lon]
+
+  ntime <- dim(ano)[1]
+
+  if (length(dim(eof_mode)) == 2) {  # mode != NULL
+    # Initialization of pc.ver.
+    pc.ver <- array(NA, dim = ntime)  # [sdate]
+
+    # Weight
+    e.1 <- eof_mode * wght
+    ano <- ano * InsertDim(wght, 1, ntime)
+
+    na <- apply(ano, 1, mean, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    tmp <- ano * InsertDim(e.1, 1, ntime)  # [sdate, lat, lon]
+    pc.ver <- apply(tmp, 1, sum, na.rm = TRUE)
+    pc.ver[which(is.na(na))] <- NA
+
+  } else {  # mode = NULL
+    # Weight
+    e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1])
+    dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3]))  # [mode, lat*lon]
+    ano <- ano * InsertDim(wght, 1, ntime)
+    dim(ano) <- c(ntime, prod(dim(ano)[2:3]))  # [sdate, lat*lon]
+
+    na <- apply(ano, 1, mean, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1))
+
+    # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate]
+    # Result: [mode, sdate]
+    pc.ver <- e.1 %*% t(ano)
+    pc.ver[which(is.na(na))] <- NA
+
+#    # Change back dimensions to fit original input
+#    dim(projection) <- c(moredims, mode = unname(neofs))
+#    return(projection)
+  }
+
+  return(pc.ver)
+}
+
diff --git a/R/REOF.R b/R/REOF.R
new file mode 100644
index 0000000000000000000000000000000000000000..c9c82cf94e1091f88ea7a8c71dd2957e5079de80
--- /dev/null
+++ b/R/REOF.R
@@ -0,0 +1,224 @@
+#'Area-weighted empirical orthogonal function analysis with varimax rotation using SVD
+#'
+#'Perform an area-weighted EOF analysis with varimax rotation using singular
+#'value decomposition (SVD) based on a covariance matrix or a correlation
+#'matrix if parameter 'corr' is set to TRUE. The s2dv internal function
+#'\code{.EOF()} is used for the unrotated analysis.
+#'
+#'@param ano A numerical array of anomalies with named dimensions to calculate
+#'  REOF. The dimensions must have at least 'time_dim' and 'space_dim'.
+#'@param lat A vector of the latitudes of 'ano'.
+#'@param lon A vector of the longitudes of 'ano'.
+#'@param time_dim A character string indicating the name of the time dimension
+#'  of 'ano'. The default value is 'sdate'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#'  name of latitude of 'ano' and the second is the dimension name of longitude
+#'  of 'ano'. The default value is c('lat', 'lon').
+#'@param ntrunc A positive integer of the number of EOFs to be kept for varimax
+#'  rotation. This function uses this value as 'neof' too, which is the number
+#'  of EOFs returned by \code{.EOF()}. The default value is 15. If time length
+#'  or the product of latitude length and longitude length is less than
+#'  'ntrunc', 'ntrunc' is equal to the minimum of the three values.
+#'@param corr A logical value indicating whether to base on a correlation (TRUE)
+#'  or on a covariance matrix (FALSE). The default value is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return
+#'A list containing:
+#'\item{REOFs}{
+#'  An array of REOF patterns normalized to 1 (unitless) with dimensions
+#'  (number of modes, the rest of the dimensions of 'ano' except
+#'  'time_dim'). Multiplying 'REOFs' by 'RPCs' reconstructs the original
+#'  field.
+#'}
+#'\item{RPCs}{
+#'  An array of principal components with the units of the original field to
+#'  the power of 2, with dimensions (time_dim, number of modes, the rest of the
+#'  dimensions of 'ano' except 'space_dim').
+#'}
+#'\item{var}{
+#'  An array of the percentage (%) of the total variance explained by each
+#'  mode. The dimensions are (number of modes, the rest of the dimensions
+#'  except 'time_dim' and 'space_dim').
+#'}
+#'\item{wght}{
+#'  An array of the area weighting with dimensions 'space_dim'. It is
+#'  calculated by the square root of the cosine of 'lat' and used to compute
+#'  the fraction of variance explained by each REOF.
+#'}
+#'
+#'@seealso EOF
+#'@examples
+#'# This example computes the REOFs along forecast horizons and plots the one
+#'# that explains the greatest amount of variability. The example data has low
+#'# resolution so the result may not be illustrative, but it shows how to
+#'# use this function.
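+#'# Note: with this input, 'res$var' should have dimensions [mode, ftime], so
+#'# which.max(res$var[, 1]) identifies the leading mode at the first lead time.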
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'ano <- MeanDims(ano$exp, c('dataset', 'member'))
+#'res <- REOF(ano, lat = sampleData$lat, lon = sampleData$lon, ntrunc = 5)
+#'\dontrun{
+#'PlotEquiMap(res$REOFs[1, , , 1], sampleData$lat, sampleData$lon)
+#'}
+#'
+#'@import multiApply
+#'@importFrom stats varimax
+#'@export
+REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate',
+                 space_dim = c('lat', 'lon'), corr = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## ano
+  if (is.null(ano)) {
+    stop("Parameter 'ano' cannot be NULL.")
+  }
+  if (!is.numeric(ano)) {
+    stop("Parameter 'ano' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) {
+    stop("Parameter 'ano' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(ano))) {
+    stop("Parameter 'time_dim' is not found in 'ano' dimension.")
+  }
+  ## space_dim
+  if (!is.character(space_dim) | length(space_dim) != 2) {
+    stop("Parameter 'space_dim' must be a character vector of 2.")
+  }
+  if (any(!space_dim %in% names(dim(ano)))) {
+    stop("Parameter 'space_dim' is not found in 'ano' dimension.")
+  }
+  ## lat
+  if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) {
+    stop(paste0("Parameter 'lat' must be a numeric vector with the same ",
+                "length as the latitude dimension of 'ano'."))
+  }
+  if (any(lat > 90 | lat < -90)) {
+    stop("Parameter 'lat' must contain values within the range [-90, 90].")
+  }
+  ## lon
+  if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) {
+    stop(paste0("Parameter 'lon' must be a numeric vector with the same ",
+                "length as the longitude dimension of 'ano'."))
+  }
+  if (all(lon >= 0)) {
+    if (any(lon > 360 | lon < 0)) {
+      stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].")
+    }
+  } else {
+    if (any(lon < -180 | lon > 180)) {
+      stop("Parameter 'lon' must be within the range [-180, 180] or [0, 360].")
+    }
+  }
+  ## ntrunc
+  if (!is.numeric(ntrunc) | ntrunc %% 1 != 0 | ntrunc <= 0 | length(ntrunc) > 1) {
+    stop("Parameter 'ntrunc' must be a positive integer.")
+  }
+  ## corr
+  if (!is.logical(corr) | length(corr) > 1) {
+    stop("Parameter 'corr' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate REOF
+
+  # ntrunc is bounded
+  if (ntrunc != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)) {
+    ntrunc <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), ntrunc)
+    .warning(paste0("Parameter 'ntrunc' is changed to ", ntrunc, ", the minimum among ",
+                    "the length of time_dim, the product of the lengths of space_dim, ",
+                    "and ntrunc."))
+  }
+
+  # Area weighting is needed to compute the fraction of variance explained by
+  # each mode
+  space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a))
+  wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind])
+
+  # We want the covariance matrix to be weighted by the grid
+  # cell area so the anomaly field is weighted by its square
+  # root since the covariance matrix equals transpose(ano)
+  # times ano.
+  wght <- sqrt(wght)
+
+  res <- Apply(ano,
+               target_dims = c(time_dim, space_dim),
+               output_dims = list(REOFs = c('mode', space_dim),
+                                  RPCs = c(time_dim, 'mode'),
+                                  var = 'mode'),
+               fun = .REOF,
+               corr = corr, ntrunc = ntrunc, wght = wght,
+               ncores = ncores)
+
+  return(c(res, wght = list(wght)))
+
+}
+
+
+.REOF <- function(ano, ntrunc, corr = FALSE, wght) {
+  # ano: [sdate, lat, lon]
+
+  # Dimensions
+  nt <- dim(ano)[1]
+  ny <- dim(ano)[2]
+  nx <- dim(ano)[3]
+
+  # Get the first ntrunc EOFs:
+  eofs <- .EOF(ano = ano, neofs = ntrunc, corr = corr, wght = wght)  # list(EOFs = EOF, PCs = PC, var = var.eof, mask = mask)
+
+  # Recover loadings (with norm 1), weight the EOFs by the weights
+  # eofs$EOFs: [mode, lat, lon]
+  Loadings <- apply(eofs$EOFs, 1, '*', wght)  # [lat*lon, mode]
+
+  # Rotate the loadings:
+  varim <- varimax(Loadings)
+
+  # Weight back the rotated loadings (REOFs):
+  if (is.list(varim)) {
+    varim_loadings <- varim$loadings  # [lat*lon, mode]
+  } else {  # if mode = 1, varim is an array
+    varim_loadings <- varim
+  }
+  REOFs <- apply(varim_loadings, 2, '/', wght)
+  dim(REOFs) <- c(ny, nx, ntrunc)
+
+  # Reorder dimensions to match EOF conventions: [mode, lat, lon]
+  REOFs <- aperm(REOFs, c(3, 1, 2))
+
+  # Compute the rotated PCs (RPCs): multiply the weighted anomalies by the loading patterns.
+  ano.wght <- apply(ano, 1, '*', wght)  # [lat*lon, sdate]
+  RPCs <- t(ano.wght) %*% varim_loadings  # [sdate, mode]
+
+  ## Alternative methods suggested here:
+  ## https://stats.stackexchange.com/questions/59213/how-to-compute-varimax-rotated-principal-components-in-r/137003#137003
+  ## gives same results as pinv is just transpose in this case, as loadings are orthonormal!
+  # invLoadings <- t(pracma::pinv(varim$loadings))  ## invert and transpose the rotated loadings. pinv uses a SVD again (!)
+  # RPCs <- ano.wght %*% invLoadings
+
+  # Compute explained variance fraction:
+  var <- apply(RPCs, 2, function(x) { sum(x * x) }) * 100 / eofs$tot_var  # [mode]
+  dim(var) <- c(mode = length(var))
+
+  return(invisible(list(REOFs = REOFs, RPCs = RPCs, var = var)))
+}
diff --git a/R/RMS.R b/R/RMS.R
index c2cb8bccd5405fae4bcb31aeb35119818c3cf187..b3c8ad4b016d18cc9f3dfd79ddaeb3bb38ba15b0 100644
--- a/R/RMS.R
+++ b/R/RMS.R
@@ -143,7 +143,7 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
@@ -190,12 +190,13 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
                          c(time_dim, dat_dim)),
                 fun = .RMS,
                 time_dim = time_dim, dat_dim = dat_dim,
-                conf = conf, conf.lev = conf.lev, ncores = ncores)
+                conf = conf, conf.lev = conf.lev, ncores_input = ncores,
+                ncores = ncores)
 
   return(res)
 }
 
 .RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
-                 conf = TRUE, conf.lev = 0.95) {
+                 conf = TRUE, conf.lev = 0.95, ncores_input = NULL) {
 
   # exp: [sdate, dat_exp]
   # obs: [sdate, dat_obs]
@@ -220,7 +221,7 @@ RMS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 
   if (conf) {
     #eno <- Eno(dif, 1)  #count effective sample along sdate. dim = c(nexp, nobs)
-    eno <- Eno(dif, time_dim)  #change to this line when Eno() is done
+    eno <- Eno(dif, time_dim, ncores = ncores_input)  #change to this line when Eno() is done
 
     # conf.lower
     chi <- sapply(1:nobs, function(i) {
diff --git a/R/RMSSS.R b/R/RMSSS.R
index a00606666df797dbb441e87191f7d7d4a0ac12e1..5fa96596ebacc4a2d46ed4f9e511adf504eb3e04 100644
--- a/R/RMSSS.R
+++ b/R/RMSSS.R
@@ -108,7 +108,7 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
@@ -143,13 +143,14 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
                          c(time_dim, dat_dim)),
                 fun = .RMSSS,
                 time_dim = time_dim, dat_dim = dat_dim,
-                pval = pval,  #conf = conf, conf.lev = conf.lev,
+                pval = pval, ncores_input = ncores,
                 ncores = ncores)
 
   return(res)
 }
 
-.RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE) {
+.RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset', pval = TRUE,
+                   ncores_input = NULL) {
   # exp: [sdate, dat_exp]
   # obs: [sdate, dat_obs]
   nexp <- as.numeric(dim(exp)[2])
@@ -189,8 +190,8 @@ RMSSS <- function(exp, obs, time_dim = 'sdate', dat_dim = 'dataset',
 
   ## pval and conf
   if (pval) {
-    eno1 <- Eno(dif1, time_dim)
-    eno2 <- Eno(obs, time_dim)
+    eno1 <- Eno(dif1, time_dim, ncores = ncores_input)
+    eno2 <- Eno(obs, time_dim, ncores = ncores_input)
     eno2 <- array(eno2, dim = c(nobs = nobs, nexp = nexp))
     eno2 <- Reorder(eno2, c(2, 1))
   }
diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R
index e818f57de0fe6e394d60bfbb7fdbccee9dc50946..494be6520dba4d6aedff0449b23e59a321e00732 100644
--- a/R/RandomWalkTest.R
+++ b/R/RandomWalkTest.R
@@ -57,7 +57,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){
     stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.")
   }
   if (!is.null(ncores)){
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | length(ncores) > 1){
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){
      stop("Parameter 'ncores' must be a positive integer.")
    }
  }
diff --git a/R/RatioRMS.R b/R/RatioRMS.R
new file mode 100644
index 0000000000000000000000000000000000000000..f7e34b42f01fb2893a3d38112b49b69ce7c6d3b1
--- /dev/null
+++ b/R/RatioRMS.R
@@ -0,0 +1,188 @@
+#'Compute the ratio between the RMSE of two experiments
+#'
+#'Calculate the ratio of the RMSE for two forecasts with the same observation,
+#'that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a
+#'two-sided Fisher test.
+#'
+#'@param exp1 A numeric array with named dimensions of the first experimental
+#'  data. It must have at least 'time_dim' and have the same dimensions as
+#'  'exp2' and 'obs'.
+#'@param exp2 A numeric array with named dimensions of the second experimental
+#'  data. It must have at least 'time_dim' and have the same dimensions as
+#'  'exp1' and 'obs'.
+#'@param obs A numeric array with named dimensions of the observational data.
+#'  It must have at least 'time_dim' and have the same dimensions as 'exp1' and
+#'  'exp2'.
+#'@param time_dim A character string of the dimension name along which RMS is
+#'  computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of Ho:
+#'  RMSE1/RMSE2 = 1 or not. The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list containing the numeric arrays with dimensions identical to
+#'  'exp1', 'exp2', and 'obs', except 'time_dim':
+#'\item{$ratiorms}{
+#'  The ratio between the RMSE (i.e., RMSE1/RMSE2).
+#'}
+#'\item{$p.val}{
+#'  The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only
+#'  exists if 'pval' is TRUE.
+#'}
+#'
+#'@examples
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'# Compute DJF seasonal means and anomalies.
+#'initial_month <- 11
+#'mean_start_month <- 12
+#'mean_stop_month <- 2
+#'sampleData$mod <- Season(sampleData$mod, monini = initial_month,
+#'                         moninf = mean_start_month, monsup = mean_stop_month)
+#'sampleData$obs <- Season(sampleData$obs, monini = initial_month,
+#'                         moninf = mean_start_month, monsup = mean_stop_month)
+#'clim <- Clim(sampleData$mod, sampleData$obs)
+#'ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+#'ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+#'# Generate two experiments with 2 and 1 members from the only experiment
+#'# available in the sample data. Take only data values for a single forecast
+#'# time step.
+#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2))
+#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3))
+#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected')
+#'# Compute ensemble mean and provide as inputs to RatioRMS.
+#'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'),
+#'                 MeanDims(ano_exp_2, 'member'),
+#'                 MeanDims(ano_obs, 'member'))
+#'# Plot the RatioRMS for the first forecast time step.
+#'\donttest{ +#'PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, +#' toptitle = 'Ratio RMSE') +#'} +#' +#'@import multiApply +#'@export +RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = NULL) { + + # Check inputs + ## exp1, exp2, obs + if (is.null(exp1) | is.null(exp2) | is.null(obs)) { + stop("Parameter 'exp1', 'exp2', and 'obs' cannot be NULL.") + } + if (!is.numeric(exp1) | !is.numeric(exp2) | !is.numeric(obs)) { + stop("Parameter 'exp1', 'exp2', and 'obs' must be a numeric array.") + } + if (is.null(dim(exp1))) { #is vector + dim(exp1) <- c(length(exp1)) + names(dim(exp1)) <- time_dim + } + if (is.null(dim(exp2))) { #is vector + dim(exp2) <- c(length(exp2)) + names(dim(exp2)) <- time_dim + } + if (is.null(dim(obs))) { #is vector + dim(obs) <- c(length(obs)) + names(dim(obs)) <- time_dim + } + if(any(is.null(names(dim(exp1))))| any(nchar(names(dim(exp1))) == 0) | + any(is.null(names(dim(exp2))))| any(nchar(names(dim(exp2))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp1', 'exp2', and 'obs' must have dimension names.") + } + if(!all(names(dim(exp1)) %in% names(dim(exp2))) | + !all(names(dim(exp2)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp1)))) { + stop("Parameter 'exp1', 'exp2', and 'obs' must have same dimension names.") + } + name_1 <- sort(names(dim(exp1))) + name_2 <- sort(names(dim(exp2))) + name_3 <- sort(names(dim(obs))) + if (!all(dim(exp1)[name_1] == dim(exp2)[name_2]) | + !all(dim(exp1)[name_1] == dim(obs)[name_3])) { + stop(paste0("Parameter 'exp1', 'exp2', and 'obs' must have the same length of ", + "all the dimensions.")) + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp1))) { + stop("Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate RatioRMS + if (is.null(ncores)) { + use_Apply <- FALSE + } else if (ncores == 1) { + use_Apply <- FALSE + } else { + use_Apply <- TRUE + } + + if (use_Apply) { + res <- Apply(list(exp1, exp2, obs), + target_dims = list(c(names(dim(exp1))), + c(names(dim(exp1))), + c(names(dim(exp1)))), + fun = .RatioRMS, + time_dim = time_dim, pval = pval, + ncores = ncores) + } else { + res <- .RatioRMS(exp1, exp2, obs, time_dim = time_dim, pval = pval) + } + + return(res) +} + +.RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE) { + + # exp1, exp2, obs: [all_dim] + dif1 <- exp1 - obs + dif2 <- exp2 - obs + rms1 <- MeanDims(dif1^2, time_dim, na.rm = TRUE)^0.5 + rms2 <- MeanDims(dif2^2, time_dim, na.rm = TRUE)^0.5 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs(rms2), na.rm = TRUE) / 1000 + ratiorms <- rms1 / rms2 + + if (pval) { + eno1 <- Eno(dif1, time_dim) + eno2 <- Eno(dif2, time_dim) + F <- (eno1 * (rms1) ** 2 / (eno1 - 1)) / (eno2 * (rms2) ** 2 / (eno2 - 1)) + F[which(F < 1)] <- 1 / F[which(F < 1)] + + if (is.null(dim(ratiorms))) { + p.val <- c() + } else { + p.val <- array(dim = dim(ratiorms)) + } + avail_ind <- which(!is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2) + p.val[avail_ind] <- (1 - 
pf(F[avail_ind], eno1[avail_ind] - 1, eno2[avail_ind] - 1)) * 2
+    ratiorms[-avail_ind] <- NA
+  }
+
+  if (pval) {
+    return(invisible(list(ratiorms = ratiorms, p.val = p.val)))
+  } else {
+    return(invisible(list(ratiorms = ratiorms)))
+  }
+}
diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R
new file mode 100644
index 0000000000000000000000000000000000000000..4d833d7ff12e05085c9bea899e8950ada803b4fd
--- /dev/null
+++ b/R/RatioSDRMS.R
@@ -0,0 +1,200 @@
+#'Compute the ratio between the ensemble spread and RMSE
+#'
+#'Compute the ratio between the standard deviation of the members around the
+#'ensemble mean in experimental data and the RMSE between the ensemble mean of
+#'experimental and observational data. The p-value is provided by a one-sided
+#'Fisher test.
+#'
+#'@param exp A named numeric array of experimental data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'.
+#'@param obs A named numeric array of observational data with at least two
+#'  dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as
+#'  parameter 'exp' except along 'dat_dim' and 'memb_dim'.
+#'@param dat_dim A character string indicating the name of dataset (nobs/nexp)
+#'  dimension. If there is no dataset dimension, set as NULL. The default value
+#'  is 'dataset'.
+#'@param memb_dim A character string indicating the name of the member
+#'  dimension. It must be one dimension in 'exp' and 'obs'. The default value
+#'  is 'member'.
+#'@param time_dim A character string indicating the name of dimension along
+#'  which the ratio is computed. The default value is 'sdate'.
+#'@param pval A logical value indicating whether to compute the p-value of the
+#'  test Ho: SD/RMSE = 1 or not. The default value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of
+#'  dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is
+#'  the length of dat_dim of 'exp' and nobs is the length of dat_dim of
+#'  'obs'.\cr\cr
+#'\item{$ratio}{
+#'  The ratio of the ensemble spread and RMSE.
+#'}
+#'\item{$p.val}{
+#'  The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present
+#'  if \code{pval = TRUE}.
+#'}
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs)
+#'# Reorder the data in order to plot it with PlotVsLTime
+#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3]))
+#'rsdrms_plot[, , 2, ] <- rsdrms$ratio
+#'rsdrms_plot[, , 4, ] <- rsdrms$p.val
+#'\donttest{
+#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "",
+#'            monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'),
+#'            listobs = c('ERSST'), biglab = FALSE, siglev = TRUE,
+#'            fileout = 'tos_rsdrms.eps')
+#'}
+#'
+#'@import multiApply
+#'@export
+RatioSDRMS <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member',
+                       time_dim = 'sdate', pval = TRUE, ncores = NULL) {
+
+  # Check inputs
+  ## exp and obs (1)
+  if (is.null(exp) | is.null(obs)) {
+    stop("Parameter 'exp' and 'obs' cannot be NULL.")
+  }
+  if (!is.numeric(exp) | !is.numeric(obs)) {
+    stop("Parameter 'exp' and 'obs' must be a numeric array.")
+  }
+  if (is.null(dim(exp)) | is.null(dim(obs))) {
+    stop(paste0("Parameter 'exp' and 'obs' must be arrays with at least two ",
+                "dimensions memb_dim and time_dim."))
+  }
+  if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
+      any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+    stop("Parameter 'exp' and 'obs' must have dimension names.")
+  }
+  if (!all(names(dim(exp)) %in% names(dim(obs))) |
+      !all(names(dim(obs)) %in% names(dim(exp)))) {
+    stop("Parameter 'exp' and 'obs' must have the same dimension names.")
+  }
+  ## dat_dim
+  if (!is.null(dat_dim)) {
+    if (!is.character(dat_dim) | length(dat_dim) > 1) {
+      stop("Parameter 'dat_dim' must be a character string.")
+    }
+    if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
+      stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
+    }
+  }
+  ## memb_dim
+  if (!is.character(memb_dim) | length(memb_dim) > 1) {
+    stop("Parameter 'memb_dim' must be a character string.")
+  }
+  if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
+    stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
+    stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
+  }
+  ## exp and obs (2)
+  name_exp <- sort(names(dim(exp)))
+  name_obs <- sort(names(dim(obs)))
+  if (!is.null(dat_dim)) {
+    name_exp <- name_exp[-which(name_exp == dat_dim)]
+    name_obs <- name_obs[-which(name_obs == dat_dim)]
+  }
+  name_exp <- name_exp[-which(name_exp == memb_dim)]
+  name_obs <- name_obs[-which(name_obs == memb_dim)]
+  if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
+    stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
                 "all the dimensions except 'dat_dim' and 'memb_dim'."))
+  }
+  ## pval
+  if (!is.logical(pval) | length(pval) > 1) {
+    stop("Parameter 'pval' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+
+  ###############################
+  # Calculate RatioSDRMS
+
+  # If dat_dim = NULL, insert dat dim
+  remove_dat_dim <- FALSE
+  if (is.null(dat_dim)) {
+    dat_dim <- 'dataset'
+    exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset')
+    obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset')
+    remove_dat_dim <- TRUE
+  }
+
+  res <- Apply(list(exp, obs),
+               target_dims = list(c(dat_dim, memb_dim, time_dim),
+                                  c(dat_dim, memb_dim, time_dim)),
+               pval = pval,
+               fun = .RatioSDRMS,
+               ncores = ncores)
+
+  if (remove_dat_dim) {
+    if (length(dim(res[[1]])) > 2) {
+      res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected')
+    } else {
+      res <- lapply(res, as.numeric)
+    }
+  }
+
+  return(res)
+}
+
+.RatioSDRMS <- function(exp, obs, pval = TRUE) {
+
+  # exp: [dat_exp, member, sdate]
+  # obs: [dat_obs, member, sdate]
+  nexp <- dim(exp)[1]
+  nobs <- dim(obs)[1]
+
+  # ensemble mean
+  ens_exp <- MeanDims(exp, 2, na.rm = TRUE)  # [dat, sdate]
+  ens_obs <- MeanDims(obs, 2, na.rm = TRUE)
+
+  dif <- exp - InsertDim(ens_exp, 2, dim(exp)[2])  # [nexp, member, sdate]
+  std <- apply(dif, 1, sd, na.rm = TRUE)  # [nexp]
+  enosd <- apply(Eno(dif, names(dim(exp))[3]), 1, sum, na.rm = TRUE)
+
+  # Create empty arrays
+  ratiosdrms <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs)))  # [nexp, nobs]
+  p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs)))  # [nexp, nobs]
+
+  for (jexp in 1:nexp) {
+    for (jobs in 1:nobs) {
+      dif <- ens_exp[jexp, ] - ens_obs[jobs, ]
+      rms <- mean(dif^2, na.rm = TRUE)^0.5
+      enorms <- Eno(dif)
+      ratiosdrms[jexp, jobs] <- std[jexp] / rms
+
+      if (pval) {
+        F <- (enosd[jexp] * std[jexp]^2 / (enosd[jexp] - 1)) / (enorms * rms^2 / (enorms - 1))
+        if (!is.na(F) & !is.na(enosd[jexp]) & !is.na(enorms) & enosd[jexp] > 2 & enorms > 2) {
+          p.val[jexp, jobs] <- 1 - pf(F, enosd[jexp] - 1, enorms - 1)
+        } else {
+          ratiosdrms[jexp, jobs] <- NA
+        }
+      }
+    }
+  }
+
+  if (pval) {
+    return(invisible(list(ratio = ratiosdrms, p.val = p.val)))
+  } else {
+    return(invisible(list(ratio = ratiosdrms)))
+  }
+}
diff --git a/R/Regression.R b/R/Regression.R
index 8e5d8af41cb26c09ef37bff974d7584d06067a8f..244ddc729e94f8046b9c0ff1f0c47c89b08df62b 100644
--- a/R/Regression.R
+++ b/R/Regression.R
@@ -153,7 +153,7 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x,
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
diff --git a/R/Reorder.R b/R/Reorder.R
index 8a248e936a5b9ab8f821690e2712064434a41142..04312071a110a35f5feb396cd0fbe18f494ab010 100644
--- a/R/Reorder.R
+++ b/R/Reorder.R
@@ -58,11 +58,7 @@ Reorder <- function(data, order) {
 
   ## If order is character string, find the indices
   if (is.character(order)) {
-    tmp <- rep(0, length(order))
-    for (i in 1:length(order)) {
-      tmp[i] <- which(names(dim(data)) == order[i])
-    }
-    order <- tmp
+    order <- match(order, names(dim(data)))
   }
 
   ## reorder
diff --git a/R/SPOD.R b/R/SPOD.R
index 30527f136223cf57ce65d0c2f4495df0a4c8cada..51717b31ee05cedb757347e3bcf7b02ba644c75b 100644
--- a/R/SPOD.R
+++ b/R/SPOD.R
@@ -4,13 +4,14 @@
 #'Nino-Southern Oscillation (ENSO) and the Interdecadal Pacific Oscillation
 #'(IPO). The SPOD index is computed as the difference of weighted-averaged SST
 #'anomalies over 20ºS-48ºS, 165ºE-190ºE (NW pole) and the weighted-averaged SST
-#' anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020).
+#'anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020).
+#'If different members and/or datasets are provided, the climatology (used to
+#'calculate the anomalies) is computed individually for all of them.
 #'
-#'@param data A numerical array to be used for the index computation with the
-#'  dimensions: 1) latitude, longitude, start date, forecast month, and member
-#'  (in case of decadal predictions), 2) latitude, longitude, year, month and
-#'  member (in case of historical simulations), or 3) latitude, longitude, year
-#'  and month (in case of observations or reanalyses). This data has to be
+#'@param data A numerical array to be used for the index computation with, at least, the
+#'  dimensions: 1) latitude, longitude, start date and forecast month
+#'  (in case of decadal predictions), 2) latitude, longitude, year and month
+#'  (in case of historical simulations or observations). This data has to be
 #'  provided, at least, over the whole region needed to compute the index.
 #'@param data_lats A numeric vector indicating the latitudes of the data.
 #'@param data_lons A numeric vector indicating the longitudes of the data.
@@ -43,21 +44,22 @@
 #'  anomalies, set it to FALSE. The default value is NULL.\cr
 #'  In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 #'  to the first forecast year, and the climatology is automatically computed
-#'  over the actual common period for the different forecast years.
+#'  over the common calendar period for the different forecast years.
 #'@param year_dim A character string indicating the name of the year dimension
 #'  The default value is 'year'. Only used if parameter 'type' is 'hist' or
 #'  'obs'.
 #'@param month_dim A character string indicating the name of the month
 #'  dimension. The default value is 'month'. Only used if parameter 'type' is
 #'  'hist' or 'obs'.
-#'@param member_dim A character string indicating the name of the member
-#'  dimension. The default value is 'member'. Only used if parameter 'type' is
-#'  'dcpp' or 'hist'.
+#'@param na.rm A logical value indicating whether to remove NA values. The default
+#'  value is TRUE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
 #'
-#'@return A numerical array of the SPOD index with the dimensions of:
-#'  1) sdate, forecast year, and member (in case of decadal predictions);
-#'  2) year and member (in case of historical simulations); or
-#'  3) year (in case of observations or reanalyses).
+#'@return A numerical array with the SPOD index with the same dimensions as data except
+#'  the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+#'  (historical simulations or observations). In case of decadal predictions, a new dimension
+#'  'fyear' is added.
#' #'@examples #' ## Observations or reanalyses @@ -84,7 +86,7 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lon', mask = NULL, monini = 11, fmonth_dim = 'fmonth', sdate_dim = 'sdate', indices_for_clim = NULL, year_dim = 'year', month_dim = 'month', - member_dim = 'member') { + na.rm = TRUE, ncores = NULL) { ## Input Checks # data @@ -128,6 +130,13 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop(paste0("The longitude dimension of parameter 'data' must be the same", " length of parameter 'data_lons'.")) } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } # mask if (!is.null(mask)) { if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & @@ -139,7 +148,7 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), - fun = fun_mask, mask = mask)$output1 + fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", @@ -198,14 +207,9 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop("Parameter 'month_dim' is not found in 'data' dimension.") } } - # member_dim - if (type == 'hist' | type == 'dcpp') { - if (!(is.character(member_dim) & length(member_dim) == 1)) { - stop("Parameter 'member_dim' must be a character string.") - } - if (!member_dim %in% names(dim(data))) { - stop("Parameter 'member_dim' is not found in 'data' dimension.") - } + # na.rm + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") } ## Regions for IPO_SPOD (Saurral et al., 2020) @@ -229,9 +233,16 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l data <- ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) - INDEX <- .Indices(data = data, type = type, monini = monini, - indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim, - sdate_dim = sdate_dim, year_dim = year_dim, - month_dim = month_dim, member_dim = member_dim) + if (type == 'dcpp'){ + target_dims <- c(sdate_dim, fmonth_dim) + } else if (type %in% c('hist','obs')){ + target_dims <- c(year_dim, month_dim) + } + + INDEX <- multiApply::Apply(data = data, target_dims = target_dims, fun = .Indices, + type = type, monini = monini, indices_for_clim = indices_for_clim, + fmonth_dim = fmonth_dim, sdate_dim = sdate_dim, + year_dim = year_dim, month_dim = month_dim, + na.rm = na.rm, ncores = ncores)$output1 return(INDEX) } diff --git a/R/Season.R b/R/Season.R index 7bc5c52154adda2d6db36f1454db2a16b019d6f3..d56aa1637fd77204db76061492a7d14fea9e8a87 100644 --- a/R/Season.R +++ b/R/Season.R @@ -96,7 +96,7 @@ Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } @@ -127,16 +127,21 @@ Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, } if (use_apply) { - time_dim_ind <- match(time_dim, names(dim(data))) - res <- apply(data, c(1:length(dim(data)))[-time_dim_ind], .Season, - 
monini = monini, moninf = moninf, monsup = monsup,
-                 method = method, na.rm = na.rm)
-    if (length(dim(res)) < length(dim(data))) {
-      res <- InsertDim(res, posdim = 1, lendim = 1, name = time_dim)
+    if (length(dim(data)) == 1) {
+      res <- .Season(data, monini = monini, moninf = moninf, monsup = monsup,
+                     method = method, na.rm = na.rm)
+      names(dim(res)) <- time_dim
     } else {
-      names(dim(res))[1] <- time_dim
+      time_dim_ind <- match(time_dim, names(dim(data)))
+      res <- apply(data, c(1:length(dim(data)))[-time_dim_ind], .Season,
+                   monini = monini, moninf = moninf, monsup = monsup,
+                   method = method, na.rm = na.rm)
+      if (length(dim(res)) < length(dim(data))) {
+        res <- InsertDim(res, posdim = 1, lendim = 1, name = time_dim)
+      } else {
+        names(dim(res))[1] <- time_dim
+      }
     }
-
   } else {
     res <- Apply(list(data),
                  target_dims = time_dim,
diff --git a/R/Smoothing.R b/R/Smoothing.R
index b2b11c7539b49d26b0cc889e252055914f1ac0df..d5fd2a5eac95d11d7217b05d2c5b795f8792d71d 100644
--- a/R/Smoothing.R
+++ b/R/Smoothing.R
@@ -72,7 +72,7 @@ Smoothing <- function(data, time_dim = 'ftime', runmeanlen = 12, ncores = NULL)
   }
   ## ncores
   if (!is.null(ncores)) {
-    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
       length(ncores) > 1) {
       stop("Parameter 'ncores' must be a positive integer.")
     }
diff --git a/R/Spectrum.R b/R/Spectrum.R
new file mode 100644
index 0000000000000000000000000000000000000000..2cbb16793d42fb398bc45ca7173fcd0f09283567
--- /dev/null
+++ b/R/Spectrum.R
@@ -0,0 +1,129 @@
+#'Estimate frequency spectrum
+#'
+#'Estimate the frequency spectrum of the data array together with a
+#'user-specified confidence level. The output is provided as an array with
+#'dimensions c(number of frequencies, stats = 3, other margin dimensions of
+#'data). The 'stats' dimension contains the frequencies at which the spectral
+#'density is estimated, the estimates of the spectral density, and the
+#'significance level.\cr
+#'The spectrum estimation relies on the R built-in function \code{spectrum()}
+#'and the confidence interval is estimated by a Monte-Carlo method.
+#'
+#'@param data A vector or numeric array of which the frequency spectrum is
+#'  required. If it is a vector, it should be a time series. If it is an array,
+#'  the dimensions must have at least 'time_dim'. The data is assumed to be
+#'  evenly spaced in time.
+#'@param time_dim A character string indicating the dimension along which to
+#'  compute the frequency spectrum. The default value is 'ftime'.
+#'@param conf.lev A numeric indicating the confidence level for the Monte-Carlo
+#'  significance test. The default value is 0.95.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#'  computation. The default value is NULL.
+#'
+#'@return A numeric array of the frequency spectrum with dimensions
+#'  c(time_dim = number of frequencies, stats = 3, the rest of the
+#'  dimensions of 'data'). The 'stats' dimension contains the frequency values,
+#'  the spectral density, and the confidence interval.
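+#'
+#'@details
+#'The Monte-Carlo significance test works as follows (a summary of the
+#'internal implementation): 100 AR(1) surrogate series are generated with the
+#'same mean, variance and lag-1 autocorrelation as the input series, the
+#'spectrum of each surrogate is estimated with \code{spectrum()}, and at each
+#'frequency the 'conf.lev' quantile of the surrogate spectral densities is
+#'returned as the significance threshold.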
+#'
+#'@examples
+#'# Load sample data as in Load() example:
+#'example(Load)
+#'ensmod <- MeanDims(sampleData$mod, 2)
+#'spectrum <- Spectrum(ensmod)
+#'
+#'for (jsdate in 1:dim(spectrum)['sdate']) {
+#'  for (jlen in 1:dim(spectrum)['ftime']) {
+#'    if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) {
+#'      ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate])
+#'    }
+#'  }
+#'}
+#'\donttest{
+#'PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates)
+#'}
+#'
+#'@import multiApply
+#'@importFrom stats spectrum cor rnorm sd quantile
+#'@export
+Spectrum <- function(data, time_dim = 'ftime', conf.lev = 0.95, ncores = NULL) {
+
+  # Check inputs
+  ## data
+  if (is.null(data)) {
+    stop("Parameter 'data' cannot be NULL.")
+  }
+  if (!is.numeric(data)) {
+    stop("Parameter 'data' must be a numeric array.")
+  }
+  if (is.null(dim(data))) {  #is vector
+    dim(data) <- c(length(data))
+    names(dim(data)) <- time_dim
+  }
+  if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) {
+    stop("Parameter 'data' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(data))) {
+    stop("Parameter 'time_dim' is not found in 'data' dimension.")
+  }
+  ## conf.lev
+  if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) {
+    stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate Spectrum
+
+  output <- Apply(list(data),
+                  target_dims = time_dim,
+                  fun = .Spectrum,
+                  output_dims = c(time_dim, 'stats'),
+                  conf.lev = conf.lev,
+                  ncores = ncores)$output1
+
+  return(output)
+}
+
+.Spectrum <- function(data, conf.lev = 0.95) {
+  # data: [time]
+
+  data <- data[!is.na(data)]
+  ndat <- length(data)
+
+  if (ndat >= 3) {
+    tmp <- spectrum(data, plot = FALSE)
+    output <- array(dim = c(length(tmp$spec), 3))
+    output[, 1] <- tmp$freq
+    output[, 2] <- tmp$spec
+    ntir <- 100
+    store <- array(dim = c(ntir, length(tmp$spec)))
+    for (jt in 1:ntir) {
+      # Build an AR(1) surrogate series with the mean, variance and lag-1
+      # autocorrelation of the data
+      toto <- mean(data)
+      alpha1 <- cor(data[2:ndat], data[1:(ndat - 1)])
+      for (ind in 2:ndat) {
+        b <- rnorm(1, mean(data) * (1 - alpha1), sd(data) * sqrt(1 -
+                   alpha1 ^ 2))
+        toto <- c(toto, toto[ind - 1] * alpha1 + b)
+      }
+      toto2 <- spectrum(toto, plot = FALSE)
+      store[jt, ] <- toto2$spec
+    }
+    # The conf.lev quantile of the surrogate spectra gives the significance
+    # threshold at each frequency
+    for (jx in 1:length(tmp$spec)) {
+      output[jx, 3] <- quantile(store[, jx], conf.lev)
+    }
+  } else {
+    output <- NA
+  }
+
+  return(invisible(output))
+}
diff --git a/R/Spread.R b/R/Spread.R
new file mode 100644
index 0000000000000000000000000000000000000000..4b3bc6b1167534dc9004a91d99da686851ed4611
--- /dev/null
+++ b/R/Spread.R
@@ -0,0 +1,203 @@
+#'Compute interquartile range, maximum-minimum, standard deviation and median
+#'absolute deviation
+#'
+#'Compute interquartile range, maximum-minimum, standard deviation and median
+#'absolute deviation along the list of dimensions provided by the compute_dim
+#'argument (typically along the ensemble member and start date dimension).
+#'The confidence interval is computed by bootstrapping 100 times. The input
+#'data can be the output of \code{Load()}, \code{Ano()}, or
+#'\code{Ano_CrossValid()}, for example.
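+#'
+#'@details
+#'A summary of the internal bootstrap: 100 resamples of the data are drawn
+#'with replacement along 'compute_dim', the four statistics are recomputed on
+#'each resample, and the (1 - conf.lev) / 2 and 1 - (1 - conf.lev) / 2
+#'quantiles of the bootstrap distributions provide the lower and upper
+#'confidence limits.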
+#' +#'@param data A numeric vector or array with named dimensions to compute the +#' statistics. The dimensions should at least include 'compute_dim'. +#'@param compute_dim A vector of character strings of the dimension names along +#' which to compute the statistics. The default value is 'member'. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is TRUE. +#'@param conf A logical value indicating whether to compute the confidence +#' intervals or not. The default value is TRUE. +#'@param conf.lev A numeric value of the confidence level for the computation. +#' The default value is 0.95. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list of numeric arrays with the same dimensions as 'data' but without +#''compute_dim' and with the first dimension 'stats'. If 'conf' is TRUE, the +#'length of 'stats' is 3 corresponding to the lower limit of the confidence +#'interval, the spread, and the upper limit of the confidence interval. If +#''conf' is FALSE, the length of 'stats' is 1 corresponding to the spread. +#'\item{$iqr}{ +#' InterQuartile Range. +#'} +#'\item{$maxmin}{ +#' Maximum - Minimum. +#'} +#'\item{$sd}{ +#' Standard Deviation. +#'} +#'\item{$mad}{ +#' Median Absolute Deviation. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'runmean_months <- 12 +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', +#' na.rm = TRUE), +#' posdim = 3, +#' lendim = dim(smooth_ano_exp)['member'], +#' name = 'member') +#'spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) +#' +#'\donttest{ +#'PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')), +#' toptitle = "Inter-Quartile Range between ensemble members", +#' ytitle = "K", monini = 11, limits = NULL, +#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, +#' hlines = c(0), fileout = 'tos_iqr.png') +#'PlotVsLTime(Reorder(spread$maxmin, c('dataset', 'stats', 'ftime')), +#' toptitle = "Maximum minus minimum of the members", +#' ytitle = "K", monini = 11, limits = NULL, +#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, +#' hlines = c(0), fileout = 'tos_maxmin.png') +#'PlotVsLTime(Reorder(spread$sd, c('dataset', 'stats', 'ftime')), +#' toptitle = "Standard deviation of the members", +#' ytitle = "K", monini = 11, limits = NULL, +#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, +#' hlines = c(0), fileout = 'tos_sd.png') +#'PlotVsLTime(Reorder(spread$mad, c('dataset', 'stats', 'ftime')), +#' toptitle = "Median Absolute Deviation of the members", +#' ytitle = "K", monini = 11, limits = NULL, +#' listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, +#' hlines = c(0), fileout = 'tos_mad.png') +#'} +#' +#'@import multiApply +#'@importFrom stats IQR sd mad runif quantile +#'@export +Spread <- function(data, compute_dim = 'member', na.rm = TRUE, + conf = TRUE, conf.lev = 0.95, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- 
compute_dim[1] + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## compute_dim + if (!is.character(compute_dim)) { + stop("Parameter 'compute_dim' must be a character vector.") + } + if (any(!compute_dim %in% names(dim(data)))) { + stop("Parameter 'compute_dim' has some elements that are not in 'data' dimension names.") + } + ## na.rm + if (!is.logical(na.rm) | length(na.rm) > 1) { + stop("Parameter 'na.rm' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## conf.lev + if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + stop("Parameter 'conf.lev' must be a numeric value between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Spread + + output <- Apply(list(data), + target_dims = compute_dim, + fun = .Spread, + output_dims = list(iqr = 'stats', maxmin = 'stats', + sd = 'stats', mad = 'stats'), + na.rm = na.rm, + conf = conf, conf.lev = conf.lev, + ncores = ncores) + + return(output) +} + +.Spread <- function(data, compute_dim = 'member', na.rm = TRUE, + conf = TRUE, conf.lev = 0.95) { + + # data: compute_dim. [member] or [member, sdate] for example + + # Compute spread + res_iqr <- IQR(data, na.rm = na.rm) + res_maxmin <- max(data, na.rm = na.rm) - min(data, na.rm = na.rm) + res_sd <- sd(data, na.rm = na.rm) + res_mad <- mad(data, na.rm = na.rm) + + # Compute conf (bootstrapping) + if (conf) { + # The output length is 3, [conf.low, spread, conf.high] + res_iqr <- rep(res_iqr, 3) + res_maxmin <- rep(res_maxmin, 3) + res_sd <- rep(res_sd, 3) + res_mad <- rep(res_mad, 3) + + conf_low <- (1 - conf.lev) / 2 + conf_high <- 1 - conf_low + + # Create vectors for saving the bootstrap results + iqr_bs <- c() + maxmin_bs <- c() + sd_bs <- c() + mad_bs <- c() + + # Bootstrap 100 times + num <- length(data) + for (jmix in 1:100) { + drawings <- round(runif(num, 0.5, num + 0.5)) # resample indices with replacement + iqr_bs <- c(iqr_bs, IQR(data[drawings], na.rm = na.rm)) + maxmin_bs <- c(maxmin_bs, max(data[drawings], na.rm = na.rm) - + min(data[drawings], na.rm = na.rm)) + sd_bs <- c(sd_bs, sd(data[drawings], na.rm = na.rm)) + mad_bs <- c(mad_bs, mad(data[drawings], na.rm = na.rm)) + } + + # Calculate confidence interval with the bootstrapping results + res_iqr[1] <- quantile(iqr_bs, conf_low, na.rm = na.rm) + res_iqr[3] <- quantile(iqr_bs, conf_high, na.rm = na.rm) + res_maxmin[1] <- res_maxmin[2] + (quantile(maxmin_bs, conf_low, na.rm = na.rm) - + quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2 + res_maxmin[3] <- res_maxmin[2] - (quantile(maxmin_bs, conf_low, na.rm = na.rm) - + quantile(maxmin_bs, conf_high, na.rm = na.rm)) / 2 + res_sd[1] <- quantile(sd_bs, conf_low, na.rm = na.rm) + res_sd[3] <- quantile(sd_bs, conf_high, na.rm = na.rm) + res_mad[1] <- res_mad[2] + (quantile(mad_bs, conf_low, na.rm = na.rm) - + quantile(mad_bs, conf_high, na.rm = na.rm)) + res_mad[3] <- res_mad[2] - (quantile(mad_bs, conf_low, na.rm = na.rm) - + quantile(mad_bs, conf_high, na.rm = na.rm)) + + } + + # Turn infinite values into NA + res_maxmin[which(is.infinite(res_maxmin))] <- NA + + return(invisible(list(iqr = as.array(res_iqr), maxmin = as.array(res_maxmin), + sd = as.array(res_sd), mad = as.array(res_mad)))) +} diff --git a/R/StatSeasAtlHurr.R
b/R/StatSeasAtlHurr.R new file mode 100644 index 0000000000000000000000000000000000000000..9d0ec3865ace345c0f17407ee6f7294a081cd9b0 --- /dev/null +++ b/R/StatSeasAtlHurr.R @@ -0,0 +1,225 @@ +#'Compute estimate of seasonal mean of Atlantic hurricane activity +#' +#'Compute one of G. Villarini's statistically downscaled measures of mean +#'Atlantic hurricane activity and its variance. The hurricane activity is +#'estimated using seasonal averages of sea surface temperature anomalies over +#'the tropical Atlantic (bounded by 10N-25N and 80W-20W) and the tropics at +#'large (bounded by 30N-30S). The anomalies are for the JJASON season.\cr +#'The estimated seasonal average is either 1) the number of hurricanes, 2) the number +#'of tropical cyclones with lifetime >=48h, or 3) the power dissipation index +#'(PDI; in 10^11 m^3 s^{-2}).\cr +#'The statistical models used in this function are described in the references below. +#' +#'@param atlano A numeric array with named dimensions of Atlantic sea surface +#' temperature anomalies. It must have the same dimensions as 'tropano'. +#'@param tropano A numeric array with named dimensions of tropical sea surface +#' temperature anomalies. It must have the same dimensions as 'atlano'. +#'@param hrvar A character string of the seasonal average to be estimated. The +#' options are either "HR" (hurricanes), "TC" (tropical cyclones with lifetime +#' >=48h), or "PDI" (power dissipation index). The default value is 'HR'. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list composed of two arrays with the same dimensions as 'atlano' +#' and 'tropano'. +#'\item{$mean}{ +#' The mean of the desired quantity. +#'} +#'\item{$var}{ +#' The variance of that quantity. +#'} +#' +#'@references +#'Villarini et al. (2010) Mon Wea Rev, 138, 2681-2705.\cr +#'Villarini et al. (2012) Mon Wea Rev, 140, 44-65.\cr +#'Villarini et al. (2012) J Clim, 25, 625-637.\cr +#'An example of how the function can be used in hurricane forecast studies +#' is given in\cr +#'Caron, L.-P. et al. (2014) Multi-year prediction skill of Atlantic hurricane +#' activity in CMIP5 decadal hindcasts. Climate Dynamics, 42, 2675-2690. +#' doi:10.1007/s00382-013-1773-1. +#' +#'@examples +#'# Let AtlAno represent 5 different 5-year forecasts of seasonally averaged +#'# Atlantic sea surface temperature anomalies. +#'AtlAno <- array(runif(25, -1, 1), dim = c(sdate = 5, ftime = 5)) +#'# Let TropAno represent 5 corresponding 5-year forecasts of seasonally +#'# averaged tropical sea surface temperature anomalies. +#'TropAno <- array(runif(25, -1, 1), dim = c(sdate = 5, ftime = 5)) +#'# The seasonal average of hurricanes for each of the five forecasted years, +#'# for each forecast, would then be given by:
+#'hr_count <- StatSeasAtlHurr(atlano = AtlAno, tropano = TropAno, hrvar = 'HR') +#' +#'@import multiApply +#'@export +StatSeasAtlHurr <- function(atlano, tropano, hrvar = "HR", ncores = NULL) { + + # Check inputs + ## atlano and tropano + if (is.null(atlano) | is.null(tropano)) { + stop("Parameter 'atlano' and 'tropano' cannot be NULL.") + } + if (!is.numeric(atlano) | !is.numeric(tropano)) { + stop("Parameter 'atlano' and 'tropano' must be a numeric array.") + } + if (is.null(dim(atlano))) { #is vector + dim(atlano) <- c(length(atlano)) + names(dim(atlano)) <- 'dim1' + } + if (is.null(dim(tropano))) { #is vector + dim(tropano) <- c(length(tropano)) + names(dim(tropano)) <- 'dim1' + } + if(any(is.null(names(dim(atlano))))| any(nchar(names(dim(atlano))) == 0) | + any(is.null(names(dim(tropano))))| any(nchar(names(dim(tropano))) == 0)) { + stop("Parameter 'atlano' and 'tropano' must have dimension names.") + } + if(!all(names(dim(atlano)) %in% names(dim(tropano))) | + !all(names(dim(tropano)) %in% names(dim(atlano)))) { + stop("Parameter 'atlano' and 'tropano' must have same dimension names.") + } + name_1 <- sort(names(dim(atlano))) + name_2 <- sort(names(dim(tropano))) + if (!all(dim(atlano)[name_1] == dim(tropano)[name_2])) { + stop(paste0("Parameter 'atlano' and 'tropano' must have the same length of ", + "all the dimensions.")) + } + ## hrvar + if (hrvar != "HR" & hrvar != "TC" & hrvar != "PDI") { + stop("The parameter 'hrvar' must be either 'HR', 'TC', or 'PDI'.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate StatSeasAtlHurr + if (is.null(ncores)) { + use_Apply <- FALSE + } else if (ncores == 1) { + use_Apply <- FALSE + } else { + use_Apply <- TRUE + } + + if (use_Apply) { + res <- Apply(list(atlano, tropano), + target_dims = list(c(names(which.max(dim(atlano)))), + c(names(which.max(dim(atlano))))), + fun = .StatSeasAtlHurr, + hrvar = hrvar, + ncores = ncores) + } else { + + # Get the values of the betas according to the hurricane + # activity measure we specified. + # ------------------------------------------------------ + if (hrvar == "HR") { + # beta's are derived from Villarini et al. (2012), Mon Wea + # Rev, 140, 44-65. beta's are for corrected hurricane data + + # ERSST with SBC criteria (table 2) + beta0 <- 1.85 + betaAtl <- 1.05 + betaTrop <- -1.17 + } else if (hrvar == "TC") { + # beta's are from Villarini et al. (2010), Mon Wea Rev, 138, + # 2681-2705. beta's are for corrected TC data (lifetime >= + # 48h) + ERSST (table 5) + beta0 <- 2.1 + betaAtl <- 1.02 + betaTrop <- -1.05 + } else if (hrvar == "PDI") { + # beta's are from Villarini et al. (2012), J Clim, 25, + # 625-637. beta's are from ERSST, with SBC penalty criterion + # (table 1) + beta0 <- 0.76 + betaAtl <- 1.94 + betaTrop <- -1.78 + } + # Create matrix of similar dimension as atlano for beta0. + # ------------------------------------------------------- + intercept <- array(beta0, dim(atlano)) + # Compute statistical relationship b/w SSTAs and mean + # hurricane activity. + # --------------------------------------------------- + atl <- betaAtl * atlano + trop <- betaTrop * tropano + # + temp <- intercept + atl + trop + # + res <- list(mean = array(NA, dim(atl)), var = array(NA, dim(atl))) + res$mean[] <- vapply(X = temp, FUN = exp, numeric(1)) + # Compute the variance of the distribution. 
TC and HR follow + # a Poisson distribution, so the variance is equal to the + # mean. PDI follows a gamma distribution, with sigma = + # -0.57. (variance = sigma^2 * mean^2). + # ----------------------------------------------------------- + if (hrvar == "HR" | hrvar == "TC") { + res$var <- res$mean + } else { + sigma <- -0.57 + res$var[] <- sigma^2 * vapply(X = res$mean, FUN = function(x) x^2, numeric(1)) + } + + } + + return(res) +} + +.StatSeasAtlHurr <- function(atlano, tropano, hrvar = "HR") { + + # atlano and tropano: a vector with same length + + # Get the values of the betas according to the hurricane activity measure we + # specified. + # ------------------------------------------------------ + if (hrvar == "HR") { + # beta's are derived from Villarini et al. (2012), Mon Wea + # Rev, 140, 44-65. beta's are for corrected hurricane data + + # ERSST with SBC criteria (table 2) + beta0 <- 1.85 + betaAtl <- 1.05 + betaTrop <- -1.17 + } else if (hrvar == "TC") { + # beta's are from Villarini et al. (2010), Mon Wea Rev, 138, + # 2681-2705. beta's are for corrected TC data (lifetime >= + # 48h) + ERSST (table 5) + beta0 <- 2.1 + betaAtl <- 1.02 + betaTrop <- -1.05 + } else if (hrvar == "PDI") { + # beta's are from Villarini et al. (2012), J Clim, 25, + # 625-637. beta's are from ERSST, with SBC penalty criterion + # (table 1) + beta0 <- 0.76 + betaAtl <- 1.94 + betaTrop <- -1.78 + } + + # Compute statistical relationship b/w SSTAs and mean + # hurricane activity. + # --------------------------------------------------- + atl <- betaAtl * atlano + trop <- betaTrop * tropano + temp <- beta0 + atl + trop + stat_mean <- exp(temp) + + # Compute the variance of the distribution. TC and HR follow + # a Poisson distribution, so the variance is equal to the + # mean. PDI follows a gamma distribution, with sigma = + # -0.57. (variance = sigma^2 * mean^2). + # ----------------------------------------------------------- + if (hrvar == "HR" | hrvar == "TC") { + stat_var <- stat_mean + } else { + sigma <- -0.57 + stat_var <- sigma^2 * stat_mean^2 + } + + return(invisible(list(mean = stat_mean, var = stat_var))) +} diff --git a/R/TPI.R b/R/TPI.R index a041c69c9be9e8ee7ef97910d25d79967bc5a334..80d958ea2fb27d20acbceb26f0f4a00e29ba0f3d 100644 --- a/R/TPI.R +++ b/R/TPI.R @@ -3,13 +3,14 @@ #'The Tripole Index (TPI) for the Interdecadal Pacific Oscillation (IPO) is #'computed as the difference of weighted-averaged SST anomalies over 10ºS-10ºN, #'170ºE-270ºE minus the mean of the weighted-averaged SST anomalies over -#'25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +#'25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +#'If different members and/or datasets are provided, the climatology (used to +#'calculate the anomalies) is computed individually for all of them. #' -#'@param data A numerical array to be used for the index computation with the -#' dimensions: 1) latitude, longitude, start date, forecast month, and member -#' (in case of decadal predictions), 2) latitude, longitude, year, month and -#' member (in case of historical simulations), or 3) latitude, longitude, year -#' and month (in case of observations or reanalyses). This data has to be +#'@param data A numerical array to be used for the index computation with, at least, the +#' dimensions: 1) latitude, longitude, start date and forecast month +#' (in case of decadal predictions), 2) latitude, longitude, year and month +#' (in case of historical simulations or observations). 
This data has to be #' provided, at least, over the whole region needed to compute the index. #'@param data_lats A numeric vector indicating the latitudes of the data. #'@param data_lons A numeric vector indicating the longitudes of the data. @@ -42,21 +43,22 @@ #' anomalies, set it to FALSE. The default value is NULL.\cr #' In case parameter 'type' is 'dcpp', 'indices_for_clim' must be relative #' to the first forecast year, and the climatology is automatically computed -#' over the actual common period for the different forecast years. +#' over the common calendar period for the different forecast years. #'@param year_dim A character string indicating the name of the year dimension. #' The default value is 'year'. Only used if parameter 'type' is 'hist' or #' 'obs'. #'@param month_dim A character string indicating the name of the month #' dimension. The default value is 'month'. Only used if parameter 'type' is #' 'hist' or 'obs'. -#'@param member_dim A character string indicating the name of the member -#' dimension. The default value is 'member'. Only used if parameter 'type' is -#' 'dcpp' or 'hist'. +#'@param na.rm A logical value indicating whether to remove NA values. The default +#' value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. #' -#'@return A numerical array of the TPI index with the dimensions of: -#' 1) sdate, forecast year, and member (in case of decadal predictions); -#' 2) year and member (in case of historical simulations); or -#' 3) year (in case of observations or reanalyses). +#'@return A numerical array with the TPI index with the same dimensions as data except +#' the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions +#' (historical simulations or observations). In case of decadal predictions, a new dimension +#' 'fyear' is added.
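As a schematic of the index definition quoted above (an illustration only: the toy grid, the `box_mean()` helper, and the plain cosine-latitude weighting are assumptions, not the package implementation):

```r
set.seed(1)
sst_anom <- array(rnorm(36 * 72 * 30), dim = c(lat = 36, lon = 72, year = 30))
lats <- seq(-87.5, 87.5, 5)
lons <- seq(2.5, 357.5, 5)
box_mean <- function(x, latmin, latmax, lonmin, lonmax) {
  ilat <- which(lats >= latmin & lats <= latmax)
  ilon <- which(lons >= lonmin & lons <= lonmax)
  w <- cos(lats[ilat] * pi / 180)                  # latitude weights
  apply(x[ilat, ilon, , drop = FALSE], 3,
        function(s) sum(s * w) / (sum(w) * length(ilon)))
}
mean_1 <- box_mean(sst_anom,  25,  45, 140, 215)   # North Pacific box
mean_2 <- box_mean(sst_anom, -10,  10, 170, 270)   # equatorial box
mean_3 <- box_mean(sst_anom, -50, -15, 150, 200)   # South Pacific box
tpi <- mean_2 - (mean_1 + mean_3) / 2              # one value per year
```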
#' #'@examples #' ## Observations or reanalyses @@ -83,7 +85,7 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lon', mask = NULL, monini = 11, fmonth_dim = 'fmonth', sdate_dim = 'sdate', indices_for_clim = NULL, year_dim = 'year', month_dim = 'month', - member_dim = 'member') { + na.rm = TRUE, ncores = NULL) { ## Input Checks # data @@ -127,6 +129,13 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop(paste0("The longitude dimension of parameter 'data' must be the same", " length of parameter 'data_lons'.")) } + # ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } # mask if (!is.null(mask)) { if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & @@ -138,7 +147,7 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), - fun = fun_mask, mask = mask)$output1 + fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", @@ -197,14 +206,9 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'month_dim' is not found in 'data' dimension.") } } - # member_dim - if (type == 'hist' | type == 'dcpp') { - if (!(is.character(member_dim) & length(member_dim) == 1)) { - stop("Parameter 'member_dim' must be a character string.") - } - if (!member_dim %in% names(dim(data))) { - stop("Parameter 'member_dim' is not found in 'data' dimension.") - } + # na.rm + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") } # Regions for IPO_TPI (psl.noaa.gov/data/timeseries/IPOTPI) @@ -238,9 +242,16 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo data <- ClimProjDiags::CombineIndices(indices = list(mean_2, mean_1_3), weights = NULL, operation = 'subtract') # mean_2 - ((mean_1 + mean_3)/2) - INDEX <- .Indices(data = data, type = type, monini = monini, - indices_for_clim = indices_for_clim, fmonth_dim = fmonth_dim, - sdate_dim = sdate_dim, year_dim = year_dim, - month_dim = month_dim, member_dim = member_dim) + if (type == 'dcpp'){ + target_dims <- c(sdate_dim, fmonth_dim) + } else if (type %in% c('hist','obs')){ + target_dims <- c(year_dim, month_dim) + } + + INDEX <- multiApply::Apply(data = data, target_dims = target_dims, fun = .Indices, + type = type, monini = monini, indices_for_clim = indices_for_clim, + fmonth_dim = fmonth_dim, sdate_dim = sdate_dim, + year_dim = year_dim, month_dim = month_dim, + na.rm = na.rm, ncores = ncores)$output1 return(INDEX) } diff --git a/R/Trend.R b/R/Trend.R index 4afb5237abb47ab5c7ec0e20f9ee2b9bb142fa77..1f714a6fac3b87e9c2796afcbfde6d3d4cb42fc7 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -115,16 +115,15 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | - length(ncores) > 1) { + if (!is.numeric(ncores)) { + stop("Parameter 'ncores' must be a positive integer.") + } else if (ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ############################### # Calculate Trend - dim_names <- names(dim(data)) - if (conf & pval) { 
output_dims <- list(trend = 'stats', conf.lower = 'stats', conf.upper = 'stats', p.val = 'stats', detrended = time_dim) @@ -136,22 +135,22 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, } else { output_dims <- list(trend = 'stats', detrended = time_dim) } - - + output <- Apply(list(data), target_dims = time_dim, fun = .Trend, output_dims = output_dims, - time_dim = time_dim, interval = interval, + interval = interval, polydeg = polydeg, conf = conf, conf.lev = conf.lev, pval = pval, ncores = ncores) - return(output) + return(invisible(output)) } -.Trend <- function(x, time_dim = 'ftime', interval = 1, polydeg = 1, +.Trend <- function(x, interval = 1, polydeg = 1, conf = TRUE, conf.lev = 0.95, pval = TRUE) { + # x: [ftime] mon <- seq(x) * interval @@ -193,7 +192,6 @@ } } - if (conf & pval) { return(list(trend = trend, conf.lower = conf.lower, conf.upper = conf.upper, p.val = p.val, detrended = detrended)) diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R new file mode 100644 index 0000000000000000000000000000000000000000..aeaddcdcadcdd9d25e3a283d1ec8d24534f969d0 --- /dev/null +++ b/R/UltimateBrier.R @@ -0,0 +1,318 @@ +#'Compute Brier scores +#' +#'Interface to compute probabilistic scores (Brier Score, Brier Skill Score) +#'from the forecast and observational data anomalies. It provides six score types +#'to choose from. +#' +#'@param exp A numeric array of forecast anomalies with named dimensions that +#' at least include 'dat_dim', 'memb_dim', and 'time_dim'. It can be provided +#' by \code{Ano()}. +#'@param obs A numeric array of observational reference anomalies with named +#' dimensions that at least include 'dat_dim' and 'time_dim'. If it has +#' 'memb_dim', the length must be 1. The dimensions should be consistent with +#' 'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}. +#'@param dat_dim A character string indicating the name of the dataset +#' dimension in 'exp' and 'obs'. The default value is 'dataset'. +#'@param memb_dim A character string indicating the name of the member +#' dimension in 'exp' (and 'obs') for ensemble mean calculation. The default +#' value is 'member'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the probabilistic scores. The default value is 'sdate'. +#'@param quantile A logical value to decide whether a quantile (TRUE) or a +#' threshold (FALSE) is used to estimate the forecast and observed +#' probabilities. If 'type' is 'FairEnsembleBS' or 'FairEnsembleBSS', it must +#' be TRUE. The default value is TRUE. +#'@param thr A numeric vector to be used in probability calculation (for 'BS', +#' 'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event +#' judgement (for 'FairEnsembleBS' and 'FairEnsembleBSS'). It is used as +#' quantiles if 'quantile' is TRUE or as thresholds if 'quantile' is FALSE. +#' The default value is \code{c(0.05, 0.95)} for 'quantile = TRUE'. +#'@param type A character string of the desired score type. It can be the +#' following values: +#'\itemize{ +#' \item{'BS': Simple Brier Score. Use SpecsVerification::BrierDecomp inside.} +#' \item{'FairEnsembleBS': Corrected Brier Score computed across ensemble +#' members. Use SpecsVerification::FairBrier inside.} +#' \item{'FairStartDatesBS': Corrected Brier Score computed across starting +#' dates. Use s2dv:::.BrierScore inside.} +#' \item{'BSS': Simple Brier Skill Score.
Use s2dv:::.BrierScore inside.} +#' \item{'FairEnsembleBSS': Corrected Brier Skill Score computed across +#' ensemble members. Use SpecsVerification::FairBrierSs inside.} +#' \item{'FairStartDatesBSS': Corrected Brier Skill Score computed across +#' starting dates. Use s2dv:::.BrierScore inside.} +#'} +#' The default value is 'BS'. +#'@param decomposition A logical value to determine whether the decomposition +#' of the Brier Score should be provided (TRUE) or not (FALSE). It is only +#' used when 'type' is 'BS' or 'FairStartDatesBS'. The default value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the +#'output is a list of 4 arrays (see details below). In other cases, the output +#'is an array of Brier scores or Brier skill scores. All the arrays have the +#'same dimensions: +#'c(nexp, nobs, no. of bins, the rest of the dimensions of 'exp' except 'time_dim' and +#''memb_dim'). 'nexp' and 'nobs' are the lengths of the dataset dimension in 'exp' +#'and 'obs', respectively.\cr +#'The list of 4 includes: +#' \itemize{ +#' \item{$bs: Brier Score} +#' \item{$rel: Reliability component} +#' \item{$res: Resolution component} +#' \item{$unc: Uncertainty component} +#' } +#' +#'@examples +#' \dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#' } +#'sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2) +#'sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'exp <- Ano(sampleData$mod, clim$clim_exp) +#'obs <- Ano(sampleData$obs, clim$clim_obs) +#'bs <- UltimateBrier(exp, obs) +#'bss <- UltimateBrier(exp, obs, type = 'BSS') +#' +#'@import SpecsVerification plyr multiApply +#'@export +UltimateBrier <- function(exp, obs, dat_dim = 'dataset', memb_dim = 'member', time_dim = 'sdate', + quantile = TRUE, thr = c(5/100, 95/100), type = 'BS', + decomposition = TRUE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a vector or a numeric array.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!memb_dim %in% names(dim(obs))) { + # Insert memb_dim into obs for the ease of later calculation + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } else if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in
'obs' must be 1.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + if (any(name_exp != name_obs)) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## quantile + if (!is.logical(quantile) | length(quantile) > 1) { + stop("Parameter 'quantile' must be one logical value.") + } + ## thr + if (!is.numeric(thr) | !is.vector(thr)) { + stop("Parameter 'thr' must be a numeric vector.") + } + if (quantile) { + if (!all(thr < 1 & thr > 0)) { + stop("Parameter 'thr' must be between 0 and 1 when quantile is TRUE.") + } + } + if (!quantile & (type %in% c('FairEnsembleBSS', 'FairEnsembleBS'))) { + stop("Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'.") + } + ## type + if (!(type %in% c("BS", "BSS", "FairEnsembleBS", "FairEnsembleBSS", "FairStartDatesBS", "FairStartDatesBSS"))) { + stop("Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'.") + } + ## decomposition + if (!is.logical(decomposition) | length(decomposition) > 1) { + stop("Parameter 'decomposition' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate UltimateBrier + + if (type %in% c('FairEnsembleBSS', 'FairEnsembleBS')) { + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .UltimateBrier, + thr = thr, type = type, + decomposition = decomposition, + ncores = ncores)$output1 + + } else { + # Calculate probabilities with ProbBins() and the ensemble mean first. + # The first dim will become 'bin' and memb_dim is gone.
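+ # (For intuition: with thr = c(0.05, 0.95) and quantile = TRUE, ProbBins() + # returns a 0/1 membership of each member in three bins (below the 5th + # percentile, between the 5th and 95th, and above the 95th), so the member + # mean below turns those memberships into forecast probabilities per bin.)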
+ exp <- MeanDims( + ProbBins(exp, thr = thr, time_dim = time_dim, memb_dim = memb_dim, + quantile = quantile, ncores = ncores), + memb_dim) + obs <- MeanDims( + ProbBins(obs, thr = thr, time_dim = time_dim, memb_dim = memb_dim, + quantile = quantile, ncores = ncores), + memb_dim) + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim), + c(time_dim, dat_dim)), + fun = .UltimateBrier, + thr = thr, type = type, + decomposition = decomposition, + ncores = ncores) + + if (type %in% c('BSS', 'FairStartDatesBSS')) { + res <- res$output1 + } else if (!decomposition) { + res <- res$bs + } + } + + return(res) +} + +.UltimateBrier <- function(exp, obs, thr = c(5/100, 95/100), type = 'BS', + decomposition = TRUE) { + # If exp and obs are probabilistic + # exp: [sdate, nexp] + # obs: [sdate, nobs] + # If exp and obs are anomalies + # exp: [sdate, nexp, memb] + # obs: [sdate, nobs, memb] + + #NOTE: 'thr' is used in 'FairEnsembleBSS' and 'FairEnsembleBS'. But if quantile = FALSE and + # 'thr' is a real value, does it work? + if (type == 'FairEnsembleBSS') { + size_ens_ref <- prod(dim(obs)[c(1, 3)]) + res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + bin = length(thr) + 1)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + ens_ref <- matrix(obs[, n_obs, 1], size_ens_ref, size_ens_ref, byrow = TRUE) + for (n_thr in 1:length(c(thr, 1))) { + #NOTE: FairBrierSs is deprecated now. Should change to SkillScore (according to + # SpecsVerification's documentation) + res[n_exp, n_obs, n_thr] <- SpecsVerification::FairBrierSs(exp[, n_exp, ] > c(thr, 1)[n_thr], + ens_ref > c(thr, 1)[n_thr], + obs[, n_obs, 1] > c(thr, 1)[n_thr])['skillscore'] + } + } + } + + } else if (type == 'FairEnsembleBS') { + #NOTE: The calculation in s2dverification::UltimateBrier is wrong. In the final stage, + # the function calculates like "take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3)", + # but the 3rd dim of result is 'bins' instead of decomposition. 'FairEnsembleBS' does + # not have decomposition. + # The calculation is fixed here. + res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + bin = length(thr) + 1)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + for (n_thr in 1:length(c(thr, 1))) { + fb <- SpecsVerification::FairBrier(ens = exp[, n_exp, ] > c(thr, 1)[n_thr], + obs = obs[, n_obs, 1] > c(thr, 1)[n_thr]) + res[n_exp, n_obs, n_thr] <- mean(fb, na.rm = TRUE) + } + } + } +# tmp <- res[, , 1] - res[, , 2] + res[, , 3] +# res <- array(tmp, dim = c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2]))) + + } else if (type == 'BS') { + comp <- array(dim = c(nexp = as.numeric(dim(exp)[2]), + nobs = as.numeric(dim(obs)[2]), + comp = 3)) + for (n_exp in 1:dim(exp)[2]) { + for (n_obs in 1:dim(obs)[2]) { + #NOTE: Parameter 'bins' is left as default.
+ comp[n_exp, n_obs, ] <- SpecsVerification::BrierDecomp(p = exp[, n_exp], + y = obs[, n_obs])[1, ] + } + } + if (decomposition) { + rel <- comp[, , 1] + dim(rel) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- comp[, , 2] + dim(res) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + unc <- comp[, , 3] + dim(unc) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + bs <- rel - res + unc + dim(bs) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- list(bs = bs, rel = rel, res = res, unc = unc) + } else { + bs <- comp[, , 1] - comp[, , 2] + comp[, , 3] + dim(bs) <- c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2])) + res <- list(bs = bs) + } + + } else if (type == 'FairStartDatesBS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs) + if (decomposition) { + res <- list(bs = res$bs, rel = res$rel, res = res$res, unc = res$unc) + } else { + res <- list(bs = res$bs) + } + + } else if (type == 'BSS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs)$bss_res + + } else if (type == 'FairStartDatesBSS') { + #NOTE: parameter 'thresholds' is not specified. + res <- .BrierScore(exp = exp, obs = obs)$bss_gres + } + + return(res) + +} + diff --git a/R/Utils.R b/R/Utils.R index 6e781e1a9d0d527db76b5bea8f3a875e7e686c15..e0e1f91fddd2bcd61460149d49060ec65ab4d018 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -327,11 +327,9 @@ if ((grid_lons != common_grid_lons) || (grid_lats != common_grid_lats) || (grid_type != common_grid_type) || - ((lon[1] != first_common_grid_lon) - && !work_piece[['single_dataset']])) { + (lon[1] != first_common_grid_lon)) { if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && - grid_type == common_grid_type && lon[1] != first_common_grid_lon && - !work_piece[['single_dataset']]) { + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { remove_shift <- TRUE } remap_needed <- TRUE @@ -420,7 +418,9 @@ filecopy <- tempfile(pattern = "load", fileext = ".nc") file.copy(filein, filecopy) filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") - system(paste0("cdo -s remap", work_piece[['remap']], ",", + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. 
+ system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, " -selname,", namevar, " ", filecopy, " ", filein, " 2>/dev/null", sep = "")) @@ -600,9 +600,6 @@ nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len expected_dims <- c(expected_dims, time_dimname) dim_matches <- match(expected_dims, var_dimnames) - first_time_step_in_file <- fnc$var[[namevar]][['dim']][[match(time_dimname, - var_dimnames)]]$vals[1] - time_units <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$units } else { if (!is.null(old_members_dimname)) { expected_dims[which(expected_dims == 'lev')] <- old_members_dimname @@ -755,7 +752,7 @@ ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) } nc_close(fnc2) - system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { "0,360," } else { paste0(lonmin, ",", lonmax, ",") @@ -797,7 +794,7 @@ fnc_mask <- nc_create(mask_file, list(ncdf_var)) ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) nc_close(fnc_mask) - system(paste0("cdo -s remap", work_piece[['remap']], ",", common_grid_name, + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, " ", mask_file, " ", mask_file_remap, " 2>/dev/null", sep = "")) fnc_mask <- nc_open(mask_file_remap) mask_lons <- ncvar_get(fnc_mask, 'lon') @@ -950,9 +947,7 @@ if (explore_dims) { list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, units = units, var_long_name = var_long_name, - data_across_gw = data_across_gw, array_across_gw = array_across_gw, - time_dim = list(first_time_step_in_file = first_time_step_in_file, - time_units = time_units)) + data_across_gw = data_across_gw, array_across_gw = array_across_gw) } else { ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) @@ -1670,107 +1665,58 @@ } # to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R -.Indices <- function(data, type, monini, indices_for_clim, - fmonth_dim, sdate_dim, year_dim, month_dim, member_dim) { +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { - data = drop(data) - - if(member_dim %in% names(dim(data))){ - if (type == 'dcpp'){ - data = s2dv::Reorder(data = data, order = c(sdate_dim,fmonth_dim,member_dim)) - } else if (type %in% c('hist','obs')){ - data = s2dv::Reorder(data = data, order = c(year_dim,month_dim,member_dim)) - } - } - - if (type == 'dcpp'){ + if (type == 'dcpp') { - data = s2dv::Season(data = data, time_dim = fmonth_dim, - monini = monini, moninf = 1, monsup = 12, - method = mean, na.rm = FALSE) - names(dim(data))[which(names(dim(data))==fmonth_dim)] = 'fyear' - if (member_dim %in% names(dim(data))){ - data = s2dv::Reorder(data = data, order = c('fyear',sdate_dim,member_dim)) - } else { - data = s2dv::Reorder(data = data, order = c('fyear',sdate_dim)) - } + fyear_dim <- 'fyear' + data <- s2dv::Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data))==fmonth_dim)] <- fyear_dim - if (is.logical(indices_for_clim)) { - if(!any(indices_for_clim)) { - # indices_for_clim == FALSE -> anomalies are directly given - anom = data - } + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies - } else { - - ## Different indices_for_clim for each forecast year (same actual years) + anom 
<- data - n_fyears = as.numeric(dim(data)['fyear']) - n_sdates = as.numeric(dim(data)[sdate_dim]) + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) - if (is.null(indices_for_clim)){ - - # indices_for_clim == NULL -> anomalies based on the whole (common) period - first_years_for_clim = n_fyears : 1 - last_years_for_clim = n_sdates : (n_sdates - n_fyears + 1) - - } else { - - first_years_for_clim = seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) - last_years_for_clim = seq(from = indices_for_clim[length(indices_for_clim)], by = -1, length.out = n_fyears) - - } - - anom = array(data = NA, dim = dim(data)) - if (member_dim %in% names(dim(data))){ - clim = array(data = NA, dim = c(dim(data)['fyear'],dim(data)[member_dim])) - } else { - clim = array(data = NA, dim = c(dim(data)['fyear'])) + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)], by = -1, length.out = n_fyears) } - for (i in 1:n_fyears){ - if (member_dim %in% names(dim(data))){ - for (m in 1:as.numeric(dim(data)[member_dim])){ - clim[i,m] = mean(data[i,first_years_for_clim[i]:last_years_for_clim[i],m]) - anom[i,,m] = data[i,,m] - clim[i,m] - } - } else { - clim = mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]]) - anom[i,] = data[i,] - clim - } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i,] <- data[i,] - clim } } - } else if (type %in% c('obs','hist')){ + } else if (type %in% c('obs','hist')) { - data = multiApply::Apply(data = data, target_dims = month_dim, fun = mean)$output1 + data <- multiApply::Apply(data = data, target_dims = month_dim, fun = mean, na.rm = na.rm)$output1 - if (is.logical(indices_for_clim)) { - if(!any(indices_for_clim)) { - anom = data - } - - } else { - - if (is.null(indices_for_clim)){ - - clim = multiApply::Apply(data = data, target_dims = year_dim, fun = mean)$output1 - - } else { - - if (member_dim %in% names(dim(data))){ - target_dims = c(year_dim,member_dim) - } else { - target_dims = year_dim - } - clim = multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), - target_dims = target_dims, fun = mean)$output1 - } - anom = multiApply::Apply(data = data, target_dims = year_dim, - fun = function(data,clim){data-clim}, clim = clim)$output1 + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } else { ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 } + anom <- data - clim + } else {stop('type must be dcpp, hist or obs')} return(anom) } - diff --git a/README.md b/README.md 
index f9cce382008cbb08f2e099f82e6e39d273a94808..63b261da62e2c0bc343acef04242b584d88d72b9 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,27 @@ correlation with reliability indicators such as p-values and confidence interval - **Visualization** module: Plotting functions are also provided to plot the results obtained from any of the modules above. +One important feature of s2dv is the named dimensions of the data arrays. All the +data inputs of the functions should have names for all the dimensions. This should +not be a problem since the data retrieved by s2dv::Load or startR::Start have +named dimensions inherently. Take the sample data in s2dv as an example: +```r +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), c('observation'), + '19901101', leadtimemin = 1, leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +# It returns an object 'sampleData' +dim(sampleData$mod) +dataset member sdate ftime lat lon + 1 3 1 4 2 3 +dim(sampleData$obs) +dataset member sdate ftime lat lon + 1 1 1 4 2 3 +``` +This feature provides safety during the analysis, ensuring that the dimensions +being operated on are the desired ones. + Contribute ---------- diff --git a/inst/doc/FAQ.md b/inst/doc/FAQ.md index e6a58be2fbb54b999d1d7d7657e7fdee0d0aa61b..a8e49db9024f6e063a2fbe59db9274a4efd20b18 100644 --- a/inst/doc/FAQ.md +++ b/inst/doc/FAQ.md @@ -6,6 +6,10 @@ This document intends to be the first reference for any doubts that you may have 1. **How to** 1. [Global Map with non-standard longitudinal boundaries](#1-global-map-with-non-standard-longitudinal-boundaries) +2. **Something goes wrong...** + 1. [CDORemap() returns errors or warnings with specific module versions](#1-cdoremap-returns-errors-or-warnings-with-specific-module-versions) + + ## 1. How to ### 1. Global Map with non-standard longitudinal boundaries @@ -40,3 +44,27 @@ Note: You can adjust many parameters to visualize the plot, here we are just sho If you want to add other information to the plot (e.g.: hatching, points, contours, ...), you can add it just before ColorBar() function. + + +## 2. Something goes wrong... +### 1. CDORemap() returns errors or warnings with specific module versions +CDORemap() uses cdo and ncdf4 underneath, so its behaviour depends strongly on those tools. +Some cases may work with one specific module combination but not with another. +Since the incompatibility does not come from the R code, it is hard to improve or prevent the failure. +Here are some detected cases in which specific module versions need to be used. +(1) The 'grid' parameter is a file +- The working version combination: +CDO/1.9.8-foss-2015a +R/3.6.1-foss-2015a-bare +HDF5/1.8.14-foss-2015a +- The failing version combination: +_It returns a warning about HDF5._ +CDO/1.6.3-foss-2015a +R/3.6.1-foss-2015a-bare +HDF5/1.10.5-foss-2015a + + + + + diff --git a/inst/doc/profiling_compare_apply.md b/inst/doc/profiling_compare_apply.md new file mode 100644 index 0000000000000000000000000000000000000000..967785c2eb172777038bcfc9a7dec190db35fb6d --- /dev/null +++ b/inst/doc/profiling_compare_apply.md @@ -0,0 +1,95 @@ +This document records the profiling tests of the functions that use apply() or Apply() +depending on 'ncores'. The comparison is among apply(), Apply() with one core, and Apply() with two cores. Two different data sizes are tested. The package used for the tests is "peakRAM". + + +- Ano() +For small data, apply() is better than Apply() both in time and memory usage.
However, if +the data size is larger, apply() requires more memory even though it is still faster. Using +2 cores can reduce the memory usage, but the elapsed time is even longer. + + - small data +```r + set.seed(1) + dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10)) + set.seed(2) + clim1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400)) + pryr::object_size(dat1) + 9.6 MB + + # (1) apply + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.016 9.1 68.6 + + # (2) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.039 9.1 82.4 + + # (3) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 0.117 9.1 50.4 +``` + + - large data +```r + set.seed(1) + dat2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400, ftime = 10, lon = 150)) + set.seed(2) + clim2 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 400)) + pryr::object_size(dat2) + 1.44GB + + # (1) apply + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 6.368 1373.3 6004 + + # (2) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 15.211 1373.3 5844.3 + + # (3) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 20.193 1373.3 4718.9 +``` + +- Trend +Because the returned value is a list, apply() is not suitable for Trend(). For small data, +2 cores are about twice as fast as 1 core. The peak RAM is around 6-7x the data size. For larger data, +1 core is a bit faster than 2 cores. The peak RAM is around 4-5x the data size. + - small data +```r + set.seed(1) + dat1 <- array(rnorm(10000), c(dat = 10, member = 30, sdate = 40, ftime = 100)) + pryr::object_size(dat1) + 9.6 MB + + # (1) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 21.324 9.8 56.4 + + # (2) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 11.327 9.8 63.1 + + # (3) Apply, 4 cores + +``` + + - large data +```r + set.seed(1) + dat2 <- array(rnorm(10000), c(dat = 10, member = 10, sdate = 400, ftime = 1000, lon = 4)) + pryr::object_size(dat2) + 1.28GB + + # (1) Apply, 1 core + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 602.273 1230.4 6004.3 + + # (2) Apply, 2 cores + Elapsed_Time_sec Total_RAM_Used_MiB Peak_RAM_Used_MiB + 632.638 1229.8 5979.2 + +``` + + + diff --git a/man/ACC.Rd b/man/ACC.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d1389fdb596ee9a2162723d8c1ce1410f3542b19 --- /dev/null +++ b/man/ACC.Rd @@ -0,0 +1,155 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ACC.R +\name{ACC} +\alias{ACC} +\title{Compute the anomaly correlation coefficient between the forecast and corresponding observation} +\usage{ +ACC( + exp, + obs, + dat_dim = "dataset", + space_dim = c("lat", "lon"), + avg_dim = "sdate", + memb_dim = "member", + lat = NULL, + lon = NULL, + lonlatbox = NULL, + conf = TRUE, + conftype = "parametric", + conf.lev = 0.95, + pval = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A numeric array of experimental anomalies with named dimensions. +It must have at least 'dat_dim' and 'space_dim'.} + +\item{obs}{A numeric array of observational anomalies with named dimensions. +It must have the same dimensions as 'exp' except the length of 'dat_dim' +and 'memb_dim'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. The default value is 'dataset'.} + +\item{space_dim}{A character string vector of 2 indicating the name of the +latitude and longitude dimensions (in order) along which ACC is computed.
+The default value is c('lat', 'lon').} + +\item{avg_dim}{A character string indicating the name of the dimension to be +averaged, which is usually the time dimension. The mean ACC is calculated +along this dimension. If there is no need to calculate the mean ACC, set it +as NULL. The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. If the data are not ensemble ones, set as NULL. The default +value is 'member'.} + +\item{lat}{A vector of the latitudes of the exp/obs grids. Only required when +the domain of interest is specified. The default value is NULL.} + +\item{lon}{A vector of the longitudes of the exp/obs grids. Only required when +the domain of interest is specified. The default value is NULL.} + +\item{lonlatbox}{A numeric vector of 4 indicating the corners of the domain of +interest: c(lonmin, lonmax, latmin, latmax). Only required when the domain +of interest is specified. The default value is NULL.} + +\item{conf}{A logical value indicating whether to retrieve the confidence +intervals or not. The default value is TRUE.} + +\item{conftype}{A character string of "parametric" or "bootstrap". +"parametric" provides a confidence interval for the ACC computed by a +Fisher transformation and a significance level for the ACC from a one-sided +Student's t-distribution. "bootstrap" provides a confidence interval for the +ACC and MACC computed from bootstrapping on the members with 100 drawings +with replacement. To guarantee the statistical robustness of the result, +make sure that your experiment and observation always have the same number +of members. "bootstrap" requires 'memb_dim' to have a value. The default value is +'parametric'.} + +\item{conf.lev}{A numeric indicating the confidence level for the +computation. The default value is 0.95.} + +\item{pval}{A logical value indicating whether to compute the p-value or not. +The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays:\cr +\item{acc}{ + The ACC with the dimensions c(nexp, nobs, the rest of the dimensions except + space_dim and memb_dim). nexp is the number of experiments (i.e., dat_dim in + exp), and nobs is the number of observations (i.e., dat_dim in obs). +} +\item{conf.lower (if conftype = "parametric") or acc_conf.lower (if + conftype = "bootstrap")}{ + The lower confidence interval of ACC with the same dimensions as ACC. Only + present if \code{conf = TRUE}. +} +\item{conf.upper (if conftype = "parametric") or acc_conf.upper (if + conftype = "bootstrap")}{ + The upper confidence interval of ACC with the same dimensions as ACC. Only + present if \code{conf = TRUE}. +} +\item{p.val}{ + The p-value with the same dimensions as ACC. Only present if + \code{pval = TRUE} and \code{conftype = "parametric"}. +} +\item{macc}{ + The mean anomaly correlation coefficient with dimensions + c(nexp, nobs, the rest of the dimensions except space_dim, memb_dim, and + avg_dim). Only present if 'avg_dim' is not NULL. +} +\item{macc_conf.lower}{ + The lower confidence interval of MACC with the same dimensions as MACC. + Only present if \code{conftype = "bootstrap"}. +} +\item{macc_conf.upper}{ + The upper confidence interval of MACC with the same dimensions as MACC. + Only present if \code{conftype = "bootstrap"}.
+} +} +\description{ +Calculate the anomaly correlation coefficient for the ensemble mean of each +model and the corresponding references over a spatial domain. It can return a +forecast time series if the data contain a forecast time dimension, and also the +start date mean if the data contain a start date dimension. +The domain of interest can be specified by providing the list +of longitudes/latitudes (lon/lat) of the data together with the corners +of the domain: lonlatbox = c(lonmin, lonmax, latmin, latmax). +} +\examples{ + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } +sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2) +sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +acc <- ACC(ano_exp, ano_obs) +acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap') +# Combine acc results for PlotACC +res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), + dim = c(dim(acc$acc), 4)) +res_bootstrap <- array(c(acc_bootstrap$acc_conf.lower, acc_bootstrap$acc, + acc_bootstrap$acc_conf.upper, acc_bootstrap$p.val), + dim = c(dim(acc_bootstrap$acc), 4)) + \donttest{ +PlotACC(res, startDates) +PlotACC(res_bootstrap, startDates) + } +} +\references{ +Jolliffe and Stephenson (2012). Forecast Verification: A + Practitioner's Guide in Atmospheric Science. Wiley-Blackwell. +} diff --git a/man/AMV.Rd b/man/AMV.Rd index 5aa6d3012cec89094ea1231f7e70d61c078d450f..3cc4113effc0c1aac77cf7498cba2b54e11478ea 100644 --- a/man/AMV.Rd +++ b/man/AMV.Rd @@ -4,17 +4,29 @@ \alias{AMV} \title{Compute the Atlantic Multidecadal Variability (AMV) index} \usage{ -AMV(data, data_lats, data_lons, type, lat_dim = "lat", lon_dim = "lon", - mask = NULL, monini = 11, fmonth_dim = "fmonth", sdate_dim = "sdate", - indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member") +AMV( + data, + data_lats, + data_lons, + type, + lat_dim = "lat", + lon_dim = "lon", + mask = NULL, + monini = 11, + fmonth_dim = "fmonth", + sdate_dim = "sdate", + indices_for_clim = NULL, + year_dim = "year", + month_dim = "month", + na.rm = TRUE, + ncores = NULL +) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -57,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE.
 In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 to the first forecast year, and the climatology is automatically computed
-over the actual common period for the different forecast years.}
+over the common calendar period for the different forecast years.}
 
 \item{year_dim}{A character string indicating the name of the year dimension
 The default value is 'year'. Only used if parameter 'type' is 'hist' or
@@ -67,15 +79,17 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or
 dimension. The default value is 'month'. Only used if parameter 'type' is
 'hist' or 'obs'.}
 
-\item{member_dim}{A character string indicating the name of the member
-dimension. The default value is 'member'. Only used if parameter 'type' is
-'dcpp' or 'hist'.}
+\item{na.rm}{A logical value indicating whether to remove NA values. The default
+value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
 }
 \value{
-A numerical array of the AMV index with the dimensions of:
-  1) sdate, forecast year, and member (in case of decadal predictions);
-  2) year and member (in case of historical simulations); or
-  3) year (in case of observations or reanalyses).
+A numerical array of the AMV index with the same dimensions as data except
+  the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+  (historical simulations or observations). In case of decadal predictions, a new dimension
+  'fyear' is added.
 }
 \description{
 The Atlantic Multidecadal Variability (AMV), also known as Atlantic
@@ -84,7 +98,8 @@ surface temperatures (SST) over the North Atlantic Ocean on multi-decadal
 time scales. The AMV index is computed as the difference of weighted-averaged
 SST anomalies over the North Atlantic region (0ºN-60ºN, 280ºE-360ºE) and the
 weighted-averaged SST anomalies over 60ºS-60ºN, 0ºE-360ºE (Trenberth &
-Dennis, 2005; Doblas-Reyes et al., 2013).
+Dennis, 2005; Doblas-Reyes et al., 2013). If different members and/or datasets are provided,
+the climatology (used to calculate the anomalies) is computed individually for all of them.
 }
 \examples{
 ## Observations or reanalyses
@@ -106,4 +121,3 @@ lon <- seq(0, 360, 10)
 index_dcpp <- AMV(data = dcpp, data_lats = lat, data_lons = lon, type = 'dcpp',
                   monini = 1)
 }
-
diff --git a/man/AnimateMap.Rd b/man/AnimateMap.Rd
index d2003ee4a385fb495d3571354a55e8623af56946..2ec930d5c964189f4c3e39ce167037c9cbe1d6fd 100644
--- a/man/AnimateMap.Rd
+++ b/man/AnimateMap.Rd
@@ -4,13 +4,33 @@
 \alias{AnimateMap}
 \title{Animate Maps of Forecast/Observed Values or Scores Over Forecast Time}
 \usage{
-AnimateMap(var, lon, lat, toptitle = rep("", 11), sizetit = 1, units = "",
-  monini = 1, freq = 12, msk95lev = FALSE, brks = NULL, cols = NULL,
-  filled.continents = FALSE, lonmin = 0, lonmax = 360, latmin = -90,
-  latmax = 90, intlon = 20, intlat = 30, drawleg = TRUE,
-  subsampleg = 1, colNA = "white", equi = TRUE,
+AnimateMap(
+  var,
+  lon,
+  lat,
+  toptitle = rep("", 11),
+  sizetit = 1,
+  units = "",
+  monini = 1,
+  freq = 12,
+  msk95lev = FALSE,
+  brks = NULL,
+  cols = NULL,
+  filled.continents = FALSE,
+  lonmin = 0,
+  lonmax = 360,
+  latmin = -90,
+  latmax = 90,
+  intlon = 20,
+  intlat = 30,
+  drawleg = TRUE,
+  subsampleg = 1,
+  colNA = "white",
+  equi = TRUE,
 fileout = c("output1_animvsltime.gif", "output2_animvsltime.gif",
-    "output3_animvsltime.gif"), ...)
+    "output3_animvsltime.gif"),
+  ...
+)
 }
 \arguments{
 \item{var}{Matrix of dimensions (nltime, nlat, nlon) or
@@ -162,4 +182,3 @@ AnimateMap(clim$clim_exp, sampleData$lon, sampleData$lat,
 # More examples in s2dverification but are deleted for now
 }
-
diff --git a/man/Ano.Rd b/man/Ano.Rd
index 2dd4dead6f14828bf18cc108b050e53c6f3fa98c..d15ffd14bdbfbb52a3975d8146267edb20eca0b9 100644
--- a/man/Ano.Rd
+++ b/man/Ano.Rd
@@ -20,8 +20,7 @@ same length.}
 computation. The default value is NULL.}
 }
 \value{
-An array with same dimensions as parameter 'data' but with different
-  dimension order. The dimensions in parameter 'clim' are ordered first.
+An array with same dimensions as parameter 'data'.
 }
 \description{
 This function computes anomalies from a multidimensional data array and a
@@ -33,12 +32,9 @@ example(Load)
 clim <- Clim(sampleData$mod, sampleData$obs)
 ano_exp <- Ano(sampleData$mod, clim$clim_exp)
 ano_obs <- Ano(sampleData$obs, clim$clim_obs)
-ano_exp <- Reorder(ano_exp, c(1, 2, 4, 3))
-ano_obs <- Reorder(ano_obs, c(1, 2, 4, 3))
 \donttest{
 PlotAno(ano_exp, ano_obs, startDates,
         toptitle = 'Anomaly', ytitle = c('K', 'K', 'K'),
         legends = 'ERSST', biglab = FALSE, fileout = 'tos_ano.png')
 }
 }
-
diff --git a/man/Ano_CrossValid.Rd b/man/Ano_CrossValid.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..1e91528335c2e68b812594311f1446a8d7eabeb3
--- /dev/null
+++ b/man/Ano_CrossValid.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Ano_CrossValid.R
+\name{Ano_CrossValid}
+\alias{Ano_CrossValid}
+\title{Compute anomalies in cross-validation mode}
+\usage{
+Ano_CrossValid(
+  exp,
+  obs,
+  time_dim = "sdate",
+  dat_dim = c("dataset", "member"),
+  memb_dim = "member",
+  memb = TRUE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A named numeric array of experimental data, with at least
+dimensions 'time_dim' and 'dat_dim'.}
+
+\item{obs}{A named numeric array of observational data, same dimensions as
+parameter 'exp' except along 'dat_dim'.}
+
+\item{time_dim}{A character string indicating the name of the time dimension.
+The default value is 'sdate'.}
+
+\item{dat_dim}{A character vector indicating the name of the dataset and
+member dimensions. When calculating the climatology, if data at one
+startdate (i.e., 'time_dim') is not complete along 'dat_dim', this startdate
+along 'dat_dim' will be discarded. The default value is
+"c('dataset', 'member')".}
+
+\item{memb_dim}{A character string indicating the name of the member
+dimension. Only used when parameter 'memb' is FALSE. It must be one element
+in 'dat_dim'. The default value is 'member'.}
+
+\item{memb}{A logical value indicating whether to subtract the climatology
+based on the individual members (TRUE) or the ensemble mean over all
+members (FALSE) when calculating the anomalies. The default value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list of 2:
+\item{$exp}{
+  A numeric array with the same dimensions as 'exp'. The dimension order may
+  change.
+}
+\item{$obs}{
+  A numeric array with the same dimensions as 'obs'. The dimension order may
+  change.
+}
+}
+\description{
+Compute the anomalies from the arrays of experimental and observational
+data by subtracting the climatologies computed with a leave-one-out
+cross validation technique and a per-pair method (Garcia-Serrano and
+Doblas-Reyes, CD, 2012).
+Per-pair climatology means that only the start dates covered by all the
+experimental/observational datasets will be used. In other words, the
+start dates which do not have complete values along the 'dat_dim' dimension
+in both 'exp' and 'obs' are excluded when computing the climatologies.
+}
+\examples{
+# Load sample data as in Load() example:
+example(Load)
+anomalies <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+\dontrun{
+PlotAno(anomalies$exp, anomalies$obs, startDates,
+        toptitle = paste('anomalies'), ytitle = c('K', 'K', 'K'),
+        legends = 'ERSST', biglab = FALSE, fileout = 'tos_ano_crossvalid.eps')
+}
+}
diff --git a/man/BrierScore.Rd b/man/BrierScore.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..9271a2adb8fca99ed95483a36b46545e219cd47c
--- /dev/null
+++ b/man/BrierScore.Rd
@@ -0,0 +1,112 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/BrierScore.R
+\name{BrierScore}
+\alias{BrierScore}
+\title{Compute Brier score, its decomposition, and Brier skill score}
+\usage{
+BrierScore(
+  exp,
+  obs,
+  thresholds = seq(0.1, 0.9, 0.1),
+  time_dim = "sdate",
+  dat_dim = NULL,
+  memb_dim = NULL,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A vector or a numeric array with named dimensions. It should be
+the predicted probabilities, which are within the range [0, 1] if memb_dim
+doesn't exist. If it has memb_dim, the values should be 0 or 1, and the
+predicted probabilities will be computed by the ensemble mean. The dimensions
+must at least have 'time_dim'.}
+
+\item{obs}{A numeric array with named dimensions of the binary observations
+(0 or 1). The dimensions must be the same as 'exp' except memb_dim, which is
+optional. If it has 'memb_dim', then the length must be 1. The length of
+'dat_dim' can be different from that in 'exp' if it exists.}
+
+\item{thresholds}{A numeric vector used to bin the forecasts. The default
+value is \code{seq(0.1, 0.9, 0.1)}, which means that the bins are
+  \code{[0, 0.1), [0.1, 0.2), ... [0.9, 1]}.}
+
+\item{time_dim}{A character string indicating the name of dimension along
+which the Brier score is computed. The default value is 'sdate'.}
+
+\item{dat_dim}{A character string indicating the name of the dataset dimension in
+'exp' and 'obs'. The length of this dimension can be different between
+'exp' and 'obs'. The default value is NULL.}
+
+\item{memb_dim}{A character string of the name of the member dimension in
+'exp' (and 'obs', optional). The function will do the ensemble mean
+over this dimension. If there is no member dimension, set NULL. The default
+value is NULL.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list that contains:
+\item{$rel}{standard reliability}
+\item{$res}{standard resolution}
+\item{$unc}{standard uncertainty}
+\item{$bs}{Brier score}
+\item{$bs_check_res}{rel - res + unc}
+\item{$bss_res}{(res - rel) / unc}
+\item{$gres}{generalized resolution}
+\item{$bs_check_gres}{rel - gres + unc}
+\item{$bss_gres}{(gres - rel) / unc}
+\item{$rel_bias_corrected}{bias-corrected rel}
+\item{$gres_bias_corrected}{bias-corrected gres}
+\item{$unc_bias_corrected}{bias-corrected unc}
+\item{$bss_bias_corrected}{(gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected}
+\item{$nk}{number of forecasts in each bin}
+\item{$fkbar}{average probability of each bin}
+\item{$okbar}{relative frequency that the observed event occurred}
+The data type and dimensions of the items depend on if the input 'exp' and
+'obs' are:\cr
+(a) Vectors\cr
+(b) Arrays with 'dat_dim' specified\cr
+(c) Arrays with no 'dat_dim' specified\cr
+Items 'rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres',
+'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected',
+'unc_bias_corrected', and 'bss_bias_corrected' are (a) a number (b) an array
+with dimensions c(nexp, nobs, all the rest dimensions in 'exp' and 'obs'
+except 'time_dim' and 'memb_dim') (c) an array with dimensions of
+'exp' and 'obs' except 'time_dim' and 'memb_dim'\cr
+Items 'nk', 'fkbar', and 'okbar' are (a) a vector whose length is the number
+of bins determined by 'thresholds' (b) an array with dimensions c(nexp, nobs,
+no. of bins, all the rest dimensions in 'exp' and 'obs' except 'time_dim' and
+'memb_dim') (c) an array with dimensions c(no. of bins, all the rest dimensions
+in 'exp' and 'obs' except 'time_dim' and 'memb_dim')
+}
+\description{
+Compute the Brier score (BS) and the components of its standard decomposition
+with the two within-bin components described in Stephenson et al. (2008). It
+also returns the bias-corrected decomposition of the BS (Ferro and Fricker,
+2012). BSS has the climatology as the reference forecast.
+}
+\examples{
+# Inputs are vectors
+exp <- runif(10)
+obs <- round(exp)
+x <- BrierScore(exp, obs)
+
+# Inputs are arrays
+example(Load)
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+bins_ano_exp <- ProbBins(ano_exp, thr = c(1/3, 2/3))
+bins_ano_obs <- ProbBins(ano_obs, thr = c(1/3, 2/3))
+res <- BrierScore(bins_ano_exp, MeanDims(bins_ano_obs, 'member'), memb_dim = 'member')
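+
+# A hypothetical sketch with a dataset dimension (the array shapes and the
+# random values below are illustrative assumptions, not package data):
+exp2 <- array(runif(20), dim = c(sdate = 10, dataset = 2))
+obs2 <- array(round(runif(10)), dim = c(sdate = 10, dataset = 1))
+res2 <- BrierScore(exp2, obs2, dat_dim = 'dataset')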
+
+}
+\references{
+Wilks (2006) Statistical Methods in the Atmospheric Sciences.\cr
+Stephenson et al. (2008). Two extra components in the Brier score decomposition.
+  Weather and Forecasting, 23: 752-757.\cr
+Ferro and Fricker (2012). A bias-corrected decomposition of the BS.
+  Quarterly Journal of the Royal Meteorological Society, DOI: 10.1002/qj.1924.
+}
diff --git a/man/CDORemap.Rd b/man/CDORemap.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..c7a00a8f7789ec5c8162a2a3b03d0a8a0b5eafd3
--- /dev/null
+++ b/man/CDORemap.Rd
@@ -0,0 +1,232 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CDORemap.R
+\name{CDORemap}
+\alias{CDORemap}
+\title{Interpolate arrays with longitude and latitude dimensions using CDO}
+\usage{
+CDORemap(
+  data_array = NULL,
+  lons,
+  lats,
+  grid,
+  method,
+  avoid_writes = TRUE,
+  crop = TRUE,
+  force_remap = FALSE,
+  write_dir = tempdir()
+)
+}
+\arguments{
+\item{data_array}{Multidimensional numeric array to be interpolated. If
+provided, it must have at least a longitude and a latitude dimension,
+identified by the array dimension names. The names for these dimensions
+must be one of the recognized by s2dv (can be checked with
+\code{s2dv:::.KnownLonNames()} and
+\code{s2dv:::.KnownLatNames()}).}
+
+\item{lons}{Numeric vector or array of longitudes of the centers of the grid
+cells. Its size must match the size of the longitude/latitude dimensions
+of the input array.}
+
+\item{lats}{Numeric vector or array of latitudes of the centers of the grid
+cells. Its size must match the size of the longitude/latitude dimensions
+of the input array.}
+
+\item{grid}{Character string specifying either a name of a target grid
+(recognized by CDO; e.g.: 'r256x128', 't106grid') or a path to another
+NetCDF file which to read the target grid from (a single grid must be
+defined in such a file).}
+
+\item{method}{Character string specifying an interpolation method
+(recognized by CDO; e.g.: 'con', 'bil', 'bic', 'dis'). The following
+long names are also supported: 'conservative', 'bilinear', 'bicubic' and
+'distance-weighted'.}
+
+\item{avoid_writes}{A step of permutation is needed when the input array
+has more than 3 dimensions and neither the longitude nor the latitude
+dimension is in the right-most position (CDO would not accept it without
+permuting previously). This step, executed by default when needed, can be
+avoided at the price of writing more intermediate files (which is usually
+inconvenient) by setting the parameter \code{avoid_writes = FALSE}.}
+
+\item{crop}{Whether to crop the data after interpolation with
+'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole
+world as CDO does by default (FALSE). If \code{crop = TRUE} then the
+longitude and latitude borders which to crop at are taken as the limits of
+the cells at the borders ('lons' and 'lats' are perceived as cell centers),
+i.e. the resulting array will contain data that covers the same area as
+the input array. This is equivalent to specifying \code{crop = 'preserve'},
+i.e. preserving area. If \code{crop = 'tight'} then the borders which to
+crop at are taken as the minimum and maximum cell centers in 'lons' and
+'lats', i.e. the area covered by the resulting array may be smaller if
+interpolating from a coarse grid to a fine grid. The parameter 'crop' also
+accepts a numeric vector of custom borders which to crop at:
+c(western border, eastern border, southern border, northern border).}
+
+\item{force_remap}{Whether to force remapping, even if the input data array
+is already on the target grid.}
+
+\item{write_dir}{Path to the directory where to create the intermediate
+files for CDO to work. By default, the R session temporary directory is
+used (\code{tempdir()}).}
+}
+\value{
+A list with the following components:
+  \item{'data_array'}{The interpolated data array (if an input array
+  is provided at all, NULL otherwise).}
+  \item{'lons'}{The longitudes of the data on the destination grid.}
+  \item{'lats'}{The latitudes of the data on the destination grid.}
+}
+\description{
+This function takes as inputs a multidimensional array (optional), a vector
+or matrix of longitudes, a vector or matrix of latitudes, a destination grid
+specification, and the name of a method to be used to interpolate (one of
+those available in the 'remap' utility in CDO). The interpolated array is
+returned (if provided) together with the new longitudes and latitudes.\cr\cr
+\code{CDORemap()} permutes by default the dimensions of the input array (if
+needed), splits it in chunks (CDO can work with data arrays of up to 4
+dimensions), generates a file with the data of each chunk, interpolates it
+with CDO, reads it back into R and merges it into a result array. If no
+input array is provided, the longitude and latitude vectors will be
+transformed only. If the array is already on the desired destination grid,
+no transformation is performed (this behaviour works only for lonlat and
+gaussian grids).\cr\cr
+Any metadata attached to the input data array, longitudes or latitudes will
+be preserved or accordingly modified.
+}
+\examples{
+ \dontrun{
+# Interpolating only vectors of longitudes and latitudes
+lon <- seq(0, 360 - 360/50, length.out = 50)
+lat <- seq(-90, 90, length.out = 25)
+tas2 <- CDORemap(NULL, lon, lat, 't170grid', 'bil', TRUE)
+
+# Minimal array interpolation
+tas <- array(1:50, dim = c(25, 50))
+names(dim(tas)) <- c('lat', 'lon')
+lon <- seq(0, 360 - 360/50, length.out = 50)
+lat <- seq(-90, 90, length.out = 25)
+tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE)
+
+# Metadata can be attached to the inputs. It will be preserved and
+# accordingly modified.
+tas <- array(1:50, dim = c(25, 50))
+names(dim(tas)) <- c('lat', 'lon')
+lon <- seq(0, 360 - 360/50, length.out = 50)
+metadata <- list(lon = list(units = 'degrees_east'))
+attr(lon, 'variables') <- metadata
+lat <- seq(-90, 90, length.out = 25)
+metadata <- list(lat = list(units = 'degrees_north'))
+attr(lat, 'variables') <- metadata
+metadata <- list(tas = list(dim = list(lat = list(len = 25,
+                                                  vals = lat),
+                                       lon = list(len = 50,
+                                                  vals = lon)
+                           )))
+attr(tas, 'variables') <- metadata
+tas2 <- CDORemap(tas, lon, lat, 't170grid', 'bil', TRUE)
+
+# Arrays of any number of dimensions in any order can be provided.
+num_lats <- 25
+num_lons <- 50
+tas <- array(1:(10*num_lats*10*num_lons*10),
+             dim = c(10, num_lats, 10, num_lons, 10))
+names(dim(tas)) <- c('a', 'lat', 'b', 'lon', 'c')
+lon <- seq(0, 360 - 360/num_lons, length.out = num_lons)
+metadata <- list(lon = list(units = 'degrees_east'))
+attr(lon, 'variables') <- metadata
+lat <- seq(-90, 90, length.out = num_lats)
+metadata <- list(lat = list(units = 'degrees_north'))
+attr(lat, 'variables') <- metadata
+metadata <- list(tas = list(dim = list(a = list(),
+                                       lat = list(len = num_lats,
+                                                  vals = lat),
+                                       b = list(),
+                                       lon = list(len = num_lons,
+                                                  vals = lon),
+                                       c = list()
+                           )))
+attr(tas, 'variables') <- metadata
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE)
+# The step of permutation can be avoided but more intermediate file writes
+# will be performed.
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+
+# If the provided array has the longitude or latitude dimension in the
+# right-most position, the same number of file writes will be performed,
+# even if avoid_writes = FALSE.
+num_lats <- 25
+num_lons <- 50
+tas <- array(1:(10*num_lats*10*num_lons*10),
+             dim = c(10, num_lats, 10, num_lons))
+names(dim(tas)) <- c('a', 'lat', 'b', 'lon')
+lon <- seq(0, 360 - 360/num_lons, length.out = num_lons)
+metadata <- list(lon = list(units = 'degrees_east'))
+attr(lon, 'variables') <- metadata
+lat <- seq(-90, 90, length.out = num_lats)
+metadata <- list(lat = list(units = 'degrees_north'))
+attr(lat, 'variables') <- metadata
+metadata <- list(tas = list(dim = list(a = list(),
+                                       lat = list(len = num_lats,
+                                                  vals = lat),
+                                       b = list(),
+                                       lon = list(len = num_lons,
+                                                  vals = lon)
+                           )))
+attr(tas, 'variables') <- metadata
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', TRUE)
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+
+# An example of an interpolation from and onto a rectangular regular grid
+num_lats <- 25
+num_lons <- 50
+tas <- array(1:(1*num_lats*num_lons), dim = c(num_lats, num_lons))
+names(dim(tas)) <- c('y', 'x')
+lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+             dim = c(num_lons, num_lats))
+metadata <- list(lon = list(units = 'degrees_east'))
+names(dim(lon)) <- c('x', 'y')
+attr(lon, 'variables') <- metadata
+lat <- t(array(seq(-90, 90, length.out = num_lats),
+               dim = c(num_lats, num_lons)))
+metadata <- list(lat = list(units = 'degrees_north'))
+names(dim(lat)) <- c('x', 'y')
+attr(lat, 'variables') <- metadata
+tas2 <- CDORemap(tas, lon, lat, 'r100x50', 'bil')
+
+# An example of an interpolation from an irregular grid onto a gaussian grid
+num_lats <- 25
+num_lons <- 50
+tas <- array(1:(10*num_lats*10*num_lons*10),
+             dim = c(10, num_lats, 10, num_lons))
+names(dim(tas)) <- c('a', 'j', 'b', 'i')
+lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+             dim = c(num_lons, num_lats))
+metadata <- list(lon = list(units = 'degrees_east'))
+names(dim(lon)) <- c('i', 'j')
+attr(lon, 'variables') <- metadata
+lat <- t(array(seq(-90, 90, length.out = num_lats),
+               dim = c(num_lats, num_lons)))
+metadata <- list(lat = list(units = 'degrees_north'))
+names(dim(lat)) <- c('i', 'j')
+attr(lat, 'variables') <- metadata
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil')
+
+# Again, the dimensions can be in any order
+num_lats <- 25
+num_lons <- 50
+tas <- array(1:(10*num_lats*10*num_lons),
+             dim = c(10, num_lats, 10, num_lons))
+names(dim(tas)) <- c('a', 'j', 'b', 'i')
+lon <- array(seq(0, 360 - 360/num_lons, length.out = num_lons),
+             dim = c(num_lons, num_lats))
+names(dim(lon)) <- c('i', 'j')
+lat <- t(array(seq(-90, 90, length.out = num_lats),
+               dim = c(num_lats, num_lons)))
+names(dim(lat)) <- c('i', 'j')
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil')
+tas2 <- CDORemap(tas, lon, lat, 't17grid', 'bil', FALSE)
+# It is possible to specify an external NetCDF file as target grid reference
+tas2 <- CDORemap(tas, lon, lat, 'external_file.nc', 'bil')
+}
+}
diff --git a/man/Clim.Rd b/man/Clim.Rd
index a997a7f1db2a33a5d63a4fe7219fe70af176b08f..78559bdbefc9c5253d510f2c7eee1fbc743324aa 100644
--- a/man/Clim.Rd
+++ b/man/Clim.Rd
@@ -4,9 +4,18 @@
 \alias{Clim}
 \title{Compute Bias Corrected Climatologies}
 \usage{
-Clim(exp, obs, time_dim = "sdate", dat_dim = c("dataset", "member"),
-  method = "clim", ftime_dim = "ftime", memb = TRUE,
-  memb_dim = "member", na.rm = TRUE, ncores = NULL)
+Clim(
+  exp,
+  obs,
time_dim = "sdate", + dat_dim = c("dataset", "member"), + method = "clim", + ftime_dim = "ftime", + memb = TRUE, + memb_dim = "member", + na.rm = TRUE, + ncores = NULL +) } \arguments{ \item{exp}{A named numeric array of experimental data, with at least two @@ -82,4 +91,3 @@ PlotClim(clim$clim_exp, clim$clim_obs, listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps') } } - diff --git a/man/Cluster.Rd b/man/Cluster.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7ea25de5bfe29b8f49dc2c117a220d01dc6c8236 --- /dev/null +++ b/man/Cluster.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Cluster.R +\name{Cluster} +\alias{Cluster} +\title{K-means Clustering} +\usage{ +Cluster( + data, + weights = NULL, + time_dim = "sdate", + space_dim = NULL, + nclusters = NULL, + index = "sdindex", + ncores = NULL +) +} +\arguments{ +\item{data}{A numeric array with named dimensions that at least have +'time_dim' corresponding to time and 'space_dim' (optional) corresponding +to either area-averages over a series of domains or the grid points for any +sptial grid structure.} + +\item{weights}{A numeric array with named dimension of multiplicative weights +based on the areas covering each domain/region or grid-cell of 'data'. The +dimensions must be equal to the 'space_dim' in 'data'. The default value is +NULL which means no weighting is applied.} + +\item{time_dim}{A character string indicating the name of time dimension in +'data'. The default value is 'sdate'.} + +\item{space_dim}{A character vector indicating the names of spatial dimensions +in 'data'. The default value is NULL.} + +\item{nclusters}{A positive integer K that must be bigger than 1 indicating +the number of clusters to be computed, or K initial cluster centers to be +used in the method. The default value is NULL, which means that the number +of clusters will be determined by NbClust(). The parameter 'index' +therefore needs to be specified for NbClust() to find the optimal number of +clusters to be used for K-means clustering calculation.} + +\item{index}{A character string of the validity index from NbClust package +that can be used to determine optimal K if K is not specified with +'nclusters'. The default value is 'sdindex' (Halkidi et al. 2001, JIIS). +Other indices available in NBClust are "kl", "ch", "hartigan", "ccc", +"scott", "marriot", "trcovw", "tracew", "friedman", "rubin", "cindex", "db", +"silhouette", "duda", "pseudot2", "beale", "ratkowsky", "ball", +"ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", +"hubert", "sdindex", and "sdbw". +One can also use all of them with the option 'alllong' or almost all indices +clusters K is detremined by the majority rule (the maximum of histogram of +the results of all indices with finite solutions). Use of some indices on +a big and/or unstructured dataset can be computationally intense and/or +could lead to numerical singularity.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{$cluster}{ + An integer array of the occurrence of a cluster along time, i.e., when + certain data member in time is allocated to a specific cluster. The dimensions + are same as 'data' without 'space_dim'. +} +\item{$centers}{ + A nemeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees + of freedom]). The rest dimensions are same as 'data' except 'time_dim' + and 'space_dim'. 
+}
+\item{$totss}{
+  A numeric array of the total sum of squares. The dimensions are the same as
+  'data' except 'time_dim' and 'space_dim'.
+}
+\item{$withinss}{
+  A numeric array of within-cluster sum of squares, one component per cluster.
+  The first dimension is the number of clusters, and the rest of the dimensions
+  are the same as 'data' except 'time_dim' and 'space_dim'.
+}
+\item{$tot.withinss}{
+  A numeric array of the total within-cluster sum of squares, i.e.,
+  sum(withinss). The dimensions are the same as 'data' except 'time_dim' and
+  'space_dim'.
+}
+\item{$betweenss}{
+  A numeric array of the between-cluster sum of squares, i.e. totss - tot.withinss.
+  The dimensions are the same as 'data' except 'time_dim' and 'space_dim'.
+}
+\item{$size}{
+  A numeric array of the number of points in each cluster. The first dimension
+  is the number of clusters, and the rest of the dimensions are the same as
+  'data' except 'time_dim' and 'space_dim'.
+}
+\item{$iter}{
+  A numeric array of the number of (outer) iterations. The dimensions are
+  the same as 'data' except 'time_dim' and 'space_dim'.
+}
+\item{$ifault}{
+  A numeric array of an indicator of a possible algorithm problem. The
+  dimensions are the same as 'data' except 'time_dim' and 'space_dim'.
+}
+}
+\description{
+Compute cluster centers and their time series of occurrences, with the
+K-means clustering method using Euclidean distance, of an array of input data
+with any number of dimensions that at least contain time_dim.
+Specifically, it partitions the array along the time axis in K groups or
+clusters in which each space vector/array belongs to (i.e., is a member of)
+the cluster with the nearest center or centroid. This function is a wrapper of
+kmeans() and relies on the NbClust package (Charrad et al., 2014 JSS) to
+determine the optimal number of clusters used for K-means clustering if it is
+not provided by users.
+}
+\examples{
+# Generating synthetic data
+a1 <- array(dim = c(200, 4))
+mean1 <- 0
+sd1 <- 0.3
+
+c0 <- seq(1, 200)
+c1 <- sort(sample(x = 1:200, size = sample(x = 50:150, size = 1), replace = FALSE))
+x1 <- c(1, 1, 1, 1)
+for (i1 in c1) {
+  a1[i1, ] <- x1 + rnorm(4, mean = mean1, sd = sd1)
+}
+
+c1p5 <- c0[!(c0 \%in\% c1)]
+c2 <- c1p5[seq(1, length(c1p5), 2)]
+x2 <- c(2, 2, 4, 4)
+for (i2 in c2) {
+  a1[i2, ] <- x2 + rnorm(4, mean = mean1, sd = sd1)
+}
+
+c3 <- c1p5[seq(2, length(c1p5), 2)]
+x3 <- c(3, 3, 1, 1)
+for (i3 in c3) {
+  a1[i3, ] <- x3 + rnorm(4, mean = mean1, sd = sd1)
+}
+
+# Computing the clusters
+names(dim(a1)) <- c('sdate', 'space')
+res1 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2]), nclusters = 3)
+res2 <- Cluster(data = a1, weights = array(1, dim = dim(a1)[2]))
+
+}
+\references{
+Wilks, 2011, Statistical Methods in the Atmospheric Sciences, 3rd ed., Elsevier, 676 pp.
+}
diff --git a/man/ColorBar.Rd b/man/ColorBar.Rd
index 1287b70ecba354d01441fb103b497e969a177bf9..6d62f153d89f73aae9cffea2e2ead04a66d894f2 100644
--- a/man/ColorBar.Rd
+++ b/man/ColorBar.Rd
@@ -4,13 +4,30 @@
 \alias{ColorBar}
 \title{Draws a Color Bar}
 \usage{
-ColorBar(brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL,
-  bar_limits = NULL, var_limits = NULL, triangle_ends = NULL,
-  col_inf = NULL, col_sup = NULL, color_fun = clim.palette(),
-  plot = TRUE, draw_ticks = TRUE, draw_separators = FALSE,
-  triangle_ends_scale = 1, extra_labels = NULL, title = NULL,
-  title_scale = 1, label_scale = 1, tick_scale = 1,
-  extra_margin = rep(0, 4), label_digits = 4, ...)
+ColorBar( + brks = NULL, + cols = NULL, + vertical = TRUE, + subsampleg = NULL, + bar_limits = NULL, + var_limits = NULL, + triangle_ends = NULL, + col_inf = NULL, + col_sup = NULL, + color_fun = clim.palette(), + plot = TRUE, + draw_ticks = TRUE, + draw_separators = FALSE, + triangle_ends_scale = 1, + extra_labels = NULL, + title = NULL, + title_scale = 1, + label_scale = 1, + tick_scale = 1, + extra_margin = rep(0, 4), + label_digits = 4, + ... +) } \arguments{ \item{brks}{Can be provided in two formats: @@ -175,4 +192,3 @@ cols <- c("dodgerblue4", "dodgerblue1", "forestgreen", "yellowgreen", "white", lims <- seq(-1, 1, 0.2) ColorBar(lims, cols) } - diff --git a/man/Composite.Rd b/man/Composite.Rd index 64d5bfcc5f97ee75516a75fe3a5bc6937790fed2..cc21d389a4de84f7f5f7e4003579cc7008ff49d3 100644 --- a/man/Composite.Rd +++ b/man/Composite.Rd @@ -4,8 +4,17 @@ \alias{Composite} \title{Compute composites} \usage{ -Composite(data, occ, time_dim = "time", space_dim = c("lon", "lat"), - lag = 0, eno = FALSE, K = NULL, fileout = NULL, ncores = NULL) +Composite( + data, + occ, + time_dim = "time", + space_dim = c("lon", "lat"), + lag = 0, + eno = FALSE, + K = NULL, + fileout = NULL, + ncores = NULL +) } \arguments{ \item{data}{A numeric array containing two spatial and one temporal @@ -101,4 +110,3 @@ occ <- c(1, 1, 2, 2, 3, 3) res <- Composite(data, occ, time_dim = 'case', K = 4) } - diff --git a/man/ConfigApplyMatchingEntries.Rd b/man/ConfigApplyMatchingEntries.Rd index 5f0efb100dee5886b69850262cdcb54203bc77d3..ee4cb5a40bf6cc8c44ff82fff0cfecb4279ce298 100644 --- a/man/ConfigApplyMatchingEntries.Rd +++ b/man/ConfigApplyMatchingEntries.Rd @@ -4,8 +4,14 @@ \alias{ConfigApplyMatchingEntries} \title{Apply Matching Entries To Dataset Name And Variable Name To Find Related Info} \usage{ -ConfigApplyMatchingEntries(configuration, var, exp = NULL, obs = NULL, - show_entries = FALSE, show_result = TRUE) +ConfigApplyMatchingEntries( + configuration, + var, + exp = NULL, + obs = NULL, + show_entries = FALSE, + show_result = TRUE +) } \arguments{ \item{configuration}{Configuration object obtained from ConfigFileOpen() @@ -68,4 +74,3 @@ ConfigApplyMatchingEntries, ConfigEditDefinition, ConfigEditEntry, ConfigFileOpen, ConfigShowSimilarEntries, ConfigShowTable } - diff --git a/man/ConfigEditDefinition.Rd b/man/ConfigEditDefinition.Rd index 8e1e968c8e842c471c2cc37dd44fd5284858e6c7..223e95abd08dc79547fcebdd56d154b860b69114 100644 --- a/man/ConfigEditDefinition.Rd +++ b/man/ConfigEditDefinition.Rd @@ -57,4 +57,3 @@ match_info <- ConfigApplyMatchingEntries(configuration, 'tas', [ConfigEditEntry()], [ConfigFileOpen()], [ConfigShowSimilarEntries()], [ConfigShowTable()]. 
} - diff --git a/man/ConfigEditEntry.Rd b/man/ConfigEditEntry.Rd index 9abf3e522071ece12dec0db31e72c22c456a8c73..e597709dfa2c7a8683a716c0dad12cf4faf7c82c 100644 --- a/man/ConfigEditEntry.Rd +++ b/man/ConfigEditEntry.Rd @@ -1,22 +1,46 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ConfigEditEntry.R \name{ConfigEditEntry} -\alias{ConfigAddEntry} \alias{ConfigEditEntry} +\alias{ConfigAddEntry} \alias{ConfigRemoveEntry} \title{Add, Remove Or Edit Entries In The Configuration} \usage{ -ConfigEditEntry(configuration, dataset_type, position, dataset_name = NULL, - var_name = NULL, main_path = NULL, file_path = NULL, - nc_var_name = NULL, suffix = NULL, varmin = NULL, varmax = NULL) +ConfigEditEntry( + configuration, + dataset_type, + position, + dataset_name = NULL, + var_name = NULL, + main_path = NULL, + file_path = NULL, + nc_var_name = NULL, + suffix = NULL, + varmin = NULL, + varmax = NULL +) -ConfigAddEntry(configuration, dataset_type, position = "last", - dataset_name = ".*", var_name = ".*", main_path = "*", - file_path = "*", nc_var_name = "*", suffix = "*", varmin = "*", - varmax = "*") +ConfigAddEntry( + configuration, + dataset_type, + position = "last", + dataset_name = ".*", + var_name = ".*", + main_path = "*", + file_path = "*", + nc_var_name = "*", + suffix = "*", + varmin = "*", + varmax = "*" +) -ConfigRemoveEntry(configuration, dataset_type, dataset_name = NULL, - var_name = NULL, position = NULL) +ConfigRemoveEntry( + configuration, + dataset_type, + dataset_name = NULL, + var_name = NULL, + position = NULL +) } \arguments{ \item{configuration}{Configuration object obtained via ConfigFileOpen() @@ -99,4 +123,3 @@ ConfigFileSave(configuration, config_file, confirm = FALSE) ConfigApplyMatchingEntries, ConfigEditDefinition, ConfigEditEntry, ConfigFileOpen, ConfigShowSimilarEntries, ConfigShowTable } - diff --git a/man/ConfigFileOpen.Rd b/man/ConfigFileOpen.Rd index eee183f0f87335a6404c1c0f8e975a0252c4d6d6..893900b688a343edc9c27ac4cf31cee49fa2da2c 100644 --- a/man/ConfigFileOpen.Rd +++ b/man/ConfigFileOpen.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ConfigFileOpen.R \name{ConfigFileOpen} -\alias{ConfigFileCreate} \alias{ConfigFileOpen} +\alias{ConfigFileCreate} \alias{ConfigFileSave} \title{Functions To Create Open And Save Configuration File} \usage{ @@ -194,4 +194,3 @@ ConfigFileSave(configuration, config_file, confirm = FALSE) ConfigApplyMatchingEntries, ConfigEditDefinition, ConfigEditEntry, ConfigFileOpen, ConfigShowSimilarEntries, ConfigShowTable } - diff --git a/man/ConfigShowSimilarEntries.Rd b/man/ConfigShowSimilarEntries.Rd index b9f80ced42f8a8683939346b1f8a9616b1940e7e..72b77e1069e0249ef4194312455f56c2b070696e 100644 --- a/man/ConfigShowSimilarEntries.Rd +++ b/man/ConfigShowSimilarEntries.Rd @@ -4,10 +4,18 @@ \alias{ConfigShowSimilarEntries} \title{Find Similar Entries In Tables Of Datasets} \usage{ -ConfigShowSimilarEntries(configuration, dataset_name = NULL, - var_name = NULL, main_path = NULL, file_path = NULL, - nc_var_name = NULL, suffix = NULL, varmin = NULL, varmax = NULL, - n_results = 10) +ConfigShowSimilarEntries( + configuration, + dataset_name = NULL, + var_name = NULL, + main_path = NULL, + file_path = NULL, + nc_var_name = NULL, + suffix = NULL, + varmin = NULL, + varmax = NULL, + n_results = 10 +) } \arguments{ \item{configuration}{Configuration object obtained either from @@ -79,4 +87,3 @@ ConfigShowSimilarEntries(configuration, dataset_name = "Exper", 
 ConfigApplyMatchingEntries, ConfigEditDefinition, ConfigEditEntry,
 ConfigFileOpen, ConfigShowSimilarEntries, ConfigShowTable
 }
-
diff --git a/man/ConfigShowTable.Rd b/man/ConfigShowTable.Rd
index 7c08053e7e3aa21ca76a3f2ac951e05ddec0b175..5e4172a70380fdb3a980ce58d003d8515e6e7713 100644
--- a/man/ConfigShowTable.Rd
+++ b/man/ConfigShowTable.Rd
@@ -1,8 +1,8 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ConfigShowTable.R
 \name{ConfigShowTable}
-\alias{ConfigShowDefinitions}
 \alias{ConfigShowTable}
+\alias{ConfigShowDefinitions}
 \title{Show Configuration Tables And Definitions}
 \usage{
 ConfigShowTable(configuration, dataset_type, line_numbers = NULL)
@@ -54,4 +54,3 @@ ConfigShowDefinitions(configuration)
 [ConfigEditEntry()], [ConfigFileOpen()], [ConfigShowSimilarEntries()],
 [ConfigShowTable()].
 }
-
diff --git a/man/Consist_Trend.Rd b/man/Consist_Trend.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..2ac7d42655c91de8713d5a2ff7a865b6e5da5aa9
--- /dev/null
+++ b/man/Consist_Trend.Rd
@@ -0,0 +1,100 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Consist_Trend.R
+\name{Consist_Trend}
+\alias{Consist_Trend}
+\title{Compute trend using only model data for which observations are available}
+\usage{
+Consist_Trend(
+  exp,
+  obs,
+  dat_dim = "dataset",
+  time_dim = "sdate",
+  interval = 1,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A named numeric array of experimental data, with at least two
+dimensions 'time_dim' and 'dat_dim'.}
+
+\item{obs}{A named numeric array of observational data, same dimensions as
+parameter 'exp' except along 'dat_dim'.}
+
+\item{dat_dim}{A character string indicating the name of the dataset
+dimension. If data at some point of 'time_dim' are not complete along
+'dat_dim' in both 'exp' and 'obs', this point in all 'dat_dim' will be
+discarded. The default value is 'dataset'.}
+
+\item{time_dim}{A character string indicating the name of dimension along
+which the trend is computed. The default value is 'sdate'.}
+
+\item{interval}{A positive numeric indicating the unit length between two
+points along 'time_dim' dimension. The default value is 1.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list containing:
+\item{$trend}{
+  A numeric array of the trend coefficients of model and observational data
+  with dimensions c(stats = 2, nexp + nobs, the rest dimensions of 'exp' and
+  'obs' except time_dim), where 'nexp' is the length of 'dat_dim' in 'exp'
+  and 'nobs' is the length of 'dat_dim' in 'obs'. The 'stats' dimension
+  contains the intercept and the slope.
+}
+\item{$conf.lower}{
+  A numeric array of the lower limit of 95\% confidence interval with
+  dimensions same as $trend. The 'stats' dimension contains the lower
+  confidence level of the intercept and the slope.
+}
+\item{$conf.upper}{
+  A numeric array of the upper limit of 95\% confidence interval with
+  dimensions same as $trend. The 'stats' dimension contains the upper
+  confidence level of the intercept and the slope.
+}
+\item{$detrended_exp}{
+  A numeric array of the detrended model data with the same dimensions as
+  'exp'.
+}
+\item{$detrended_obs}{
+  A numeric array of the detrended observational data with the same
+  dimensions as 'obs'.
+}
+}
+\description{
+Compute the linear trend for a time series by least square fitting together
+with the associated error interval for both the observational and model data.
+The 95\% confidence interval and detrended observational and model data are
+also provided.\cr
+The function doesn't do the ensemble mean, so if the input data have the
+member dimension, the ensemble mean needs to be computed beforehand.
+}
+\examples{
+# Load sample data as in Load() example:
+example(Load)
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+runmean_months <- 12
+smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+dim_to_mean <- 'member'  # average along members
+years_between_startdates <- 5
+trend <- Consist_Trend(MeanDims(smooth_ano_exp, dim_to_mean, na.rm = TRUE),
+                       MeanDims(smooth_ano_obs, dim_to_mean, na.rm = TRUE),
+                       interval = years_between_startdates)
+# Bind data for plotting
+trend_bind <- abind::abind(trend$conf.lower[2, , ], trend$trend[2, , ],
+                           trend$conf.upper[2, , ], trend$trend[1, , ], along = 0)
+trend_bind <- Reorder(trend_bind, c(2, 1, 3))
+\donttest{
+PlotVsLTime(trend_bind, toptitle = "trend", ytitle = "K/(5 years)",
+            monini = 11, limits = c(-0.8, 0.8), listexp = c('CMIP5 IC3'),
+            listobs = c('ERSST'), biglab = FALSE, hlines = c(0))
+PlotAno(InsertDim(trend$detrended_exp, 2, 1), InsertDim(trend$detrended_obs, 2, 1),
+        startDates, "Detrended tos anomalies", ytitle = 'K',
+        legends = 'ERSST', biglab = FALSE)
+}
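+
+# Alternatively, a minimal self-contained sketch on synthetic data (the
+# dimension names and sizes below are illustrative assumptions, not
+# package data):
+exp_syn <- array(rnorm(60), dim = c(dataset = 1, sdate = 12, ftime = 5))
+obs_syn <- array(rnorm(60), dim = c(dataset = 1, sdate = 12, ftime = 5))
+trend_syn <- Consist_Trend(exp_syn, obs_syn, interval = 1)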
+
+}
diff --git a/man/Corr.Rd b/man/Corr.Rd
index bf5575e443e13ff5a9a7d69eb166ec5045042ab3..077791fbeeb5e100a18e5b44c26a0b4753772d39 100644
--- a/man/Corr.Rd
+++ b/man/Corr.Rd
@@ -4,16 +4,28 @@
 \alias{Corr}
 \title{Compute the correlation coefficient between an array of forecast and their corresponding observation}
 \usage{
-Corr(exp, obs, time_dim = "sdate", dat_dim = "dataset", comp_dim = NULL,
-  limits = NULL, method = "pearson", pval = TRUE, conf = TRUE,
-  conf.lev = 0.95, ncores = NULL)
+Corr(
+  exp,
+  obs,
+  time_dim = "sdate",
+  dat_dim = "dataset",
+  comp_dim = NULL,
+  limits = NULL,
+  method = "pearson",
+  memb_dim = NULL,
+  memb = TRUE,
+  pval = TRUE,
+  conf = TRUE,
+  conf.lev = 0.95,
+  ncores = NULL
+)
 }
 \arguments{
 \item{exp}{A named numeric array of experimental data, with at least two
 dimensions 'time_dim' and 'dat_dim'.}
 
 \item{obs}{A named numeric array of observational data, same dimensions as
-parameter 'exp' except along dat_dim.}
+parameter 'exp' except along 'dat_dim' and 'memb_dim'.}
 
 \item{time_dim}{A character string indicating the name of dimension along
 which the correlations are computed. The default value is 'sdate'.}
@@ -31,6 +43,14 @@ be completed. The default is c(1, length(comp_dim dimension)).}
 \item{method}{A character string indicating the type of correlation:
 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'.}
 
+\item{memb_dim}{A character string indicating the name of the member
+dimension. It must be one dimension in 'exp' and 'obs'. If there is no
+member dimension, set NULL. The default value is NULL.}
+
+\item{memb}{A logical value indicating whether to keep the 'memb_dim'
+dimension (TRUE) or compute the ensemble mean over it (FALSE). Only
+functional when 'memb_dim' is not NULL. The default value is TRUE.}
+
 \item{pval}{A logical value indicating whether to compute or not the p-value
 of the test Ho: Corr = 0. The default value is TRUE.}
 
@@ -45,9 +65,12 @@ computation. The default value is NULL.}
 }
 \value{
 A list containing the numeric arrays with dimension:\cr
-  c(nexp, nobs, all other dimensions of exp except time_dim).\cr
-nexp is the number of experiment (i.e., dat_dim in exp), and nobs is the
-number of observation (i.e., dat_dim in obs).\cr
+  c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except
+  time_dim and memb_dim).\cr
+nexp is the number of experiments (i.e., 'dat_dim' in exp), and nobs is the
+number of observations (i.e., 'dat_dim' in obs). exp_memb is the number of
+members in the experiment (i.e., 'memb_dim' in exp) and obs_memb is the
+number of members in the observation (i.e., 'memb_dim' in obs).\cr\cr
 \item{$corr}{
   The correlation coefficient.
 }
@@ -79,11 +102,27 @@ have inconsistent length between 'exp' and 'obs'. If all the dimensions of
 compute the correlation.
 }
 \examples{
-# Load sample data as in Load() example:
+# Case 1: Load sample data as in Load() example:
 example(Load)
 clim <- Clim(sampleData$mod, sampleData$obs)
-corr <- Corr(clim$clim_exp, clim$clim_obs, time_dim = 'ftime', dat_dim = 'member')
-# Renew the example when Ano and Smoothing is ready
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+runmean_months <- 12
-}
+# Smooth along lead-times
+smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months)
+smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months)
+required_complete_row <- 3  # Discard start dates which contain any NA lead-times
+leadtimes_per_startdate <- 60
+corr <- Corr(MeanDims(smooth_ano_exp, 'member'),
+             MeanDims(smooth_ano_obs, 'member'),
+             comp_dim = 'ftime',
+             limits = c(ceiling((runmean_months + 1) / 2),
+                        leadtimes_per_startdate - floor(runmean_months / 2)))
+# Case 2: Keep member dimension
+corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member')
+# ensemble mean
+corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE)
+
+}
diff --git a/man/EOF.Rd b/man/EOF.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..3d67308965fbb463092acee4cae2534e7e0e2740
--- /dev/null
+++ b/man/EOF.Rd
@@ -0,0 +1,115 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/EOF.R
+\name{EOF}
+\alias{EOF}
+\title{Area-weighted empirical orthogonal function analysis using SVD}
+\usage{
+EOF(
+  ano,
+  lat,
+  lon,
+  time_dim = "sdate",
+  space_dim = c("lat", "lon"),
+  neofs = 15,
+  corr = FALSE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{ano}{A numerical array of anomalies with named dimensions to calculate
+EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs
+could exist but they should be consistent along time_dim. That is, if one grid
+point has NAs, all the time steps at this point should be NAs.}
+
+\item{lat}{A vector of the latitudes of 'ano'.}
+
+\item{lon}{A vector of the longitudes of 'ano'.}
+
+\item{time_dim}{A character string indicating the name of the time dimension
+of 'ano'. The default value is 'sdate'.}
+
+\item{space_dim}{A vector of two character strings. The first is the dimension
+name of latitude of 'ano' and the second is the dimension name of longitude
+of 'ano'. The default value is c('lat', 'lon').}
+
+\item{neofs}{A positive integer of the modes to be kept. The default value is
+15. If the time length or the product of the lengths of space_dim is smaller
+than neofs, neofs will be changed to the minimum of the three values.}
+
+\item{corr}{A logical value indicating whether to base on a correlation (TRUE)
+or on a covariance matrix (FALSE). The default value is FALSE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list containing:
+\item{EOFs}{
+  An array of EOF patterns normalized to 1 (unitless) with dimensions
+  (number of modes, rest of the dimensions of 'ano' except 'time_dim').
+  Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed
+  field.
+}
+\item{PCs}{
+  An array of principal components with the units of the original field to
+  the power of 2, with dimensions (time_dim, number of modes, rest of the
+  dimensions of 'ano' except 'space_dim').
+  'PCs' already contains the percentage of explained variance, so to
+  reconstruct the original field one only needs to multiply 'EOFs'
+  by 'PCs'.
+}
+\item{var}{
+  An array of the percentage (\%) of the total variance explained by each
+  mode (number of modes). The dimensions are (number of modes, rest of the
+  dimensions of 'ano' except 'time_dim' and 'space_dim').
+}
+\item{mask}{
+  An array of the mask with dimensions (space_dim, rest of the dimensions of
+  'ano' except 'time_dim'). It is made from 'ano': 1 for the positions where
+  'ano' has values and NA for the positions where 'ano' has NAs. It is used to
+  replace NAs with 0s for the EOF calculation and to mask the result with NAs
+  again after the calculation.
+}
+\item{wght}{
+  An array of the area weighting with dimensions 'space_dim'. It is calculated
+  by the cosine of 'lat' and used to compute the fraction of variance explained
+  by each EOF.
+}
+\item{tot_var}{
+  A number or a numeric array of the total variance explained by all the modes.
+  The dimensions are the same as 'ano' except 'time_dim' and 'space_dim'.
+}
+}
+\description{
+Perform an area-weighted EOF analysis using singular value decomposition (SVD)
+based on a covariance matrix or a correlation matrix if parameter 'corr' is
+set to TRUE.
+}
+\examples{
+# This example computes the EOFs along forecast horizons and plots the one
+# that explains the greatest amount of variability. The example data has low
+# resolution so the result may not be explanatory, but it displays how to
+# use this function.
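+# First, a minimal self-contained sketch on synthetic anomalies (the
+# dimension names, sizes, and coordinates below are illustrative
+# assumptions, not package data):
+set.seed(1)
+ano_syn <- array(rnorm(240), dim = c(sdate = 5, lat = 6, lon = 8))
+eof_syn <- EOF(ano_syn, lat = seq(30, 80, length.out = 6),
+               lon = seq(-40, 30, length.out = 8))
+# Now, with the sample experiment data: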
+\dontshow{
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+                                     c('observation'), startDates,
+                                     leadtimemin = 1,
+                                     leadtimemax = 4,
+                                     output = 'lonlat',
+                                     latmin = 27, latmax = 48,
+                                     lonmin = -12, lonmax = 40)
+}
+ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+tmp <- MeanDims(ano$exp, c('dataset', 'member'))
+ano <- tmp[1, , ,]
+names(dim(ano)) <- names(dim(tmp))[-2]
+eof <- EOF(ano, sampleData$lat, sampleData$lon)
+\dontrun{
+PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat)
+}
+
+}
+\seealso{
+ProjectField, NAO, PlotBoxWhisker
+}
diff --git a/man/Eno.Rd b/man/Eno.Rd
index 32468bdddb048d1cf14c345ef6aedc4e37bb54d8..03c3b4fd1e11456ea2218e234de3cbfed4187c5d 100644
--- a/man/Eno.Rd
+++ b/man/Eno.Rd
@@ -39,4 +39,3 @@ data[na] <- NA
 res <- Eno(data)
 }
-
diff --git a/man/EuroAtlanticTC.Rd b/man/EuroAtlanticTC.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..7f81b249623077ee76d07bb1090484979296b3e4
--- /dev/null
+++ b/man/EuroAtlanticTC.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/EuroAtlanticTC.R
+\name{EuroAtlanticTC}
+\alias{EuroAtlanticTC}
+\title{Teleconnection indices in European Atlantic Ocean region}
+\usage{
+EuroAtlanticTC(
+  ano,
+  lat,
+  lon,
+  ntrunc = 30,
+  time_dim = "sdate",
+  space_dim = c("lat", "lon"),
+  corr = FALSE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{ano}{A numerical array of anomalies with named dimensions used to
+calculate the REOFs and then the four teleconnections. The dimensions must
+have at least 'time_dim' and 'space_dim', and the data should cover the
+European Atlantic Ocean area (20N-80N, 90W-60E).}
+
+\item{lat}{A vector of the latitudes of 'ano'. It should be 20N-80N.}
+
+\item{lon}{A vector of the longitudes of 'ano'. It should be 90W-60E.}
+
+\item{ntrunc}{A positive integer of the modes to be kept. The default value
+is 30. If the time length or the product of latitude length and longitude
+length is less than ntrunc, ntrunc is equal to the minimum of the three
+values.}
+
+\item{time_dim}{A character string indicating the name of the time dimension
+of 'ano'. The default value is 'sdate'.}
+
+\item{space_dim}{A vector of two character strings. The first is the dimension
+name of latitude of 'ano' and the second is the dimension name of longitude
+of 'ano'. The default value is c('lat', 'lon').}
+
+\item{corr}{A logical value indicating whether to base on a correlation (TRUE)
+or on a covariance matrix (FALSE). The default value is FALSE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list containing:
+\item{patterns}{
+  An array of the first four REOF patterns normalized to 1 (unitless) with
+  dimensions (modes = 4, the rest of the dimensions of 'ano' except
+  'time_dim'). The modes represent NAO, EA, EAWR, and SCA, whose order
+  and sign change depending on the dataset and period employed, so manual
+  reordering may be needed. Multiplying 'patterns' by 'indices' gives the
+  original reconstructed field.
+}
+\item{indices}{
+  An array of the first four principal components with the units of the
+  original field to the power of 2, with dimensions (time_dim, modes = 4, the
+  rest of the dimensions of 'ano' except 'space_dim').
+}
+\item{var}{
+  An array of the percentage (\%) of the total variance explained by each
+  mode. The dimensions are (modes = ntrunc, the rest of the
+  dimensions of 'ano' except 'time_dim' and 'space_dim').
+}
+\item{wght}{
+  An array of the area weighting with dimensions 'space_dim'. It is calculated
+  by the square root of the cosine of 'lat' and used to compute the fraction of
+  variance explained by each REOF.
+}
+}
+\description{
+Calculate the four main teleconnection indices in the European Atlantic Ocean
+region: North Atlantic Oscillation (NAO), East Atlantic Pattern (EA), East
+Atlantic/Western Russia (EAWR), and Scandinavian pattern (SCA). The function
+\code{REOF()} is used for the calculation, and the first four modes are
+returned.
+}
+\examples{
+# Use synthetic data
+set.seed(1)
+dat <- array(rnorm(800), dim = c(dat = 2, sdate = 5, lat = 8, lon = 15))
+lat <- seq(10, 90, length.out = 8)
+lon <- seq(-100, 70, length.out = 15)
+res <- EuroAtlanticTC(dat, lat = lat, lon = lon)
+
+}
+\seealso{
+REOF, NAO
+}
diff --git a/man/Filter.Rd b/man/Filter.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..f98fe0acbcff6bf4b9e1bd6f716fc7227930006f
--- /dev/null
+++ b/man/Filter.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Filter.R
+\name{Filter}
+\alias{Filter}
+\title{Filter frequency peaks from an array}
+\usage{
+Filter(data, freq, time_dim = "ftime", ncores = NULL)
+}
+\arguments{
+\item{data}{A numeric vector or array of the data to be filtered.
+If it's a vector, it should be a time series. If it's an array,
+the dimensions must have at least 'time_dim'.}
+
+\item{freq}{A number indicating the frequency to filter out.}
+
+\item{time_dim}{A character string indicating the dimension along which to
+compute the filtering. The default value is 'ftime'.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A numeric vector or array of the filtered data with the dimensions
+  the same as 'data'.
+}
+\description{
+Filter out the selected frequency from a time series. The filtering is
+performed by dichotomy, seeking a frequency around the parameter 'freq'
+and the phase that maximizes the signal to subtract from the time series.
+The maximization of the signal to subtract relies on a minimization of the
+mean square differences between the time series ('data') and a cosine of
+the specified frequency and phase.
+}
+\examples{
+# Load sample data as in Load() example:
+example(Load)
+ensmod <- MeanDims(sampleData$mod, 2)
+spectrum <- Spectrum(ensmod)
+
+for (jsdate in 1:dim(spectrum)['sdate']) {
+  for (jlen in 1:dim(spectrum)['ftime']) {
+    if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) {
+      ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate])
+    }
+  }
+}
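+
+# A hypothetical stand-alone call on a synthetic monthly series, removing an
+# assumed annual cycle at frequency 1/12 per time step (the series and the
+# frequency value are illustrative assumptions, not package data):
+ts_syn <- sin(2 * pi * (1:60) / 12) + rnorm(60, sd = 0.1)
+ts_filtered <- Filter(ts_syn, freq = 1/12)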
diff --git a/man/GMST.Rd b/man/GMST.Rd
index 208ff75255f3fd15d87308b99be215131e55945e..0ce858beccfc0da32d0114379388dc4f0c16559c 100644
--- a/man/GMST.Rd
+++ b/man/GMST.Rd
@@ -4,29 +4,43 @@
 \alias{GMST}
 \title{Compute the Global Mean Surface Temperature (GMST) anomalies}
 \usage{
-GMST(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_value, type,
-  mask = NULL, lat_dim = "lat", lon_dim = "lon", monini = 11,
-  fmonth_dim = "fmonth", sdate_dim = "sdate", indices_for_clim = NULL,
-  year_dim = "year", month_dim = "month", member_dim = "member")
+GMST(
+  data_tas,
+  data_tos,
+  data_lats,
+  data_lons,
+  mask_sea_land,
+  sea_value,
+  type,
+  mask = NULL,
+  lat_dim = "lat",
+  lon_dim = "lon",
+  monini = 11,
+  fmonth_dim = "fmonth",
+  sdate_dim = "sdate",
+  indices_for_clim = NULL,
+  year_dim = "year",
+  month_dim = "month",
+  na.rm = TRUE,
+  ncores = NULL
+)
 }
 \arguments{
-\item{data_tas}{A numerical array indicating the surface air temperature data
-to be used for the index computation with the dimensions: 1) latitude,
-longitude, start date, forecast month, and member (in case of decadal
-predictions), 2) latitude, longitude, year, month and member (in case of
-historical simulations), or 3) latitude, longitude, year and month (in case
-of observations or reanalyses). This data has to be provided, at least,
-over the whole region needed to compute the index. The dimensions must be
-identical to those of data_tos.}
-
-\item{data_tos}{A numerical array indicating the sea surface temperature data
-to be used for the index computation with the dimensions: 1) latitude,
-longitude, start date, forecast month, and member (in case of decadal
-predictions), 2) latitude, longitude, year, month and member (in case of
-historical simulations), or 3) latitude, longitude, year and month (in case
-of observations or reanalyses). This data has to be provided, at least,
-over the whole region needed to compute the index. The dimensions must be
-identical to those of data_tas.}
+\item{data_tas}{A numerical array with the surface air temperature data
+to be used for the index computation with, at least, the
+dimensions: 1) latitude, longitude, start date and forecast month
+(in case of decadal predictions), 2) latitude, longitude, year and month
+(in case of historical simulations or observations). This data has to be
+provided, at least, over the whole region needed to compute the index.
+The dimensions must be identical to those of data_tos.}
+
+\item{data_tos}{A numerical array with the sea surface temperature data
+to be used for the index computation with, at least, the
+dimensions: 1) latitude, longitude, start date and forecast month
+(in case of decadal predictions), 2) latitude, longitude, year and month
+(in case of historical simulations or observations). This data has to be
+provided, at least, over the whole region needed to compute the index.
+The dimensions must be identical to those of data_tas.}
 
 \item{data_lats}{A numeric vector indicating the latitudes of the data.}
 
@@ -74,7 +88,7 @@ climatology is calculated over the whole period. If the data are already
 anomalies, set it to FALSE. The default value is NULL.\cr
 In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 to the first forecast year, and the climatology is automatically computed
-over the actual common period for the different forecast years.}
+over the common calendar period for the different forecast years.}
 
 \item{year_dim}{A character string indicating the name of the year dimension
 The default value is 'year'. Only used if parameter 'type' is 'hist' or
@@ -84,20 +98,23 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or
 dimension. The default value is 'month'. Only used if parameter 'type' is
 'hist' or 'obs'.}
 
-\item{member_dim}{A character string indicating the name of the member
-dimension. The default value is 'member'. Only used if parameter 'type' is
-'dcpp' or 'hist'.}
+\item{na.rm}{A logical value indicating whether to remove NA values. The default
+value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
 }
 \value{
-A numerical array of the GMST anomalies with the dimensions of:
-  1) sdate, forecast year, and member (in case of decadal predictions);
-  2) year and member (in case of historical simulations); or
-  3) year (in case of observations or reanalyses).
+A numerical array with the GMST anomalies with the same dimensions as data_tas except
+  the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+  (historical simulations or observations). In case of decadal predictions, a new dimension
+  'fyear' is added.
 }
 \description{
 The Global Mean Surface Temperature (GMST) anomalies are computed as the
 weighted-averaged surface air temperature anomalies over land and sea surface
-temperature anomalies over the ocean.
+temperature anomalies over the ocean. If different members and/or datasets are provided,
+the climatology (used to calculate the anomalies) is computed individually for all of them.
 }
 \examples{
 ## Observations or reanalyses
@@ -134,4 +151,3 @@ index_dcpp <- GMST(data_tas = dcpp_tas, data_tos = dcpp_tos, data_lats = lat,
                    sea_value = sea_value)
 }
-
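[Editor's sketch, not part of the patch: a hedged synthetic call of the 'obs' pathway. The mask convention is an assumption here, namely that mask_sea_land is a lat x lon array whose cells equal to 'sea_value' are treated as sea.]

```r
# Synthetic monthly fields on a tiny 2 x 3 grid, five years of observations.
library(s2dv)
set.seed(1)
dims <- c(year = 5, lat = 2, lon = 3, month = 12)
obs_tas <- array(rnorm(prod(dims)), dim = dims)
obs_tos <- array(rnorm(prod(dims)), dim = dims)
mask_sea_land <- array(c(1, 0, 1, 0, 1, 0), dim = c(lat = 2, lon = 3))  # assumed: 1 = sea
index_obs <- GMST(data_tas = obs_tas, data_tos = obs_tos,
                  data_lats = c(-10, 10), data_lons = c(10, 20, 30),
                  mask_sea_land = mask_sea_land, sea_value = 1, type = 'obs')
dim(index_obs)  # only 'year' remains: lat, lon and month are aggregated out
```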
diff --git a/man/GSAT.Rd b/man/GSAT.Rd
index 9d3fbb65d61b9aa3d26edbd38a044432476ad7fa..9f03ff2197408b8170393dc66c68c1ba7bb92696 100644
--- a/man/GSAT.Rd
+++ b/man/GSAT.Rd
@@ -4,17 +4,29 @@
 \alias{GSAT}
 \title{Compute the Global Surface Air Temperature (GSAT) anomalies}
 \usage{
-GSAT(data, data_lats, data_lons, type, lat_dim = "lat", lon_dim = "lon",
-  mask = NULL, monini = 11, fmonth_dim = "fmonth", sdate_dim = "sdate",
-  indices_for_clim = NULL, year_dim = "year", month_dim = "month",
-  member_dim = "member")
+GSAT(
+  data,
+  data_lats,
+  data_lons,
+  type,
+  lat_dim = "lat",
+  lon_dim = "lon",
+  mask = NULL,
+  monini = 11,
+  fmonth_dim = "fmonth",
+  sdate_dim = "sdate",
+  indices_for_clim = NULL,
+  year_dim = "year",
+  month_dim = "month",
+  na.rm = TRUE,
+  ncores = NULL
+)
 }
 \arguments{
-\item{data}{A numerical array to be used for the index computation with the
-dimensions: 1) latitude, longitude, start date, forecast month, and member
-(in case of decadal predictions), 2) latitude, longitude, year, month and
-member (in case of historical simulations), or 3) latitude, longitude, year
-and month (in case of observations or reanalyses). This data has to be
+\item{data}{A numerical array to be used for the index computation with, at least, the
+dimensions: 1) latitude, longitude, start date and forecast month
+(in case of decadal predictions), 2) latitude, longitude, year and month
+(in case of historical simulations or observations). This data has to be
 provided, at least, over the whole region needed to compute the index.}
 
 \item{data_lats}{A numeric vector indicating the latitudes of the data.}
 
@@ -57,7 +69,7 @@ climatology is calculated over the whole period. If the data are already
 anomalies, set it to FALSE. The default value is NULL.\cr
 In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative
 to the first forecast year, and the climatology is automatically computed
-over the actual common period for the different forecast years.}
+over the common calendar period for the different forecast years.}
 
 \item{year_dim}{A character string indicating the name of the year dimension
 The default value is 'year'. Only used if parameter 'type' is 'hist' or
@@ -67,19 +79,23 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or
 dimension. The default value is 'month'. Only used if parameter 'type' is
 'hist' or 'obs'.}
 
-\item{member_dim}{A character string indicating the name of the member
-dimension. The default value is 'member'. Only used if parameter 'type' is
-'dcpp' or 'hist'.}
+\item{na.rm}{A logical value indicating whether to remove NA values. The default
+value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
 }
 \value{
-A numerical array of the GSAT anomalies with the dimensions of:
-  1) sdate, forecast year, and member (in case of decadal predictions);
-  2) year and member (in case of historical simulations); or
-  3) year (in case of observations or reanalyses).
+A numerical array with the GSAT anomalies with the same dimensions as data except
+  the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions
+  (historical simulations or observations). In case of decadal predictions, a new dimension
+  'fyear' is added.
 }
 \description{
 The Global Surface Air Temperature (GSAT) anomalies are computed as the
-weighted-averaged surface air temperature anomalies over the global region.
+weighted-averaged surface air temperature anomalies over the global region.
+If different members and/or datasets are provided, the climatology (used to
+calculate the anomalies) is computed individually for all of them.
 }
 \examples{
 ## Observations or reanalyses
@@ -101,4 +117,3 @@ lon <- seq(0, 360, 10)
 index_dcpp <- GSAT(data = dcpp, data_lats = lat, data_lons = lon, type = 'dcpp',
                    monini = 1)
 }
-
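[Editor's sketch, not part of the patch: a hedged companion call for GSAT() on the same kind of synthetic grid; unlike GMST(), no land/sea mask is required.]

```r
# Five years of synthetic monthly air temperature over a tiny 2 x 3 grid.
library(s2dv)
set.seed(1)
obs <- array(rnorm(360), dim = c(year = 5, lat = 2, lon = 3, month = 12))
index_obs <- GSAT(data = obs, data_lats = c(-10, 10),
                  data_lons = c(10, 20, 30), type = 'obs')
dim(index_obs)  # year = 5
```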
diff --git a/man/Histo2Hindcast.Rd b/man/Histo2Hindcast.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..a2bc5cac3a7ff241d3912edd6b0123f2f890d3d5
--- /dev/null
+++ b/man/Histo2Hindcast.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Histo2Hindcast.R
+\name{Histo2Hindcast}
+\alias{Histo2Hindcast}
+\title{Chunk long simulations for comparison with hindcasts}
+\usage{
+Histo2Hindcast(
+  data,
+  sdatesin,
+  sdatesout,
+  nleadtimesout,
+  sdate_dim = "sdate",
+  ftime_dim = "ftime",
+  ncores = NULL
+)
+}
+\arguments{
+\item{data}{A numeric array of model or observational data with dimensions
+at least sdate_dim and ftime_dim.}
+
+\item{sdatesin}{A character string of the start date of 'data'. The format
+should be 'YYYYMMDD' or 'YYYYMM'.}
+
+\item{sdatesout}{A vector of character strings indicating the expected start
+dates of the output. The format should be 'YYYYMMDD' or 'YYYYMM'.}
+
+\item{nleadtimesout}{A positive integer indicating the length of leadtimes of
+the output.}
+
+\item{sdate_dim}{A character string indicating the name of the start date
+dimension of 'data'. The default value is 'sdate'.}
+
+\item{ftime_dim}{A character string indicating the name of the lead time
+dimension of 'data'. The default value is 'ftime'.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A numeric array with the same dimensions as data, except that the length
+  of sdate_dim equals the length of 'sdatesout' and the length of ftime_dim
+  equals 'nleadtimesout'.
+}
+\description{
+Reorganize a long run (typically historical) with only one start date into
+chunks corresponding to a set of start dates. The time frequency of the data
+should be monthly.
+}
+\examples{
+ \dontshow{
+startDates <- c('19901101')
+sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+                                     c('observation'), startDates,
+                                     leadtimemin = 1,
+                                     leadtimemax = 60,
+                                     output = 'areave',
+                                     latmin = 27, latmax = 48,
+                                     lonmin = -12, lonmax = 40)
+ }
+
+sdates_out <- c('19901101', '19911101', '19921101', '19931101', '19941101')
+leadtimes_per_startdate <- 12
+exp_data <- Histo2Hindcast(sampleData$mod, startDates,
+                           sdates_out, leadtimes_per_startdate)
+obs_data <- Histo2Hindcast(sampleData$obs, startDates,
+                           sdates_out, leadtimes_per_startdate)
+ \dontrun{
+exp_data <- Reorder(exp_data, c(3, 4, 1, 2))
+obs_data <- Reorder(obs_data, c(3, 4, 1, 2))
+PlotAno(exp_data, obs_data, sdates_out,
+        toptitle = paste('Anomalies reorganized into shorter chunks'),
+        ytitle = 'K', fileout = NULL)
+ }
+
+}
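[Editor's sketch, not part of the patch: a fully synthetic rechunking, using only shapes stated in the documentation above.]

```r
# One 30-year monthly run starting Nov 1990, rechunked into three 12-month
# pseudo-hindcasts starting in Nov 1990, 1991 and 1992.
library(s2dv)
set.seed(1)
histo <- array(rnorm(360), dim = c(sdate = 1, ftime = 360))
hind <- Histo2Hindcast(histo, sdatesin = '19901101',
                       sdatesout = c('19901101', '19911101', '19921101'),
                       nleadtimesout = 12)
dim(hind)  # sdate = 3, ftime = 12
```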
diff --git a/man/InsertDim.Rd b/man/InsertDim.Rd
index 8ab628d3eb3d99d50c94e0984aade50707b18e6e..51418f0ba6624687dd12466e0edfeda3cfc98168 100644
--- a/man/InsertDim.Rd
+++ b/man/InsertDim.Rd
@@ -17,7 +17,7 @@ InsertDim(data, posdim, lendim, name = NULL, ncores = NULL)
 The default value is NULL.}
 
 \item{ncores}{An integer indicating the number of cores to use for parallel
-computation. The default value is NULL.}
+computation. The default value is NULL. This parameter is now deprecated.}
 }
 \value{
 An array as parameter 'data' but with the added named dimension.
@@ -32,4 +32,3 @@ res <- InsertDim(InsertDim(a, posdim = 2, lendim = 1, name = 'e'), 4, c(f = 2))
 dim(res)
 
 }
-
diff --git a/man/LeapYear.Rd b/man/LeapYear.Rd
index d261b0aea8d5697c7250114d7254affbca79b26c..c2960f3d4acfe71abf964738110f0901498edc60 100644
--- a/man/LeapYear.Rd
+++ b/man/LeapYear.Rd
@@ -21,4 +21,3 @@ print(LeapYear(1991))
 print(LeapYear(1992))
 print(LeapYear(1993))
 }
-
diff --git a/man/Load.Rd b/man/Load.Rd
index 214f984225c858da01f12ad9a80d8c66c373beff..10c03f94af836a709e9af17cfc409354681d6ee9 100644
--- a/man/Load.Rd
+++ b/man/Load.Rd
@@ -4,15 +4,36 @@
 \alias{Load}
 \title{Loads Experimental And Observational Data}
 \usage{
-Load(var, exp = NULL, obs = NULL, sdates, nmember = NULL,
-  nmemberobs = NULL, nleadtime = NULL, leadtimemin = 1,
-  leadtimemax = NULL, storefreq = "monthly", sampleperiod = 1,
-  lonmin = 0, lonmax = 360, latmin = -90, latmax = 90,
-  output = "areave", method = "conservative", grid = NULL,
-  maskmod = vector("list", 15), maskobs = vector("list", 15),
-  configfile = NULL, varmin = NULL, varmax = NULL, silent = FALSE,
-  nprocs = NULL, dimnames = NULL, remapcells = 2,
-  path_glob_permissive = "partial")
+Load(
+  var,
+  exp = NULL,
+  obs = NULL,
+  sdates,
+  nmember = NULL,
+  nmemberobs = NULL,
+  nleadtime = NULL,
+  leadtimemin = 1,
+  leadtimemax = NULL,
+  storefreq = "monthly",
+  sampleperiod = 1,
+  lonmin = 0,
+  lonmax = 360,
+  latmin = -90,
+  latmax = 90,
+  output = "areave",
+  method = "conservative",
+  grid = NULL,
+  maskmod = vector("list", 15),
+  maskobs = vector("list", 15),
+  configfile = NULL,
+  varmin = NULL,
+  varmax = NULL,
+  silent = FALSE,
+  nprocs = NULL,
+  dimnames = NULL,
+  remapcells = 2,
+  path_glob_permissive = "partial"
+)
 }
 \arguments{
 \item{var}{Short name of the variable to load. It should coincide with the
@@ -874,4 +895,3 @@ sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
                                     lonmin = -12, lonmax = 40)
 }
 }
-
diff --git a/man/MeanDims.Rd b/man/MeanDims.Rd
index 2e6022f7b5bdaf41d9e37606a4b0838b04f236a8..f70b78b391f1205a30614735a8af875a64c9b608 100644
--- a/man/MeanDims.Rd
+++ b/man/MeanDims.Rd
@@ -13,7 +13,7 @@ MeanDims(data, dims, na.rm = FALSE)
 dimensions to average.}
 
 \item{na.rm}{A logical value indicating whether to ignore NA values (TRUE) or
-not (FALSE). The default value is FALSE.}
+not (FALSE).}
 }
 \value{
 An array with the same dimension as parameter 'data' except the 'dims'
@@ -24,14 +24,8 @@ An array with the same dimension as parameter 'data' except the 'dims'
 \description{
 This function returns the mean of an array along a set of dimensions and
 preserves the dimension names if they exist.
 }
-\details{
-It is recommended to use \code{'apply(x, dim, mean)'} to improve the
-  efficiency when the dimension to be averaged is only one.
-}
 \examples{
-a <- array(rnorm(24), dim = c(a = 2, b= 3, c = 4))
-print(dim(MeanDims(a, 2)))
-print(dim(MeanDims(a, c(2, 3))))
-print(dim(MeanDims(a, c('a', 'b'))))
+a <- array(rnorm(24), dim = c(2, 3, 4))
+MeanDims(a, 2)
+MeanDims(a, c(2, 3))
 }
-
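[Editor's sketch, not part of the patch: the two utilities above are natural companions, so one combined example.]

```r
# MeanDims() drops the averaged dimensions and keeps the names of the rest;
# InsertDim() puts a named singleton dimension back at a chosen position.
library(s2dv)
a <- array(rnorm(24), dim = c(member = 2, sdate = 3, ftime = 4))
m <- MeanDims(a, c('member', 'sdate'))
dim(m)                                                      # ftime = 4
dim(InsertDim(m, posdim = 1, lendim = 1, name = 'member'))  # member = 1, ftime = 4
```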
diff --git a/man/NAO.Rd b/man/NAO.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..64b16562fb45f5ad2a84c6efd5dca8ba8e171876
--- /dev/null
+++ b/man/NAO.Rd
@@ -0,0 +1,112 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/NAO.R
+\name{NAO}
+\alias{NAO}
+\title{Compute the North Atlantic Oscillation (NAO) Index}
+\usage{
+NAO(
+  exp = NULL,
+  obs = NULL,
+  lat,
+  lon,
+  time_dim = "sdate",
+  memb_dim = "member",
+  space_dim = c("lat", "lon"),
+  ftime_dim = "ftime",
+  ftime_avg = 2:4,
+  obsproj = TRUE,
+  ncores = NULL
+)
+}
+\arguments{
+\item{exp}{A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+If only the NAO of observational data needs to be computed, this parameter
+can be left as NULL. The default value is NULL.}
+
+\item{obs}{A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least.
+If only the NAO of experimental data needs to be computed, this parameter
+can be left as NULL. The default value is NULL.}
+
+\item{lat}{A vector of the latitudes of 'exp' and 'obs'.}
+
+\item{lon}{A vector of the longitudes of 'exp' and 'obs'.}
+
+\item{time_dim}{A character string indicating the name of the time dimension
+of 'exp' and 'obs'. The default value is 'sdate'.}
+
+\item{memb_dim}{A character string indicating the name of the member
+dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+must be 1. The default value is 'member'.}
+
+\item{space_dim}{A vector of two character strings. The first is the
+dimension name of latitude of 'exp' and 'obs' and the second is the
+dimension name of longitude. The default value is c('lat', 'lon').}
+
+\item{ftime_dim}{A character string indicating the name of the forecast time
+dimension of 'exp' and 'obs'. The default value is 'ftime'.}
+
+\item{ftime_avg}{A numeric vector of the forecast time steps to average
+across the target period. The default value is 2:4, i.e., from the 2nd to
+the 4th forecast time steps.}
+
+\item{obsproj}{A logical value indicating whether to compute the NAO index by
+projecting the forecast anomalies onto the leading EOF of the observational
+reference (TRUE), or by first computing the leading EOF of the forecast
+anomalies (in cross-validation mode, i.e., leaving out the year being
+evaluated) and then projecting the forecast anomalies onto this EOF (FALSE).
+The default value is TRUE.}
+
+\item{ncores}{An integer indicating the number of cores to use for parallel
+computation. The default value is NULL.}
+}
+\value{
+A list which contains:
+\item{exp}{
+  A numeric array of the forecast NAO index in verification format with the same
+  dimensions as 'exp' except space_dim and ftime_dim.
+  }
+\item{obs}{
+  A numeric array of the observed NAO index in verification format with the same
+  dimensions as 'obs' except space_dim and ftime_dim.
+}
+}
+\description{
+Compute the North Atlantic Oscillation (NAO) index based on the leading EOF
+of the sea level pressure (SLP) anomalies over the north Atlantic region
+(20N-80N, 80W-40E).
The PCs are obtained by projecting the forecast and +observed anomalies onto the observed EOF pattern or the forecast +anomalies onto the EOF pattern of the other years of the forecast. +By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +cross-validated PCs of the NAO index for forecast (exp) and observations +(obs) based on the leading EOF pattern. +} +\examples{ +# Make up synthetic data +set.seed(1) +exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +set.seed(2) +obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +lat <- seq(20, 80, length.out = 6) +lon <- seq(-80, 40, length.out = 9) +nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) + +# plot the NAO index + \dontrun{ +nao$exp <- Reorder(nao$exp, c(2, 1)) +nao$obs <- Reorder(nao$obs, c(2, 1)) +PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", + monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") + } + +} +\references{ +Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of + multi-model seasonal forecasts of the wintertime North Atlantic + Oscillation. Climate Dynamics, 21, 501-514. + DOI: 10.1007/s00382-003-0350-4 +} diff --git a/man/Persistence.Rd b/man/Persistence.Rd index 33d48682db86cecb1e87f9263bb78b34d7e1c68c..9b09ac3546a880d605f0b48c6772c3a1c38ceaf8 100644 --- a/man/Persistence.Rd +++ b/man/Persistence.Rd @@ -4,9 +4,19 @@ \alias{Persistence} \title{Compute persistence} \usage{ -Persistence(data, dates, time_dim = "time", start, end, ft_start, - ft_end = ft_start, max_ft = 10, nmemb = 1, na.action = 10, - ncores = NULL) +Persistence( + data, + dates, + time_dim = "time", + start, + end, + ft_start, + ft_end = ft_start, + max_ft = 10, + nmemb = 1, + na.action = 10, + ncores = NULL +) } \arguments{ \item{data}{A numeric array corresponding to the observational data @@ -14,17 +24,19 @@ including the time dimension along which the autoregression is computed. The data should start at least 40 time steps (years or days) before 'start'.} -\item{dates}{A sequence of 4-digit integers (YYYY) or dates (YYYY-MM-DD) -indicating the dates available in the observations.} +\item{dates}{A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) +in class 'Date' indicating the dates available in the observations.} \item{time_dim}{A character string indicating the dimension along which to compute the autoregression. The default value is 'time'.} -\item{start}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the first start date of the persistence forecast.} +\item{start}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the first start date of the persistence forecast. It must be +between 1850 and 2020.} -\item{end}{A 4-digit integer (YYYY) or a date in the ISOdate format -(YYYY-MM-DD) indicating the last start date of the persistence forecast.} +\item{end}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' +indicating the last start date of the persistence forecast. 
It must be +between 1850 and 2020.} \item{ft_start}{An integer indicating the forecast time for which the persistence forecast should be calculated, or the first forecast time of @@ -89,13 +101,21 @@ observational data along the time dimension, with a measure of forecast uncertainty (prediction interval) based on Coelho et al., 2004.\cr\cr } \examples{ -#Building an example dataset with yearly start dates from 1920 to 2009 +# Case 1: year +# Building an example dataset with yearly start dates from 1920 to 2009 set.seed(1) -obs1 <- rnorm(1 * 70 * 6 * 7) -dim(obs1) <- c(member = 1, time = 70, lat = 6, lon = 7) -dates <- seq(1940, 2009, 1) -persist <- Persistence(obs1, dates = dates, start = 1961, end = 2005, ft_start = 1, - nmemb = 40) +obs1 <- rnorm(1 * 70 * 2 * 2) +dim(obs1) <- c(member = 1, time = 70, lat = 2, lon = 2) +dates <- seq(1920, 1989, 1) +res <- Persistence(obs1, dates = dates, start = 1961, end = 1980, ft_start = 1, + nmemb = 2) +# Case 2: day +dates <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +start <- as.Date(ISOdate(1990, 2, 15)) +end <- as.Date(ISOdate(1990, 4, 1)) +set.seed(1) +data <- rnorm(1 * length(dates)) +dim(data) <- c(member = 1, time = length(dates)) +res <- Persistence(data, dates = dates, start = start, end = end, ft_start = 1) } - diff --git a/man/Plot2VarsVsLTime.Rd b/man/Plot2VarsVsLTime.Rd new file mode 100644 index 0000000000000000000000000000000000000000..46b9cd50b91f9dae2bbeab55e059d7714150e798 --- /dev/null +++ b/man/Plot2VarsVsLTime.Rd @@ -0,0 +1,136 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Plot2VarsVsLTime.R +\name{Plot2VarsVsLTime} +\alias{Plot2VarsVsLTime} +\title{Plot two scores with confidence intervals in a common plot} +\usage{ +Plot2VarsVsLTime( + var1, + var2, + toptitle = "", + ytitle = "", + monini = 1, + freq = 12, + nticks = NULL, + limits = NULL, + listexp = c("exp1", "exp2", "exp3"), + listvars = c("var1", "var2"), + biglab = FALSE, + hlines = NULL, + leg = TRUE, + siglev = FALSE, + sizetit = 1, + show_conf = TRUE, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) +} +\arguments{ +\item{var1}{Matrix of dimensions (nexp/nmod, nltime).} + +\item{var2}{Matrix of dimensions (nexp/nmod, nltime).} + +\item{toptitle}{Main title, optional.} + +\item{ytitle}{Title of Y-axis, optional.} + +\item{monini}{Starting month between 1 and 12. Default = 1.} + +\item{freq}{1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12.} + +\item{nticks}{Number of ticks and labels on the x-axis, optional.} + +\item{limits}{c(lower limit, upper limit): limits of the Y-axis, optional.} + +\item{listexp}{List of experiment names, up to three, optional.} + +\item{listvars}{List of names of input variables, optional.} + +\item{biglab}{TRUE/FALSE for presentation/paper plot. Default = FALSE.} + +\item{hlines}{c(a, b, ...) Add horizontal black lines at Y-positions a, b, +... The default value is NULL.} + +\item{leg}{TRUE/FALSE if legend should be added or not to the plot. +Default = TRUE.} + +\item{siglev}{TRUE/FALSE if significance level should replace confidence +interval.\cr +Default = FALSE.} + +\item{sizetit}{Multiplicative factor to change title size, optional.} + +\item{show_conf}{TRUE/FALSE to show/not confidence intervals for input +variables.} + +\item{fileout}{Name of output file. Extensions allowed: eps/ps, jpeg, png, +pdf, bmp and tiff. 
The default value is NULL.} + +\item{width}{File width, in the units specified in the parameter size_units +(inches by default). Takes 8 by default.} + +\item{height}{File height, in the units specified in the parameter +size_units (inches by default). Takes 5 by default.} + +\item{size_units}{Units of the size of the device (file or window) to plot +in. Inches ('in') by default. See ?Devices and the creator function of the +corresponding device.} + +\item{res}{Resolution of the device (file or window) to plot in. See +?Devices and the creator function of the corresponding device.} + +\item{...}{Arguments to be passed to the method. Only accepts the following +graphical parameters:\cr +adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt +csi cxy err family fg fig font font.axis font.lab font.main font.sub lend +lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt +smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +For more information about the parameters see `par`.} +} +\description{ +Plot two input variables that have the same dimensions in a common plot. +One plot for all experiments. +The input variables should have dimensions (nexp/nmod, nltime). +} +\details{ +Examples of input:\cr +------------------\cr\cr +RMSE error for a number of experiments and along lead-time: (nexp, nltime) +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +runmean_months <- 12 +smooth_ano_exp <- Smoothing(data = ano_exp, runmeanlen = runmean_months) +smooth_ano_obs <- Smoothing(data = ano_obs, runmeanlen = runmean_months) +dim_to_mean <- 'member' # mean along members +required_complete_row <- 'ftime' # discard startdates for which there are NA leadtimes +leadtimes_per_startdate <- 60 +rms <- RMS(MeanDims(smooth_ano_exp, dim_to_mean), + MeanDims(smooth_ano_obs, dim_to_mean), + comp_dim = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) +smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', + na.rm = TRUE), + posdim = 3, + lendim = dim(smooth_ano_exp)['member'], + name = 'member') +spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) +#Combine rms outputs into one array +rms_combine <- abind::abind(rms$conf.lower, rms$rms, rms$conf.upper, along = 0) +rms_combine <- Reorder(rms_combine, c(2, 3, 1, 4)) + \donttest{ +Plot2VarsVsLTime(InsertDim(rms_combine[, , , ], 1, 1), Reorder(spread$sd, c(1, 3, 2)), + toptitle = 'RMSE and spread', monini = 11, freq = 12, + listexp = c('CMIP5 IC3'), listvar = c('RMSE', 'spread')) + } + +} diff --git a/man/PlotACC.Rd b/man/PlotACC.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cd2b35729db4cdb4d2cabb7005ea7340b6f783a3 --- /dev/null +++ b/man/PlotACC.Rd @@ -0,0 +1,124 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PlotACC.R +\name{PlotACC} +\alias{PlotACC} +\title{Plot Plumes/Timeseries Of Anomaly Correlation Coefficients} +\usage{ +PlotACC( + ACC, + sdates, + toptitle = "", + sizetit = 1, + ytitle = "", + limits = NULL, + legends = NULL, + freq = 12, + biglab = FALSE, + fill = FALSE, + linezero = FALSE, + points = TRUE, + vlines = NULL, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... 
+)
+}
+\arguments{
+\item{ACC}{An ACC array with dimensions:\cr
+c(nexp, nobs, nsdates, nltime, 4)\cr
+with the fourth dimension of length 4 containing the lower limit of the
+95\% confidence interval, the ACC, the upper limit of the 95\% confidence
+interval and the 95\% significance level.}
+
+\item{sdates}{A character vector of start dates: c('YYYYMMDD','YYYYMMDD').}
+
+\item{toptitle}{A character string of the main title, optional.}
+
+\item{sizetit}{A multiplicative factor to scale title size, optional.}
+
+\item{ytitle}{A character string of the title of Y-axis for each experiment:
+c('', ''), optional.}
+
+\item{limits}{A numeric vector c(lower limit, upper limit): limits of the
+Y-axis, optional.}
+
+\item{legends}{A character vector of flags to be written in the legend,
+optional.}
+
+\item{freq}{An integer: 1 = yearly, 12 = monthly, 4 = seasonal, ... Default: 12.}
+
+\item{biglab}{A logical value for presentation/paper plot. Default = FALSE.}
+
+\item{fill}{A logical value indicating whether to fill the confidence
+interval. Default = FALSE.}
+
+\item{linezero}{A logical value indicating whether to add a line at y = 0.
+Default = FALSE.}
+
+\item{points}{A logical value indicating whether to draw points instead of
+lines. Default = TRUE.\cr
+Must be TRUE if only 1 leadtime.}
+
+\item{vlines}{A vector of x locations where to add vertical black lines,
+optional.}
+
+\item{fileout}{A character string of the output file name. Extensions allowed:
+eps/ps, jpeg, png, pdf, bmp and tiff. Default is NULL.}
+
+\item{width}{A numeric of the file width, in the units specified in the
+parameter size_units (inches by default). Takes 8 by default.}
+
+\item{height}{A numeric of the file height, in the units specified in the parameter
+size_units (inches by default). Takes 5 by default.}
+
+\item{size_units}{A character string of the units of the size of the device
+(file or window) to plot in. Inches ('in') by default. See ?Devices and the
+creator function of the corresponding device.}
+
+\item{res}{Resolution of the device (file or window) to plot in. See
+?Devices and the creator function of the corresponding device.}
+
+\item{\dots}{Arguments to be passed to the method. Only accepts the following
+graphical parameters:\cr
+adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt
+csi cxy err family fg fig fin font font.axis font.lab font.main font.sub
+lend lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page
+plt smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog\cr
+For more information about the parameters see `par`.}
+}
+\description{
+Plots plumes/timeseries of ACC from an array with dimensions
+(output from \code{ACC()}): \cr
+c(nexp, nobs, nsdates, nltime, 4)\cr
+where the fourth dimension is of length 4 and contains the lower limit of
+the 95\% confidence interval, the ACC, the upper limit of the 95\%
+confidence interval and the 95\% significance level given by a one-sided
+T-test.
+}
+\examples{
+ \dontshow{
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+                                     c('observation'), startDates,
+                                     leadtimemin = 1,
+                                     leadtimemax = 4,
+                                     output = 'lonlat',
+                                     latmin = 27, latmax = 48,
+                                     lonmin = -12, lonmax = 40)
+ }
+sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2)
+sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2)
+clim <- Clim(sampleData$mod, sampleData$obs)
+ano_exp <- Ano(sampleData$mod, clim$clim_exp)
+ano_obs <- Ano(sampleData$obs, clim$clim_obs)
+acc <- ACC(ano_exp, ano_obs)
+acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap')
+# Combine acc results for PlotACC
+res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val),
+             dim = c(dim(acc$acc), 4))
+res_bootstrap <- array(c(acc_bootstrap$acc_conf.lower, acc_bootstrap$acc,
+                         acc_bootstrap$acc_conf.upper, acc_bootstrap$p.val),
+                       dim = c(dim(acc_bootstrap$acc), 4))
+ \donttest{
+PlotACC(res, startDates)
+PlotACC(res_bootstrap, startDates)
+ }
+}
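[Editor's sketch, not part of the patch: a fully synthetic input for PlotACC() built to the c(nexp, nobs, nsdates, nltime, 4) layout documented above; a plain unnamed array is assumed to be accepted.]

```r
# One experiment, one observation, five start dates, four lead times.
set.seed(1)
acc_arr <- array(NA, dim = c(1, 1, 5, 4, 4))
acc_arr[, , , , 2] <- runif(20, 0.2, 0.8)        # the ACC itself
acc_arr[, , , , 1] <- acc_arr[, , , , 2] - 0.2   # lower 95% bound
acc_arr[, , , , 3] <- acc_arr[, , , , 2] + 0.2   # upper 95% bound
acc_arr[, , , , 4] <- 0.05                       # significance level
sdates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
s2dv::PlotACC(acc_arr, sdates)
```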
diff --git a/man/PlotAno.Rd b/man/PlotAno.Rd
index 18bf0c903db72df4d7ff929b6f8360be5adf19ef..6591ef19f52879f38d3968450b5cc92b471f0ceb 100644
--- a/man/PlotAno.Rd
+++ b/man/PlotAno.Rd
@@ -4,12 +4,30 @@
 \alias{PlotAno}
 \title{Plot Anomaly time series}
 \usage{
-PlotAno(exp_ano, obs_ano = NULL, sdates, toptitle = rep("", 15),
-  ytitle = rep("", 15), limits = NULL, legends = NULL, freq = 12,
-  biglab = FALSE, fill = TRUE, memb = TRUE, ensmean = TRUE,
-  linezero = FALSE, points = FALSE, vlines = NULL, sizetit = 1,
-  fileout = NULL, width = 8, height = 5, size_units = "in", res = 100,
-  ...)
+PlotAno(
+  exp_ano,
+  obs_ano = NULL,
+  sdates,
+  toptitle = rep("", 15),
+  ytitle = rep("", 15),
+  limits = NULL,
+  legends = NULL,
+  freq = 12,
+  biglab = FALSE,
+  fill = TRUE,
+  memb = TRUE,
+  ensmean = TRUE,
+  linezero = FALSE,
+  points = FALSE,
+  vlines = NULL,
+  sizetit = 1,
+  fileout = NULL,
+  width = 8,
+  height = 5,
+  size_units = "in",
+  res = 100,
+  ...
+)
 }
 \arguments{
 \item{exp_ano}{A numerical array containing the experimental data:\cr
@@ -100,4 +118,3 @@ PlotAno(smooth_ano_exp, smooth_ano_obs, startDates,
        legends = 'ERSST', biglab = FALSE)
 }
-
diff --git a/man/PlotBoxWhisker.Rd b/man/PlotBoxWhisker.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..9c5a3f48ab5ae30097f36dbc78bf141b0f648c9c
--- /dev/null
+++ b/man/PlotBoxWhisker.Rd
@@ -0,0 +1,146 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/PlotBoxWhisker.R
+\name{PlotBoxWhisker}
+\alias{PlotBoxWhisker}
+\title{Box-And-Whisker Plot of Time Series with Ensemble Distribution}
+\usage{
+PlotBoxWhisker(
+  exp,
+  obs,
+  toptitle = "",
+  ytitle = "",
+  monini = 1,
+  yearini = 0,
+  freq = 1,
+  expname = "exp 1",
+  obsname = "obs 1",
+  drawleg = TRUE,
+  fileout = NULL,
+  width = 8,
+  height = 5,
+  size_units = "in",
+  res = 100,
+  ...
+)
+}
+\arguments{
+\item{exp}{Forecast array of multi-member time series, e.g., the NAO index
+of one experiment. The expected dimensions are
+c(members, start dates/forecast horizons). A vector with only the time
+dimension can also be provided. Only monthly or lower frequency time
+series are supported. See parameter freq.}
+
+\item{obs}{Observational vector or array of time series, e.g., the NAO index
+of the observations that correspond to the forecast data in \code{exp}.
+The expected dimensions are c(start dates/forecast horizons) or
+c(1, start dates/forecast horizons). Only monthly or lower frequency time
+series are supported. See parameter freq.}
+
+\item{toptitle}{Character string to be drawn as figure title.}
+
+\item{ytitle}{Character string to be drawn as y-axis title.}
+
+\item{monini}{Number of the month of the first time step, from 1 to 12.}
+
+\item{yearini}{Year of the first time step.}
+
+\item{freq}{Frequency of the provided time series: 1 = yearly, 12 = monthly,
+4 = seasonal, ... Default = 1.}
+
+\item{expname}{Experimental dataset name.}
+
+\item{obsname}{Name of the observational reference dataset.}
+
+\item{drawleg}{TRUE/FALSE: whether to draw the legend or not.}
+
+\item{fileout}{Name of output file. Extensions allowed: eps/ps, jpeg, png,
+pdf, bmp and tiff. \cr
+Default = NULL.}
+
+\item{width}{File width, in the units specified in the parameter size_units
+(inches by default). Takes 8 by default.}
+
+\item{height}{File height, in the units specified in the parameter
+size_units (inches by default). Takes 5 by default.}
+
+\item{size_units}{Units of the size of the device (file or window) to plot
+in. Inches ('in') by default. See ?Devices and the creator function of the
+corresponding device.}
+
+\item{res}{Resolution of the device (file or window) to plot in. See
+?Devices and the creator function of the corresponding device.}
+
+\item{...}{Arguments to be passed to the method. Only accepts the following
+graphical parameters:\cr
+ann ask bg cex.lab cex.sub cin col.axis col.lab col.main col.sub cra crt
+csi cxy err family fg fig font font.axis font.lab font.main font.sub lend
+lheight ljoin lmitre mex mfcol mfrow mfg mkh oma omd omi page pin plt pty
+smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr
+For more information about the parameters see `par`.}
+}
+\value{
+Generates a file at the path specified via \code{fileout}.
+}
+\description{
+Produce time series of box-and-whisker plots showing the distribution of the
+members of a forecast vs. the observed evolution. The correlation between
+forecast and observational data is calculated and displayed. Only works for
+n-monthly to n-yearly time series.
+}
+\examples{
+# See examples on Load() to understand the first lines in this example
+ \dontrun{
+data_path <- system.file('sample_data', package = 's2dverification')
+expA <- list(name = 'experiment', path = file.path(data_path,
+             'model/$EXP_NAME$/$STORE_FREQ$_mean/$VAR_NAME$_3hourly',
+             '$VAR_NAME$_$START_DATE$.nc'))
+obsX <- list(name = 'observation', path = file.path(data_path,
+             '$OBS_NAME$/$STORE_FREQ$_mean/$VAR_NAME$',
+             '$VAR_NAME$_$YEAR$$MONTH$.nc'))
+
+# Now we are ready to use Load().
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+sampleData <- Load('tos', list(expA), list(obsX), startDates,
+                   leadtimemin = 1, leadtimemax = 4, output = 'lonlat',
+                   latmin = 27, latmax = 48, lonmin = -12, lonmax = 40)
+ }
+ \dontshow{
+startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+                                     c('observation'), startDates,
+                                     leadtimemin = 1,
+                                     leadtimemax = 4,
+                                     output = 'lonlat',
+                                     latmin = 20, latmax = 80,
+                                     lonmin = -80, lonmax = 40)
+# No example data is available over NAO region, so in this example we will
+# tweak the available data. In a real use case, one can Load() the data over
+# NAO region directly.
+sampleData$lon[] <- c(40, 280, 340)
+sampleData$lat[] <- c(20, 80)
+ }
+# Now ready to compute the EOFs and project on, for example, the first
+# variability mode.
+ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2]) +ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2]) +nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon) +# Finally plot the nao index + \dontrun{ +nao$exp <- Reorder(nao$exp, c(2, 1)) +nao$obs <- Reorder(nao$obs, c(2, 1)) +PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", + monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") + } + +} +\seealso{ +EOF, ProjectField, NAO +} +\author{ +History:\cr +0.1 - 2013-09 (F. Lienert, \email{flienert@ic3.cat}) - Original code\cr +0.2 - 2015-03 (L. Batte, \email{lauriane.batte@ic3.cat}) - Removed all\cr + normalization for sake of clarity. +1.0 - 2016-03 (N. Manubens, \email{nicolau.manubens@bsc.es}) - Formatting to R CRAN +} +\keyword{datagen} diff --git a/man/PlotClim.Rd b/man/PlotClim.Rd index b62ff442b7cb6e82a4f1994a037b8f66518f70d0..9b3381edb0a11749c28c6da41ed5a0abf9a1ff11 100644 --- a/man/PlotClim.Rd +++ b/man/PlotClim.Rd @@ -4,11 +4,26 @@ \alias{PlotClim} \title{Plots Climatologies} \usage{ -PlotClim(exp_clim, obs_clim = NULL, toptitle = "", ytitle = "", - monini = 1, freq = 12, limits = NULL, listexp = c("exp1", "exp2", - "exp3"), listobs = c("obs1", "obs2", "obs3"), biglab = FALSE, - leg = TRUE, sizetit = 1, fileout = NULL, width = 8, height = 5, - size_units = "in", res = 100, ...) +PlotClim( + exp_clim, + obs_clim = NULL, + toptitle = "", + ytitle = "", + monini = 1, + freq = 12, + limits = NULL, + listexp = c("exp1", "exp2", "exp3"), + listobs = c("obs1", "obs2", "obs3"), + biglab = FALSE, + leg = TRUE, + sizetit = 1, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) } \arguments{ \item{exp_clim}{Matrix containing the experimental data with dimensions:\cr @@ -79,4 +94,3 @@ PlotClim(clim$clim_exp, clim$clim_obs, toptitle = paste('climatologies'), listobs = c('ERSST'), biglab = FALSE, fileout = NULL) } - diff --git a/man/PlotEquiMap.Rd b/man/PlotEquiMap.Rd index cf45ead4b3555ba1417ebcc18d7b01140550b11c..b574904cedbc13e914b8059e21ef3e8e5119b7a1 100644 --- a/man/PlotEquiMap.Rd +++ b/man/PlotEquiMap.Rd @@ -4,25 +4,78 @@ \alias{PlotEquiMap} \title{Maps A Two-Dimensional Variable On A Cylindrical Equidistant Projection} \usage{ -PlotEquiMap(var, lon, lat, varu = NULL, varv = NULL, toptitle = NULL, - sizetit = NULL, units = NULL, brks = NULL, cols = NULL, - bar_limits = NULL, triangle_ends = NULL, col_inf = NULL, - col_sup = NULL, colNA = NULL, color_fun = clim.palette(), - square = TRUE, filled.continents = NULL, coast_color = NULL, - coast_width = 1, contours = NULL, brks2 = NULL, contour_lwd = 0.5, - contour_color = "black", contour_lty = 1, contour_label_scale = 1, - dots = NULL, dot_symbol = 4, dot_size = 1, - arr_subsamp = floor(length(lon)/30), arr_scale = 1, arr_ref_len = 15, - arr_units = "m/s", arr_scale_shaft = 1, arr_scale_shaft_angle = 1, - axelab = TRUE, labW = FALSE, intylat = 20, intxlon = 20, - axes_tick_scale = 1, axes_label_scale = 1, drawleg = TRUE, - subsampleg = NULL, bar_extra_labels = NULL, draw_bar_ticks = TRUE, - draw_separators = FALSE, triangle_ends_scale = 1, bar_label_digits = 4, - bar_label_scale = 1, units_scale = 1, bar_tick_scale = 1, - bar_extra_margin = rep(0, 4), boxlim = NULL, boxcol = "purple2", - boxlwd = 5, margin_scale = rep(1, 4), title_scale = 1, numbfig = NULL, - fileout = NULL, width = 8, height = 5, size_units = "in", res = 100, - ...) 
+PlotEquiMap( + var, + lon, + lat, + varu = NULL, + varv = NULL, + toptitle = NULL, + sizetit = NULL, + units = NULL, + brks = NULL, + cols = NULL, + bar_limits = NULL, + triangle_ends = NULL, + col_inf = NULL, + col_sup = NULL, + colNA = NULL, + color_fun = clim.palette(), + square = TRUE, + filled.continents = NULL, + filled.oceans = FALSE, + coast_color = NULL, + coast_width = 1, + lake_color = NULL, + contours = NULL, + brks2 = NULL, + contour_lwd = 0.5, + contour_color = "black", + contour_lty = 1, + contour_draw_label = TRUE, + contour_label_scale = 1, + dots = NULL, + dot_symbol = 4, + dot_size = 1, + arr_subsamp = floor(length(lon)/30), + arr_scale = 1, + arr_ref_len = 15, + arr_units = "m/s", + arr_scale_shaft = 1, + arr_scale_shaft_angle = 1, + axelab = TRUE, + labW = FALSE, + lab_dist_x = NULL, + lab_dist_y = NULL, + degree_sym = FALSE, + intylat = 20, + intxlon = 20, + axes_tick_scale = 1, + axes_label_scale = 1, + drawleg = TRUE, + subsampleg = NULL, + bar_extra_labels = NULL, + draw_bar_ticks = TRUE, + draw_separators = FALSE, + triangle_ends_scale = 1, + bar_label_digits = 4, + bar_label_scale = 1, + units_scale = 1, + bar_tick_scale = 1, + bar_extra_margin = rep(0, 4), + boxlim = NULL, + boxcol = "purple2", + boxlwd = 5, + margin_scale = rep(1, 4), + title_scale = 1, + numbfig = NULL, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) } \arguments{ \item{var}{Array with the values at each cell of a grid on a regular @@ -89,12 +142,19 @@ the spaces in between with colours (FALSE). In the latter case, Takes the value gray(0.5) by default or, if 'square = FALSE', takes the value FALSE. If set to FALSE, continents are not filled in.} +\item{filled.oceans}{A logical value or the color name to fill in drawn +projected oceans. The default value is FALSE. If it is TRUE, the default +colour is "light blue".} + \item{coast_color}{Colour of the coast line of the drawn projected continents. Takes the value gray(0.5) by default.} \item{coast_width}{Line width of the coast line of the drawn projected continents. Takes the value 1 by default.} +\item{lake_color}{Colour of the lake or other water body inside continents. +The default value is NULL.} + \item{contours}{Array of same dimensions as 'var' to be added to the plot and displayed with contours. Parameter 'brks2' is required to define the magnitude breaks for each contour curve. Disregarded if 'square = FALSE'.} @@ -111,6 +171,9 @@ and 'brks2', or if 'square = FALSE'.} \item{contour_lty}{Line type of the contour curves. Takes 1 (solid) by default. See help on 'lty' in par() for other accepted values.} +\item{contour_draw_label}{A logical value indicating whether to draw the +contour labels or not. The default value is TRUE.} + \item{contour_label_scale}{Scale factor for the superimposed labels when drawing contour levels.} @@ -159,6 +222,16 @@ TRUE by default.} \item{labW}{Whether to label the longitude axis with a 'W' instead of minus for negative values. Defaults to FALSE.} +\item{lab_dist_x}{A numeric of the distance of the longitude labels to the +box borders. The default value is NULL and is automatically adjusted by +the function.} + +\item{lab_dist_y}{A numeric of the distance of the latitude labels to the +box borders. The default value is NULL and is automatically adjusted by +the function.} + +\item{degree_sym}{A logical indicating whether to include degree symbol (30° N) or not (30N; default).} + \item{intylat}{Interval between latitude ticks on y-axis, in degrees. 
Defaults to 20.} @@ -244,8 +317,9 @@ grid cells. Only the region for which data has been provided is displayed. A colour bar (legend) can be plotted and adjusted. It is possible to draw superimposed arrows, dots, symbols, contour lines and boxes. A number of options is provided to adjust the position, size and colour of the -components. This plot function is compatible with figure layouts if colour -bar is disabled. +components. Some parameters are provided to add and adjust the masks that +include continents, oceans, and lakes. This plot function is compatible with +figure layouts if colour bar is disabled. } \examples{ # See examples on Load() to understand the first lines in this example @@ -278,4 +352,3 @@ PlotEquiMap(sampleData$mod[1, 1, 1, 1, , ], sampleData$lon, sampleData$lat, toptitle = 'Predicted sea surface temperature for Nov 1960 from 1st Nov', sizetit = 0.5) } - diff --git a/man/PlotLayout.Rd b/man/PlotLayout.Rd index f01fdf9ba3a823bc5c151f8eed280136f62be9fb..453cf2e924074f449efd4af697d21a60e3d5afb6 100644 --- a/man/PlotLayout.Rd +++ b/man/PlotLayout.Rd @@ -4,20 +4,52 @@ \alias{PlotLayout} \title{Arrange and Fill Multi-Pannel Layouts With Optional Colour Bar} \usage{ -PlotLayout(fun, plot_dims, var, ..., special_args = NULL, nrow = NULL, - ncol = NULL, toptitle = NULL, row_titles = NULL, col_titles = NULL, - bar_scale = 1, title_scale = 1, title_margin_scale = 1, - title_left_shift_scale = 1, subtitle_scale = 1, - subtitle_margin_scale = 1, brks = NULL, cols = NULL, drawleg = "S", - titles = NULL, subsampleg = NULL, bar_limits = NULL, - triangle_ends = NULL, col_inf = NULL, col_sup = NULL, - color_fun = clim.colors, draw_bar_ticks = TRUE, draw_separators = FALSE, - triangle_ends_scale = 1, bar_extra_labels = NULL, units = NULL, - units_scale = 1, bar_label_scale = 1, bar_tick_scale = 1, - bar_extra_margin = rep(0, 4), bar_left_shift_scale = 1, - bar_label_digits = 4, extra_margin = rep(0, 4), fileout = NULL, - width = NULL, height = NULL, size_units = "in", res = 100, - close_device = TRUE) +PlotLayout( + fun, + plot_dims, + var, + ..., + special_args = NULL, + nrow = NULL, + ncol = NULL, + toptitle = NULL, + row_titles = NULL, + col_titles = NULL, + bar_scale = 1, + title_scale = 1, + title_margin_scale = 1, + title_left_shift_scale = 1, + subtitle_scale = 1, + subtitle_margin_scale = 1, + brks = NULL, + cols = NULL, + drawleg = "S", + titles = NULL, + subsampleg = NULL, + bar_limits = NULL, + triangle_ends = NULL, + col_inf = NULL, + col_sup = NULL, + color_fun = clim.colors, + draw_bar_ticks = TRUE, + draw_separators = FALSE, + triangle_ends_scale = 1, + bar_extra_labels = NULL, + units = NULL, + units_scale = 1, + bar_label_scale = 1, + bar_tick_scale = 1, + bar_extra_margin = rep(0, 4), + bar_left_shift_scale = 1, + bar_label_digits = 4, + extra_margin = rep(0, 4), + fileout = NULL, + width = NULL, + height = NULL, + size_units = "in", + res = 100, + close_device = TRUE +) } \arguments{ \item{fun}{Plot function (or name of the function) to be called on the @@ -48,6 +80,12 @@ applied to each of them. NAs can be passed to the list: a NA will yield a blank cell in the layout, which can be populated after (see .SwitchToFigure).} +\item{\dots}{Parameters to be sent to the plotting function 'fun'. If +multiple arrays are provided in 'var' and multiple functions are provided +in 'fun', the parameters provided through \dots will be sent to all the +plot functions, as common parameters. 
To specify concrete arguments for +each of the plot functions see parameter 'special_args'.} + \item{special_args}{List of sub-lists, each sub-list having specific extra arguments for each of the plot functions provided in 'fun'. If you want to fix a different value for each plot in the layout you can do so by @@ -164,12 +202,6 @@ the layout and a 'fileout' has been specified. This is useful to avoid closing the device when saving the layout into a file and willing to add extra elements or figures. Takes TRUE by default. Disregarded if no 'fileout' has been specified.} - -\item{\dots}{Parameters to be sent to the plotting function 'fun'. If -multiple arrays are provided in 'var' and multiple functions are provided -in 'fun', the parameters provided through \dots will be sent to all the -plot functions, as common parameters. To specify concrete arguments for -each of the plot functions see parameter 'special_args'.} } \value{ \item{brks}{ @@ -244,4 +276,3 @@ PlotLayout(PlotEquiMap, c('lat', 'lon'), sampleData$mod[1, , 1, 1, , ], titles = paste('Member', 1:15)) } - diff --git a/man/PlotMatrix.Rd b/man/PlotMatrix.Rd index 24f046d8d5d94170b3bda4ab0e51f6a72b9399be..5275df031e1d329fc3da932e5bd93ab19d5f45a7 100644 --- a/man/PlotMatrix.Rd +++ b/man/PlotMatrix.Rd @@ -4,12 +4,28 @@ \alias{PlotMatrix} \title{Function to convert any numerical table to a grid of coloured squares.} \usage{ -PlotMatrix(var, brks = NULL, cols = NULL, toptitle = NULL, - title.color = "royalblue4", xtitle = NULL, ytitle = NULL, - xlabels = NULL, xvert = FALSE, ylabels = NULL, line = 3, - figure.width = 1, legend = TRUE, legend.width = 0.15, - xlab_dist = NULL, ylab_dist = NULL, fileout = NULL, size_units = "px", - res = 100, ...) +PlotMatrix( + var, + brks = NULL, + cols = NULL, + toptitle = NULL, + title.color = "royalblue4", + xtitle = NULL, + ytitle = NULL, + xlabels = NULL, + xvert = FALSE, + ylabels = NULL, + line = 3, + figure.width = 1, + legend = TRUE, + legend.width = 0.15, + xlab_dist = NULL, + ylab_dist = NULL, + fileout = NULL, + size_units = "px", + res = 100, + ... +) } \arguments{ \item{var}{A numerical matrix containing the values to be displayed in a @@ -93,4 +109,3 @@ PlotMatrix(var = matrix(rnorm(n = 120, mean = 0.3), 10, 12), xlabels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) } - diff --git a/man/PlotSection.Rd b/man/PlotSection.Rd index 413ef63223d806556fb1cb2a8ba42b64d9176bb2..1627339847efecf53cac6c798208eb744c2ec4af 100644 --- a/man/PlotSection.Rd +++ b/man/PlotSection.Rd @@ -4,10 +4,26 @@ \alias{PlotSection} \title{Plots A Vertical Section} \usage{ -PlotSection(var, horiz, depth, toptitle = "", sizetit = 1, units = "", - brks = NULL, cols = NULL, axelab = TRUE, intydep = 200, - intxhoriz = 20, drawleg = TRUE, fileout = NULL, width = 8, - height = 5, size_units = "in", res = 100, ...) +PlotSection( + var, + horiz, + depth, + toptitle = "", + sizetit = 1, + units = "", + brks = NULL, + cols = NULL, + axelab = TRUE, + intydep = 200, + intxhoriz = 20, + drawleg = TRUE, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... 
+) } \arguments{ \item{var}{Matrix to plot with (longitude/latitude, depth) dimensions.} @@ -69,4 +85,3 @@ sampleData <- s2dv::sampleDepthData PlotSection(sampleData$mod[1, 1, 1, 1, , ], sampleData$lat, sampleData$depth, toptitle = 'temperature 1995-11 member 0') } - diff --git a/man/PlotStereoMap.Rd b/man/PlotStereoMap.Rd index 4b910a90ce21857977ec676bc70d188232e78096..1b7f166a161c3fff921d517bd4569fa54bd734bf 100644 --- a/man/PlotStereoMap.Rd +++ b/man/PlotStereoMap.Rd @@ -4,19 +4,68 @@ \alias{PlotStereoMap} \title{Maps A Two-Dimensional Variable On A Polar Stereographic Projection} \usage{ -PlotStereoMap(var, lon, lat, latlims = c(60, 90), toptitle = NULL, - sizetit = NULL, units = NULL, brks = NULL, cols = NULL, - bar_limits = NULL, triangle_ends = NULL, col_inf = NULL, - col_sup = NULL, colNA = NULL, color_fun = clim.palette(), - filled.continents = FALSE, coast_color = NULL, coast_width = 1, - dots = NULL, dot_symbol = 4, dot_size = 0.8, intlat = 10, - drawleg = TRUE, subsampleg = NULL, bar_extra_labels = NULL, - draw_bar_ticks = TRUE, draw_separators = FALSE, triangle_ends_scale = 1, - bar_label_digits = 4, bar_label_scale = 1, units_scale = 1, - bar_tick_scale = 1, bar_extra_margin = rep(0, 4), boxlim = NULL, - boxcol = "purple2", boxlwd = 5, margin_scale = rep(1, 4), - title_scale = 1, numbfig = NULL, fileout = NULL, width = 6, - height = 5, size_units = "in", res = 100, ...) +PlotStereoMap( + var, + lon, + lat, + varu = NULL, + varv = NULL, + latlims = c(60, 90), + toptitle = NULL, + sizetit = NULL, + units = NULL, + brks = NULL, + cols = NULL, + bar_limits = NULL, + triangle_ends = NULL, + col_inf = NULL, + col_sup = NULL, + colNA = NULL, + color_fun = clim.palette(), + filled.continents = FALSE, + coast_color = NULL, + coast_width = 1, + contours = NULL, + brks2 = NULL, + contour_lwd = 0.5, + contour_color = "black", + contour_lty = 1, + contour_label_draw = TRUE, + contour_label_scale = 0.6, + dots = NULL, + dot_symbol = 4, + dot_size = 0.8, + intlat = 10, + arr_subsamp = floor(length(lon)/30), + arr_scale = 1, + arr_ref_len = 15, + arr_units = "m/s", + arr_scale_shaft = 1, + arr_scale_shaft_angle = 1, + drawleg = TRUE, + subsampleg = NULL, + bar_extra_labels = NULL, + draw_bar_ticks = TRUE, + draw_separators = FALSE, + triangle_ends_scale = 1, + bar_label_digits = 4, + bar_label_scale = 1, + units_scale = 1, + bar_tick_scale = 1, + bar_extra_margin = rep(0, 4), + boxlim = NULL, + boxcol = "purple2", + boxlwd = 5, + margin_scale = rep(1, 4), + title_scale = 1, + numbfig = NULL, + fileout = NULL, + width = 6, + height = 5, + size_units = "in", + res = 100, + ... +) } \arguments{ \item{var}{Array with the values at each cell of a grid on a regular @@ -38,6 +87,12 @@ longitude range can also be provided, e.g. \code{lon = c(0:50, 300:360)} grid of 'var', in any order (same as 'var'). Expected to be from a regular rectangular or gaussian grid, within the range [-90, 90].} +\item{varu}{Array of the zonal component of wind/current/other field with +the same dimensions as 'var'.} + +\item{varv}{Array of the meridional component of wind/current/other field +with the same dimensions as 'var'.} + \item{latlims}{Latitudinal limits of the figure.\cr Example : c(60, 90) for the North Pole\cr c(-90,-60) for the South Pole} @@ -82,6 +137,31 @@ continents. Takes the value gray(0.5) by default.} \item{coast_width}{Line width of the coast line of the drawn projected continents. 
Takes the value 1 by default.}
 
+\item{contours}{Array of same dimensions as 'var' to be added to the plot
+and displayed with contours. Parameter 'brks2' is required to define the
+magnitude breaks for each contour curve.}
+
+\item{brks2}{A numeric value or vector of magnitude breaks where to draw
+contour curves for the array provided in 'contours'. If it is a number, it
+represents the number of breaks (n) that defines (n - 1) intervals to
+classify 'contours'.}
+
+\item{contour_lwd}{Line width of the contour curves provided via 'contours'
+and 'brks2'. The default value is 0.5.}
+
+\item{contour_color}{Line color of the contour curves provided via 'contours'
+and 'brks2'.}
+
+\item{contour_lty}{Line type of the contour curves. Takes 1 (solid) by
+default. See help on 'lty' in par() for other accepted values.}
+
+\item{contour_label_draw}{A logical value indicating whether to draw the
+contour labels (TRUE) or not (FALSE) when 'contours' is used. The default
+value is TRUE.}
+
+\item{contour_label_scale}{Scale factor for the superimposed labels when
+drawing contour levels. The default value is 0.6.}
+
 \item{dots}{Array of same dimensions as 'var' or with dimensions
 c(n, dim(var)), where n is the number of dot/symbol layers to add to the
 plot. A value of TRUE at a grid cell will draw a dot/symbol on the
@@ -102,6 +182,29 @@ layers in 'dots'. Takes 1 by default.}
 \item{intlat}{Interval between latitude lines (circles), in degrees.
 Defaults to 10.}
 
+\item{arr_subsamp}{A number as a subsampling factor to select a subset of
+arrows in 'varu' and 'varv' to be drawn. Only one out of arr_subsamp arrows
+will be drawn. The default value is floor(length(lon) / 30).}
+
+\item{arr_scale}{A number as scale factor for drawn arrows from 'varu' and
+'varv'. The default value is 1.}
+
+\item{arr_ref_len}{A number of the length of the reference arrow to be drawn
+as a legend at the bottom of the figure (in the same units as 'varu' and
+'varv'; only affects the legend for the wind or variable in these arrays).
+The default value is 15.}
+
+\item{arr_units}{Units of 'varu' and 'varv', to be drawn in the legend.
+Takes 'm/s' by default.}
+
+\item{arr_scale_shaft}{A number for the scale of the shaft of the arrows
+(which also depends on the number of figures and the arr_scale parameter).
+The default value is 1.}
+
+\item{arr_scale_shaft_angle}{A number for the scale of the angle of the
+shaft of the arrows (which also depends on the number of figures and the
+arr_scale parameter). The default value is 1.}
+
 \item{drawleg}{Whether to plot a color bar (legend, key) or not. Defaults to
 TRUE.}
 
@@ -172,8 +275,8 @@ Map longitude-latitude array (on a regular rectangular or gaussian grid) on
 a polar stereographic world projection with coloured grid cells. Only the
 region within a specified latitude interval is displayed. A colour bar
 (legend) can be plotted and adjusted. It is possible to draw superimposed
-dots, symbols and boxes. A number of options is provided to adjust the
-position, size and colour of the components. This plot function is
+dots, symbols, boxes, contours, and arrows. A number of options is provided to
+adjust the position, size and colour of the components. This plot function is
 compatible with figure layouts if colour bar is disabled.
} \examples{ @@ -183,4 +286,3 @@ y <- seq(from = -90, to = 90, length.out = 50) PlotStereoMap(data, x, y, latlims = c(60, 90), brks = 50, toptitle = "This is the title") } - diff --git a/man/PlotVsLTime.Rd b/man/PlotVsLTime.Rd new file mode 100644 index 0000000000000000000000000000000000000000..05e2b422189793b9c359177ae3ef77ae34cc6ad4 --- /dev/null +++ b/man/PlotVsLTime.Rd @@ -0,0 +1,144 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PlotVsLTime.R +\name{PlotVsLTime} +\alias{PlotVsLTime} +\title{Plot a score along the forecast time with its confidence interval} +\usage{ +PlotVsLTime( + var, + toptitle = "", + ytitle = "", + monini = 1, + freq = 12, + nticks = NULL, + limits = NULL, + listexp = c("exp1", "exp2", "exp3"), + listobs = c("obs1", "obs2", "obs3"), + biglab = FALSE, + hlines = NULL, + leg = TRUE, + siglev = FALSE, + sizetit = 1, + show_conf = TRUE, + fileout = NULL, + width = 8, + height = 5, + size_units = "in", + res = 100, + ... +) +} +\arguments{ +\item{var}{Matrix containing any Prediction Score with dimensions:\cr +(nexp/nmod, 3/4 ,nltime)\cr +or (nexp/nmod, nobs, 3/4 ,nltime).} + +\item{toptitle}{Main title, optional.} + +\item{ytitle}{Title of Y-axis, optional.} + +\item{monini}{Starting month between 1 and 12. Default = 1.} + +\item{freq}{1 = yearly, 12 = monthly, 4 = seasonal, ... Default = 12.} + +\item{nticks}{Number of ticks and labels on the x-axis, optional.} + +\item{limits}{c(lower limit, upper limit): limits of the Y-axis, optional.} + +\item{listexp}{List of experiment names, optional.} + +\item{listobs}{List of observation names, optional.} + +\item{biglab}{TRUE/FALSE for presentation/paper plot. Default = FALSE.} + +\item{hlines}{c(a,b, ..) Add horizontal black lines at Y-positions a,b, ...\cr +Default = NULL.} + +\item{leg}{TRUE/FALSE if legend should be added or not to the plot. +Default = TRUE.} + +\item{siglev}{TRUE/FALSE if significance level should replace confidence +interval.\cr +Default = FALSE.} + +\item{sizetit}{Multiplicative factor to change title size, optional.} + +\item{show_conf}{TRUE/FALSE to show/not confidence intervals for input +variables.} + +\item{fileout}{Name of output file. Extensions allowed: eps/ps, jpeg, png, +pdf, bmp and tiff. The default value is NULL.} + +\item{width}{File width, in the units specified in the parameter size_units +(inches by default). Takes 8 by default.} + +\item{height}{File height, in the units specified in the parameter +size_units (inches by default). Takes 5 by default.} + +\item{size_units}{Units of the size of the device (file or window) to plot +in. Inches ('in') by default. See ?Devices and the creator function of the +corresponding device.} + +\item{res}{Resolution of the device (file or window) to plot in. See +?Devices and the creator function of the corresponding device.} + +\item{...}{Arguments to be passed to the method. 
Only accepts the following +graphical parameters:\cr +adj ann ask bg bty cex.sub cin col.axis col.lab col.main col.sub cra crt +csi cxy err family fg fig font font.axis font.lab font.main font.sub +lheight ljoin lmitre mar mex mfcol mfrow mfg mkh oma omd omi page pch plt +smo srt tck tcl usr xaxp xaxs xaxt xlog xpd yaxp yaxs yaxt ylbias ylog \cr +For more information about the parameters see `par`.} +} +\description{ +Plot the correlation (\code{Corr()}), the root mean square error +(\code{RMS()}) between the forecast values and their observational +counterpart, the slope of their trend (\code{Trend()}), the +interquartile range, maximum-minimum, standard deviation or median absolute +deviation of the ensemble members (\code{Spread()}), or the ratio between +the ensemble spread and the RMSE of the ensemble mean (\code{RatioSDRMS()}) +along the forecast time for all the input experiments on the same figure +with their confidence intervals. +} +\details{ +Examples of input:\cr +Model and observed output from \code{Load()} then \code{Clim()} then +\code{Ano()} then \code{Smoothing()}:\cr +(nmod, nmemb, nsdate, nltime) and (nobs, nmemb, nsdate, nltime)\cr +then averaged over the members\cr +\code{Mean1Dim(var_exp/var_obs, posdim = 2)}:\cr +(nmod, nsdate, nltime) and (nobs, nsdate, nltime)\cr +then passed through\cr + \code{Corr(exp, obs, posloop = 1, poscor = 2)} or\cr + \code{RMS(exp, obs, posloop = 1, posRMS = 2)}:\cr + (nmod, nobs, 3, nltime)\cr +would plot the correlations or RMS between each exp & each obs as a function +of the forecast time. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +runmean_months <- 12 +smooth_ano_exp <- Smoothing(data = ano_exp, runmeanlen = runmean_months) +smooth_ano_obs <- Smoothing(data = ano_obs, runmeanlen = runmean_months) +dim_to_mean <- 'member' # mean along members +required_complete_row <- 'ftime' # discard startdates for which there are NA leadtimes +leadtimes_per_startdate <- 60 +corr <- Corr(MeanDims(smooth_ano_exp, dim_to_mean), + MeanDims(smooth_ano_obs, dim_to_mean), + comp_dim = required_complete_row, + limits = c(ceiling((runmean_months + 1) / 2), + leadtimes_per_startdate - floor(runmean_months / 2))) +# Combine corr results for plotting +corr_combine <- abind::abind(corr$conf.lower, corr$corr, corr$conf.upper, corr$p.val, along = 0) +corr_combine <- Reorder(corr_combine, c(2, 3, 1, 4)) +\donttest{ +PlotVsLTime(corr_combine, toptitle = "correlations", ytitle = "correlation", + monini = 11, limits = c(-1, 2), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, hlines = c(-1, 0, 1)) + } + +} diff --git a/man/ProbBins.Rd b/man/ProbBins.Rd new file mode 100644 index 0000000000000000000000000000000000000000..cfd7affed05b4ac98400ff9f6c51973ca798a995 --- /dev/null +++ b/man/ProbBins.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ProbBins.R +\name{ProbBins} +\alias{ProbBins} +\title{Compute probabilistic information of a forecast relative to a threshold or a quantile} +\usage{ +ProbBins( + data, + thr, + fcyr = "all", + time_dim = "sdate", + memb_dim = "member", + quantile = TRUE, + compPeriod = "Full period", + ncores = NULL +) +} +\arguments{ +\item{data}{A numeric array of anomalies with the dimensions 'time_dim' and +'memb_dim' at least.
It can be generated by \code{Ano()}.} + +\item{thr}{A numeric vector used as the quantiles (if 'quantile' is TRUE) or +thresholds (if 'quantile' is FALSE) to bin the anomalies. If they are quantiles, +they must be within [0, 1].} + +\item{fcyr}{A numeric vector of the indices of the forecast years (i.e., +time_dim) to compute the probabilistic bins for, or 'all' to compute the +bins for all the years. E.g., c(1:5), c(1, 4), 4, or 'all'. The default +value is 'all'.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the probabilistic bins. The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension or the dimension to be merged with 'time_dim' for probabilistic +calculation. The default value is 'member'.} + +\item{quantile}{A logical value indicating if the thresholds ('thr') are +quantiles (TRUE) or the absolute thresholds of the bins (FALSE). The +default value is TRUE.} + +\item{compPeriod}{A character string referring to three computation options:\cr +"Full period": The probabilities are computed based on 'data';\cr +"Without fcyr": The probabilities are computed based on 'data' with all +'fcyr' removed;\cr +"Cross-validation": The probabilities are computed based on leave-one-out +cross-validation.\cr +The default value is "Full period".} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric array of probabilistic information with dimensions:\cr + c(bin = length of 'thr' + 1, time_dim = length of 'fcyr', memb_dim, the + rest of the dimensions of 'data')\cr + The values along the 'bin' dimension take values 0 or 1 depending on which + of the (length of 'thr' + 1) categories the forecast or observation at the corresponding + grid point, time step, member and start date belongs to. +} +\description{ +Compute probabilistic bins of a set of forecast years ('fcyr') relative to +the forecast climatology over the whole period of anomalies, optionally excluding +the selected forecast years ('fcyr') or the forecast year for which the +probabilistic bins are being computed (see 'compPeriod'). +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +clim <- Clim(sampleMap$mod, sampleMap$obs) +ano_exp <- Ano(sampleMap$mod, clim$clim_exp) +PB <- ProbBins(ano_exp, fcyr = 3, thr = c(1/3, 2/3), quantile = TRUE) + +} diff --git a/man/ProjectField.Rd b/man/ProjectField.Rd new file mode 100644 index 0000000000000000000000000000000000000000..358f4ee86e8aefaebb4e6cac23eaae6b9e4f8b1e --- /dev/null +++ b/man/ProjectField.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ProjectField.R +\name{ProjectField} +\alias{ProjectField} +\title{Project anomalies onto modes of variability} +\usage{ +ProjectField( + ano, + eof, + time_dim = "sdate", + space_dim = c("lat", "lon"), + mode = NULL, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions. The +dimensions must have at least 'time_dim' and 'space_dim'. It can be +generated by Ano().} + +\item{eof}{A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +'space_dim' at least. 'wght' has dimensions space_dim.
It can be generated +by EOF() or REOF().} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'. The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{mode}{An integer of the variability mode number in the EOF to be +projected on. The default value is NULL, which means all the modes of 'eof' +are calculated.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of the principal components in the verification + format. The dimensions are the same as 'ano' except 'space_dim'. +} +\description{ +Project anomalies onto modes of variability to get the temporal evolution of +the EOF mode selected. It returns principal components (PCs) by area-weighted +projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +returns NA if the whole spatial pattern is NA. +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) + +\dontrun{ + # Plot the forecast and the observation of the first mode for the last year + # of forecast + sdate_dim_length <- dim(mode1_obs)['sdate'] + plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), + lwd = 2) + for (i in 1:dim(mode1_exp)['member']) { + par(new = TRUE) + plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], + ylim = c(-15000, 15000)) + } +} + +} +\seealso{ +EOF, NAO, PlotBoxWhisker +} diff --git a/man/REOF.Rd b/man/REOF.Rd new file mode 100644 index 0000000000000000000000000000000000000000..a5d416c22e132b96125b18738ecf7330d9ce7d2f --- /dev/null +++ b/man/REOF.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/REOF.R +\name{REOF} +\alias{REOF} +\title{Area-weighted empirical orthogonal function analysis with varimax rotation using SVD} +\usage{ +REOF( + ano, + lat, + lon, + ntrunc = 15, + time_dim = "sdate", + space_dim = c("lat", "lon"), + corr = FALSE, + ncores = NULL +) +} +\arguments{ +\item{ano}{A numerical array of anomalies with named dimensions to calculate +REOF. The dimensions must have at least 'time_dim' and 'space_dim'.} + +\item{lat}{A vector of the latitudes of 'ano'.} + +\item{lon}{A vector of the longitudes of 'ano'.} + +\item{ntrunc}{A positive integer of the number of eofs to be kept for varimax +rotation. This function uses this value as 'neof' too, which is the number +of eofs to return by \code{.EOF()}. The default value is 15. If time length +or the product of latitude length and longitude length is less than +'ntrunc', 'ntrunc' is equal to the minimum of the three values.} + +\item{time_dim}{A character string indicating the name of the time dimension +of 'ano'.
The default value is 'sdate'.} + +\item{space_dim}{A vector of two character strings. The first is the dimension +name of latitude of 'ano' and the second is the dimension name of longitude +of 'ano'. The default value is c('lat', 'lon').} + +\item{corr}{A logical value indicating whether to base the analysis on a correlation +matrix (TRUE) or on a covariance matrix (FALSE). The default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing: +\item{REOFs}{ + An array of REOF patterns normalized to 1 (unitless) with dimensions + (number of modes, the rest of the dimensions of 'ano' except + 'time_dim'). Multiplying 'REOFs' by 'RPCs' gives the original + reconstructed field. +} +\item{RPCs}{ + An array of principal components with the units of the original field to + the power of 2, with dimensions (time_dim, number of modes, the rest of the + dimensions of 'ano' except 'space_dim'). +} +\item{var}{ + An array of the percentage (%) of variance fraction of total variance + explained by each mode. The dimensions are (number of modes, the rest of + the dimensions except 'time_dim' and 'space_dim'). +} +\item{wght}{ + An array of the area weighting with dimensions 'space_dim'. It is calculated + by the square root of the cosine of 'lat' and used to compute the fraction of + variance explained by each REOF. +} +} +\description{ +Perform an area-weighted EOF analysis with varimax rotation using singular +value decomposition (SVD) based on a covariance matrix or a correlation matrix if +parameter 'corr' is set to TRUE. The internal s2dv function \code{.EOF()} is used +for the computation. +} +\examples{ +# This example computes the REOFs along forecast horizons and plots the one +# that explains the greatest amount of variability. The example data has low +# resolution so the result may not be very informative, but it displays how to +# use this function.
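+# A minimal standalone sketch with synthetic toy data, kept as comments (the +# array values and sizes here are hypothetical; 'sdate', 'lat' and 'lon' are +# the default dimension names): +# ano_toy <- array(rnorm(120), dim = c(sdate = 5, lat = 4, lon = 6)) +# res_toy <- REOF(ano_toy, lat = seq(30, 60, 10), lon = seq(0, 50, 10), ntrunc = 3)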
+\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +ano <- MeanDims(ano$exp, c('dataset', 'member')) +res <- REOF(ano, lat = sampleData$lat, lon = sampleData$lon, ntrunc = 5) +\dontrun{ +PlotEquiMap(res$REOFs[1, , , 1], lon = sampleData$lon, lat = sampleData$lat) +} + +} +\seealso{ +EOF +} diff --git a/man/RMS.Rd b/man/RMS.Rd index 7bd33b3ae69f711813806ff2dd266147791e8c7d..4391df47947a48b80c855d95f058d5e93034be3a 100644 --- a/man/RMS.Rd +++ b/man/RMS.Rd @@ -4,8 +4,17 @@ \alias{RMS} \title{Compute root mean square error} \usage{ -RMS(exp, obs, time_dim = "sdate", dat_dim = "dataset", comp_dim = NULL, - limits = NULL, conf = TRUE, conf.lev = 0.95, ncores = NULL) +RMS( + exp, + obs, + time_dim = "sdate", + dat_dim = "dataset", + comp_dim = NULL, + limits = NULL, + conf = TRUE, + conf.lev = 0.95, + ncores = NULL +) } \arguments{ \item{exp}{A named numeric array of experimental data, with at least two @@ -79,4 +88,3 @@ The confidence interval is computed by the chi2 distribution.\cr # Renew example when Ano and Smoothing are ready } - diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 1b8274f41c159812f639b8e580190f2ad3e0dfac..9ebcf65475512398233ccf6da90fe09289bf0388 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -4,8 +4,14 @@ \alias{RMSSS} \title{Compute root mean square error skill score} \usage{ -RMSSS(exp, obs, time_dim = "sdate", dat_dim = "dataset", pval = TRUE, - ncores = NULL) +RMSSS( + exp, + obs, + time_dim = "sdate", + dat_dim = "dataset", + pval = TRUE, + ncores = NULL +) } \arguments{ \item{exp}{A named numeric array of experimental data which contains at least @@ -66,4 +72,3 @@ obs <- array(rnorm(15), dim = c(time = 3, memb = 5, dataset = 1)) res <- RMSSS(exp, obs, time_dim = 'time') } - diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index 33b226fdd38740051d4a069c392c03252bbe2140..11106487e6ab0e82b7299c0f378ebbb7247d3a55 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -49,4 +49,3 @@ skill_B <- abs(fcst_B - reference) RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) } - diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..194c6b9899e84dfd1772117cc9eade22d2e1e7b7 --- /dev/null +++ b/man/RatioRMS.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RatioRMS.R +\name{RatioRMS} +\alias{RatioRMS} +\title{Compute the ratio between the RMSE of two experiments} +\usage{ +RatioRMS(exp1, exp2, obs, time_dim = "sdate", pval = TRUE, ncores = NULL) +} +\arguments{ +\item{exp1}{A numeric array with named dimensions of the first experimental +data. It must have at least 'time_dim' and have the same dimensions as +'exp2' and 'obs'.} + +\item{exp2}{A numeric array with named dimensions of the second experimental +data. It must have at least 'time_dim' and have the same dimensions as +'exp1' and 'obs'.} + +\item{obs}{A numeric array with named dimensions of the observational data. +It must have at least 'time_dim' and have the same dimensions as 'exp1' and +'exp2'.} + +\item{time_dim}{A character string of the dimension name along which RMS is +computed.
The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute the p-value of Ho: +RMSE1/RMSE2 = 1. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list containing the numeric arrays with dimensions identical to those of + 'exp1', 'exp2', and 'obs', except 'time_dim': +\item{$ratiorms}{ + The ratio between the RMSEs (i.e., RMSE1/RMSE2). +} +\item{$p.val}{ + The p-value of the two-sided Fisher test with Ho: RMSE1/RMSE2 = 1. Only + exists if 'pval' is TRUE. +} +} +\description{ +Calculate the ratio of the RMSE for two forecasts with the same observation, +that is, RMSE(ens, obs) / RMSE(ens.ref, obs). The p-value is provided by a +two-sided Fisher test. +} +\examples{ +\dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) +} +# Compute DJF seasonal means and anomalies. +initial_month <- 11 +mean_start_month <- 12 +mean_stop_month <- 2 +sampleData$mod <- Season(sampleData$mod, monini = initial_month, + moninf = mean_start_month, monsup = mean_stop_month) +sampleData$obs <- Season(sampleData$obs, monini = initial_month, + moninf = mean_start_month, monsup = mean_stop_month) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +ano_obs <- Ano(sampleData$obs, clim$clim_obs) +# Generate two experiments with 2 and 1 members from the only experiment +# available in the sample data. Take only data values for a single forecast +# time step. +ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2)) +ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3)) +ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +# Compute ensemble mean and provide as inputs to RatioRMS. +rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), + MeanDims(ano_exp_2, 'member'), + MeanDims(ano_obs, 'member')) +# Plot the RatioRMS for the first forecast time step. +\donttest{ +PlotEquiMap(rrms$ratiorms, sampleData$lon, sampleData$lat, + toptitle = 'Ratio RMSE') +} + +} diff --git a/man/RatioSDRMS.Rd b/man/RatioSDRMS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..7dbd68283599edcfa599768a1946b00ce89fdea1 --- /dev/null +++ b/man/RatioSDRMS.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RatioSDRMS.R +\name{RatioSDRMS} +\alias{RatioSDRMS} +\title{Compute the ratio between the ensemble spread and RMSE} +\usage{ +RatioSDRMS( + exp, + obs, + dat_dim = "dataset", + memb_dim = "member", + time_dim = "sdate", + pval = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data with at least two +dimensions 'memb_dim' and 'time_dim'.} + +\item{obs}{A named numeric array of observational data with at least two +dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. If there is no dataset dimension, set as NULL.
The default value +is 'dataset'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension. It must be one dimension in 'exp' and 'obs'. The default value +is 'member'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the ratio is computed. The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute or not the p-value +of the test Ho : SD/RMSE = 1 or not. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of two arrays with dimensions c(nexp, nobs, the rest of + dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is + the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +(only present if \code{pval = TRUE}) of the one-sided Fisher test with +Ho: SD/RMSE = 1.\cr\cr +\item{$ratio}{ + The ratio of the ensemble spread and RMSE. +} +\item{$p_val}{ + The p-value of the one-sided Fisher test with Ho: SD/RMSE = 1. Only present + if \code{pval = TRUE}. +} +} +\description{ +Compute the ratio between the standard deviation of the members around the +ensemble mean in experimental data and the RMSE between the ensemble mean of +experimental and observational data. The p-value is provided by a one-sided +Fischer test. +} +\examples{ +# Load sample data as in Load() example: +example(Load) +rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs) +# Reorder the data in order to plot it with PlotVsLTime +rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +rsdrms_plot[, , 2, ] <- rsdrms$ratio +rsdrms_plot[, , 4, ] <- rsdrms$p.val +\donttest{ +PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", + monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), + listobs = c('ERSST'), biglab = FALSE, siglev = TRUE, + fileout = 'tos_rsdrms.eps') +} + +} diff --git a/man/Regression.Rd b/man/Regression.Rd index 4faafc1d83c57ab08f6d6fbf523e18593d89a131..8e27295175b9a357bad33c7c6ff28c09eb06ead4 100644 --- a/man/Regression.Rd +++ b/man/Regression.Rd @@ -4,8 +4,17 @@ \alias{Regression} \title{Compute the regression of an array on another along one dimension.} \usage{ -Regression(datay, datax, reg_dim = "sdate", formula = y ~ x, pval = TRUE, - conf = TRUE, conf.lev = 0.95, na.action = na.omit, ncores = NULL) +Regression( + datay, + datax, + reg_dim = "sdate", + formula = y ~ x, + pval = TRUE, + conf = TRUE, + conf.lev = 0.95, + na.action = na.omit, + ncores = NULL +) } \arguments{ \item{datay}{An numeric array as predictand including the dimension along @@ -92,4 +101,3 @@ res1 <- Regression(datay, datax, formula = y~poly(x, 2, raw = TRUE)) res2 <- Regression(datay, datax, conf.lev = 0.9) } - diff --git a/man/Reorder.Rd b/man/Reorder.Rd index 0afa07ea27990f673828ea1d32547c2d993ffde1..8748aaf2662d81e53211e3001954cfc896428f11 100644 --- a/man/Reorder.Rd +++ b/man/Reorder.Rd @@ -26,4 +26,3 @@ Reorder the dimension order of a multi-dimensional array dat2 <- array(c(1:10), dim = c(2, 1, 5)) print(dim(Reorder(dat2, c(2, 1, 3)))) } - diff --git a/man/SPOD.Rd b/man/SPOD.Rd index cbbbf1abda9a47c52bc13751d8fc1136f7502c97..4c4ed298fd7f8534e7920851c022b344ea5320d6 100644 --- a/man/SPOD.Rd +++ b/man/SPOD.Rd @@ -4,17 +4,29 @@ \alias{SPOD} \title{Compute the South Pacific Ocean Dipole (SPOD) index} \usage{ -SPOD(data, data_lats, data_lons, type, lat_dim = "lat", lon_dim = "lon", - mask = NULL, monini = 11, fmonth_dim = "fmonth", 
sdate_dim = "sdate", - indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member") +SPOD( + data, + data_lats, + data_lons, + type, + lat_dim = "lat", + lon_dim = "lon", + mask = NULL, + monini = 11, + fmonth_dim = "fmonth", + sdate_dim = "sdate", + indices_for_clim = NULL, + year_dim = "year", + month_dim = "month", + na.rm = TRUE, + ncores = NULL +) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -57,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension The default value is 'year'. Only used if parameter 'type' is 'hist' or @@ -67,22 +79,26 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicanting whether to remove NA values. The default +value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -A numerical array of the SPOD index with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the SPOD index with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The South Pacific Ocean Dipole (SPOD) index is related to the El Nino-Southern Oscillation (ENSO) and the Inderdecadal Pacific Oscillation (IPO). The SPOD index is computed as the difference of weighted-averaged SST anomalies over 20ºS-48ºS, 165ºE-190ºE (NW pole) and the weighted-averaged SST -anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020). +anomalies over 44ºS-65ºS, 220ºE-260ºE (SE pole) (Saurral et al., 2020). +If different members and/or datasets are provided, the climatology (used to +calculate the anomalies) is computed individually for all of them. 
} \examples{ ## Observations or reanalyses @@ -104,4 +120,3 @@ lon <- seq(0, 360, 10) index_dcpp <- SPOD(data = dcpp, data_lats = lat, data_lons = lon, type = 'dcpp', monini = 1) } - diff --git a/man/Season.Rd b/man/Season.Rd index cb10deefb8e4353a0901b0cd471efc9fd28e8106..3c1e3ffcda3ec669195e8762f1710a9ecc7855b7 100644 --- a/man/Season.Rd +++ b/man/Season.Rd @@ -4,8 +4,16 @@ \alias{Season} \title{Compute seasonal mean} \usage{ -Season(data, time_dim = "ftime", monini, moninf, monsup, method = mean, - na.rm = TRUE, ncores = NULL) +Season( + data, + time_dim = "ftime", + monini, + moninf, + monsup, + method = mean, + na.rm = TRUE, + ncores = NULL +) } \arguments{ \item{data}{A named numeric array with at least one dimension 'time_dim'.} @@ -53,4 +61,3 @@ dat2[na] <- NA res <- Season(data = dat2, monini = 3, moninf = 1, monsup = 2) res <- Season(data = dat2, monini = 3, moninf = 1, monsup = 2, na.rm = FALSE) } - diff --git a/man/Smoothing.Rd b/man/Smoothing.Rd index ba62ca17eab6aa023119e504b37abdae19157ea7..8d4a55871654d6159691f896bbed85434fe94da4 100644 --- a/man/Smoothing.Rd +++ b/man/Smoothing.Rd @@ -43,4 +43,3 @@ PlotAno(smooth_ano_exp, smooth_ano_obs, startDates, fileout = "tos_smoothed_ano.png") } } - diff --git a/man/Spectrum.Rd b/man/Spectrum.Rd new file mode 100644 index 0000000000000000000000000000000000000000..84b39c0cf44cc4165c25b354680ec226b8bd668d --- /dev/null +++ b/man/Spectrum.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spectrum.R +\name{Spectrum} +\alias{Spectrum} +\title{Estimate frequency spectrum} +\usage{ +Spectrum(data, time_dim = "ftime", conf.lev = 0.95, ncores = NULL) +} +\arguments{ +\item{data}{A vector or numeric array of which the frequency spectrum is +required. If it's a vector, it should be a time series. If it's an array, +the dimensions must have at least 'time_dim'. The data is assumed to be +evenly spaced in time.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the frequency spectrum. The default value is 'ftime'.} + +\item{conf.lev}{A numeric indicating the confidence level for the Monte-Carlo +significance test. The default value is 0.95.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numeric array of the frequency spectrum with dimensions + c(number of frequencies, stats = 3, the rest of the + dimensions of 'data'). The 'stats' dimension contains the frequency values, + the spectral density, and the confidence interval. +} +\description{ +Estimate the frequency spectrum of the data array together with a +user-specified confidence level. The output is provided as an array with +dimensions c(number of frequencies, stats = 3, other margin dimensions of +data). The 'stats' dimension contains the frequencies at which the spectral +density is estimated, the estimates of the spectral density, and the +significance level.\cr +The spectrum estimation relies on an R built-in function \code{spectrum()} +and the confidence interval is estimated by the Monte-Carlo method.
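+For example, an input array of dimensions c(ftime, lat, lon) with the default +'time_dim' yields an output of dimensions c(number of frequencies, stats = 3, +lat, lon).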
+} +\examples{ +# Load sample data as in Load() example: +example(Load) +ensmod <- MeanDims(sampleData$mod, 2) +spectrum <- Spectrum(ensmod) + +for (jsdate in 1:dim(spectrum)['sdate']) { + for (jlen in 1:dim(spectrum)['ftime']) { + if (spectrum[jlen, 2, 1, jsdate] > spectrum[jlen, 3, 1, jsdate]) { + ensmod[1, jsdate, ] <- Filter(ensmod[1, jsdate, ], spectrum[jlen, 1, 1, jsdate]) + } + } +} + \donttest{ +PlotAno(InsertDim(ensmod, 2, 1), sdates = startDates) + } + +} diff --git a/man/Spread.Rd b/man/Spread.Rd new file mode 100644 index 0000000000000000000000000000000000000000..26e289e919178e5b945a2477e0f266992c7fcf08 --- /dev/null +++ b/man/Spread.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Spread.R +\name{Spread} +\alias{Spread} +\title{Compute interquartile range, maximum-minimum, standard deviation and median +absolute deviation} +\usage{ +Spread( + data, + compute_dim = "member", + na.rm = TRUE, + conf = TRUE, + conf.lev = 0.95, + ncores = NULL +) +} +\arguments{ +\item{data}{A numeric vector or array with named dimensions to compute the +statistics. The dimensions should at least include 'compute_dim'.} + +\item{compute_dim}{A vector of character strings of the dimension names along +which to compute the statistics. The default value is 'member'.} + +\item{na.rm}{A logical value indicating if NAs should be removed (TRUE) or +kept (FALSE) for computation. The default value is TRUE.} + +\item{conf}{A logical value indicating whether to compute the confidence +intervals or not. The default value is TRUE.} + +\item{conf.lev}{A numeric value of the confidence level for the computation. +The default value is 0.95.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of numeric arrays with the same dimensions as 'data' but without +'compute_dim' and with the first dimension 'stats'. If 'conf' is TRUE, the +length of 'stats' is 3 corresponding to the lower limit of the confidence +interval, the spread, and the upper limit of the confidence interval. If +'conf' is FALSE, the length of 'stats' is 1 corresponding to the spread. +\item{$iqr}{ + InterQuartile Range. +} +\item{$maxmin}{ + Maximum - Minimum. +} +\item{$sd}{ + Standard Deviation. +} +\item{$mad}{ + Median Absolute Deviation. +} +} +\description{ +Compute interquartile range, maximum-minimum, standard deviation and median +absolute deviation along the list of dimensions provided by the compute_dim +argument (typically along the ensemble member and start date dimension). +The confidence interval is computed by bootstrapping 100 times. The input +data can be the output of \code{Load()}, \code{Ano()}, or +\code{Ano_CrossValid()}, for example.
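+For example, with compute_dim = c('member', 'sdate') and 'data' of dimensions +c(member, sdate, ftime), each of the four returned arrays has dimensions +c(stats, ftime).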
+} +\examples{ +# Load sample data as in Load() example: +example(Load) +clim <- Clim(sampleData$mod, sampleData$obs) +ano_exp <- Ano(sampleData$mod, clim$clim_exp) +runmean_months <- 12 +smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +smooth_ano_exp_m_sub <- smooth_ano_exp - InsertDim(MeanDims(smooth_ano_exp, 'member', + na.rm = TRUE), + posdim = 3, + lendim = dim(smooth_ano_exp)['member'], + name = 'member') +spread <- Spread(smooth_ano_exp_m_sub, compute_dim = c('member', 'sdate')) + +\donttest{ +PlotVsLTime(Reorder(spread$iqr, c('dataset', 'stats', 'ftime')), + toptitle = "Inter-Quartile Range between ensemble members", + ytitle = "K", monini = 11, limits = NULL, + listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, + hlines = c(0), fileout = 'tos_iqr.png') +PlotVsLTime(Reorder(spread$maxmin, c('dataset', 'stats', 'ftime')), + toptitle = "Maximum minus minimum of the members", + ytitle = "K", monini = 11, limits = NULL, + listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, + hlines = c(0), fileout = 'tos_maxmin.png') +PlotVsLTime(Reorder(spread$sd, c('dataset', 'stats', 'ftime')), + toptitle = "Standard deviation of the members", + ytitle = "K", monini = 11, limits = NULL, + listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, + hlines = c(0), fileout = 'tos_sd.png') +PlotVsLTime(Reorder(spread$mad, c('dataset', 'stats', 'ftime')), + toptitle = "Median Absolute Deviation of the members", + ytitle = "K", monini = 11, limits = NULL, + listexp = c('CMIP5 IC3'), listobs = c('ERSST'), biglab = FALSE, + hlines = c(0), fileout = 'tos_mad.png') +} + +} diff --git a/man/StatSeasAtlHurr.Rd b/man/StatSeasAtlHurr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..965732291cd23fd966a15710a7446a366266dde7 --- /dev/null +++ b/man/StatSeasAtlHurr.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/StatSeasAtlHurr.R +\name{StatSeasAtlHurr} +\alias{StatSeasAtlHurr} +\title{Compute estimate of seasonal mean of Atlantic hurricane activity} +\usage{ +StatSeasAtlHurr(atlano, tropano, hrvar = "HR", ncores = NULL) +} +\arguments{ +\item{atlano}{A numeric array with named dimensions of Atlantic sea surface +temperature anomalies. It must have the same dimensions as 'tropano'.} + +\item{tropano}{A numeric array with named dimensions of tropical sea surface +temperature anomalies. It must have the same dimensions as 'atlano'.} + +\item{hrvar}{A character string of the seasonal average to be estimated. The +options are either "HR" (hurricanes), "TC" (tropical cyclones with lifetime +>=48h), or "PDI" (power dissipation index). The default value is 'HR'.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list composed of two arrays with the same dimensions as 'atlano' + and 'tropano'. +\item{$mean}{ + The mean of the desired quantity. +} +\item{$var}{ + The variance of that quantity. +} +} +\description{ +Compute one of G. Villarini's statistically downscaled measures of mean +Atlantic hurricane activity and its variance. The hurricane activity is +estimated using seasonal averages of sea surface temperature anomalies over +the tropical Atlantic (bounded by 10N-25N and 80W-20W) and the tropics at +large (bounded by 30N-30S).
The anomalies are for the JJASON season.\cr +The estimated seasonal average is either 1) number of hurricanes, 2) number +of tropical cyclones with lifetime >=48h or 3) power dissipation index +(PDI; in 10^11 m^3 s^{-2}).\cr +The statistical models used in this function are described in the references. +} +\examples{ +# Let AtlAno represent 5 different 5-year forecasts of seasonally averaged +# Atlantic sea surface temperature anomalies. +AtlAno <- array(runif(25, -1, 1), dim = c(sdate = 5, ftime = 5)) +# Let TropAno represent 5 corresponding 5-year forecasts of seasonally +# averaged tropical sea surface temperature anomalies. +TropAno <- array(runif(25, -1, 1), dim = c(sdate = 5, ftime = 5)) +# The seasonal average of hurricanes for each of the five forecasted years, +# for each forecast, would then be given by: +hr_count <- StatSeasAtlHurr(atlano = AtlAno, tropano = TropAno, hrvar = 'HR') + +} +\references{ +Villarini et al. (2010) Mon Wea Rev, 138, 2681-2705.\cr +Villarini et al. (2012) Mon Wea Rev, 140, 44-65.\cr +Villarini et al. (2012) J Clim, 25, 625-637.\cr +An example of how the function can be used in hurricane forecast studies + is given in\cr +Caron, L.-P. et al. (2014) Multi-year prediction skill of Atlantic hurricane + activity in CMIP5 decadal hindcasts. Climate Dynamics, 42, 2675-2690. + doi:10.1007/s00382-013-1773-1. +} diff --git a/man/TPI.Rd b/man/TPI.Rd index 6968f22e6cc4502755093ff012f39647c6561e13..5e8f716b1191393234796d81c17b93ce444068f1 100644 --- a/man/TPI.Rd +++ b/man/TPI.Rd @@ -4,17 +4,29 @@ \alias{TPI} \title{Compute the Tripole Index (TPI) for the Interdecadal Pacific Oscillation (IPO)} \usage{ -TPI(data, data_lats, data_lons, type, lat_dim = "lat", lon_dim = "lon", - mask = NULL, monini = 11, fmonth_dim = "fmonth", sdate_dim = "sdate", - indices_for_clim = NULL, year_dim = "year", month_dim = "month", - member_dim = "member") +TPI( + data, + data_lats, + data_lons, + type, + lat_dim = "lat", + lon_dim = "lon", + mask = NULL, + monini = 11, + fmonth_dim = "fmonth", + sdate_dim = "sdate", + indices_for_clim = NULL, + year_dim = "year", + month_dim = "month", + na.rm = TRUE, + ncores = NULL +) } \arguments{ -\item{data}{A numerical array to be used for the index computation with the -dimensions: 1) latitude, longitude, start date, forecast month, and member -(in case of decadal predictions), 2) latitude, longitude, year, month and -member (in case of historical simulations), or 3) latitude, longitude, year -and month (in case of observations or reanalyses). This data has to be +\item{data}{A numerical array to be used for the index computation with, at least, the +dimensions: 1) latitude, longitude, start date and forecast month +(in case of decadal predictions), 2) latitude, longitude, year and month +(in case of historical simulations or observations). This data has to be provided, at least, over the whole region needed to compute the index.} \item{data_lats}{A numeric vector indicating the latitudes of the data.} @@ -57,7 +69,7 @@ climatology is calculated over the whole period. If the data are already anomalies, set it to FALSE. The default value is NULL.\cr In case of parameter 'type' is 'dcpp', 'indices_for_clim' must be relative to the first forecast year, and the climatology is automatically computed -over the actual common period for the different forecast years.} +over the common calendar period for the different forecast years.} \item{year_dim}{A character string indicating the name of the year dimension. The default value is 'year'.
Only used if parameter 'type' is 'hist' or @@ -67,21 +79,25 @@ The default value is 'year'. Only used if parameter 'type' is 'hist' or dimension. The default value is 'month'. Only used if parameter 'type' is 'hist' or 'obs'.} -\item{member_dim}{A character string indicating the name of the member -dimension. The default value is 'member'. Only used if parameter 'type' is -'dcpp' or 'hist'.} +\item{na.rm}{A logical value indicating whether to remove NA values. The default +value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} } \value{ -A numerical array of the TPI index with the dimensions of: - 1) sdate, forecast year, and member (in case of decadal predictions); - 2) year and member (in case of historical simulations); or - 3) year (in case of observations or reanalyses). +A numerical array with the TPI index with the same dimensions as data except + the lat_dim, lon_dim and fmonth_dim (month_dim) in case of decadal predictions + (historical simulations or observations). In case of decadal predictions, a new dimension + 'fyear' is added. } \description{ The Tripole Index (TPI) for the Interdecadal Pacific Oscillation (IPO) is computed as the difference of weighted-averaged SST anomalies over 10ºS-10ºN, 170ºE-270ºE minus the mean of the weighted-averaged SST anomalies over -25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +25ºN-45ºN, 140ºE-215ºE and 50ºS-15ºS, 150ºE-200ºE (Henley et al., 2015). +If different members and/or datasets are provided, the climatology (used to +calculate the anomalies) is computed individually for all of them. } \examples{ ## Observations or reanalyses @@ -103,4 +119,3 @@ lon = seq(0, 360, 10) index_dcpp = TPI(data = dcpp, data_lats = lat, data_lons = lon, type = 'dcpp', monini = 1) } - diff --git a/man/ToyModel.Rd b/man/ToyModel.Rd index 379ed3bba67e39165ae85a5e41e80ce66313bbf4..ee7a98e93204383f416d1aacec019ead1ea12b66 100644 --- a/man/ToyModel.Rd +++ b/man/ToyModel.Rd @@ -7,8 +7,18 @@ components of a forecast: (1) predictability (2) forecast error (3) non-stationarity and (4) ensemble generation.
The forecast can be computed for real observations or observations generated artificially.} \usage{ -ToyModel(alpha = 0.1, beta = 0.4, gamma = 1, sig = 1, trend = 0, - nstartd = 30, nleadt = 4, nmemb = 10, obsini = NULL, fxerr = NULL) +ToyModel( + alpha = 0.1, + beta = 0.4, + gamma = 1, + sig = 1, + trend = 0, + nstartd = 30, + nleadt = 4, + nmemb = 10, + obsini = NULL, + fxerr = NULL +) } \arguments{ \item{alpha}{Predictability of the forecast on the observed residuals @@ -120,4 +130,3 @@ toyforecast <- ToyModel(alpha = a, beta = b, gamma = g, nmemb = nm, # } } - diff --git a/man/Trend.Rd b/man/Trend.Rd index a641041cd451628b948ccb0545970322e0cf1f81..d283ee652d6795f127c6853e5dfa52e9715ce2e9 100644 --- a/man/Trend.Rd +++ b/man/Trend.Rd @@ -4,8 +4,16 @@ \alias{Trend} \title{Compute the trend} \usage{ -Trend(data, time_dim = "ftime", interval = 1, polydeg = 1, conf = TRUE, - conf.lev = 0.95, pval = TRUE, ncores = NULL) +Trend( + data, + time_dim = "ftime", + interval = 1, + polydeg = 1, + conf = TRUE, + conf.lev = 0.95, + pval = TRUE, + ncores = NULL +) } \arguments{ \item{data}{A numeric array including the dimension along which the trend @@ -80,4 +88,3 @@ months_between_startdates <- 60 trend <- Trend(sampleData$obs, polydeg = 2, interval = months_between_startdates) } - diff --git a/man/UltimateBrier.Rd b/man/UltimateBrier.Rd new file mode 100644 index 0000000000000000000000000000000000000000..2cad133c23d620eeee9e0a57d7466550da9478cf --- /dev/null +++ b/man/UltimateBrier.Rd @@ -0,0 +1,114 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/UltimateBrier.R +\name{UltimateBrier} +\alias{UltimateBrier} +\title{Compute Brier scores} +\usage{ +UltimateBrier( + exp, + obs, + dat_dim = "dataset", + memb_dim = "member", + time_dim = "sdate", + quantile = TRUE, + thr = c(5/100, 95/100), + type = "BS", + decomposition = TRUE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A numeric array of forecast anomalies with named dimensions that +at least include 'dat_dim', 'memb_dim', and 'time_dim'. It can be provided +by \code{Ano()}.} + +\item{obs}{A numeric array of observational reference anomalies with named +dimensions that at least include 'dat_dim' and 'time_dim'. If it has +'memb_dim', the length must be 1. The dimensions should be consistent with +'exp' except 'dat_dim' and 'memb_dim'. It can be provided by \code{Ano()}.} + +\item{dat_dim}{A character string indicating the name of the dataset +dimension in 'exp' and 'obs'. The default value is 'dataset'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension in 'exp' (and 'obs') for ensemble mean calculation. The default +value is 'member'.} + +\item{time_dim}{A character string indicating the dimension along which to +compute the probabilistic scores. The default value is 'sdate'.} + +\item{quantile}{A logical value to decide whether a quantile (TRUE) or a +threshold (FALSE) is used to estimate the forecast and observed +probabilities. If 'type' is 'FairEnsembleBS' or 'FairEnsembleBSS', it must +be TRUE. The default value is TRUE.} + +\item{thr}{A numeric vector to be used in probability calculation (for 'BS', +'FairStartDatesBS', 'BSS', and 'FairStartDatesBSS') and binary event +judgement (for 'FairEnsembleBS' and 'FairEnsembleBSS'). It is used as +quantiles if 'quantile' is TRUE or as thresholds if 'quantile' is FALSE. +The default value is \code{c(0.05, 0.95)} for 'quantile = TRUE'.} + +\item{type}{A character string of the desired score type.
It can be one of the + following +values: +\itemize{ + \item{'BS': Simple Brier Score. Use SpecsVerification::BrierDecomp inside.} + \item{'FairEnsembleBS': Corrected Brier Score computed across ensemble + members. Use SpecsVerification::FairBrier inside.} + \item{'FairStartDatesBS': Corrected Brier Score computed across starting + dates. Use s2dv:::.BrierScore inside.} + \item{'BSS': Simple Brier Skill Score. Use s2dv:::.BrierScore inside.} + \item{'FairEnsembleBSS': Corrected Brier Skill Score computed across + ensemble members. Use SpecsVerification::FairBrierSs inside.} + \item{'FairStartDatesBSS': Corrected Brier Skill Score computed across + starting dates. Use s2dv:::.BrierScore inside.} +} + The default value is 'BS'.} + +\item{decomposition}{A logical value to determine whether the decomposition +of the Brier Score should be provided (TRUE) or not (FALSE). It is only +used when 'type' is 'BS' or 'FairStartDatesBS'. The default value is TRUE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +If 'type' is 'BS' or 'FairStartDatesBS' and 'decomposition' is TRUE, the +output is a list of 4 arrays (see details below). In other cases, the output +is an array of Brier scores or Brier skill scores. All the arrays have the +same dimensions: +c(nexp, nobs, no. of bins, the rest of the dimensions of 'exp' except 'time_dim' and +'memb_dim'). 'nexp' and 'nobs' are the lengths of the dataset dimension in 'exp' +and 'obs', respectively.\cr +The list of 4 includes: + \itemize{ + \item{$bs: Brier Score} + \item{$rel: Reliability component} + \item{$res: Resolution component} + \item{$unc: Uncertainty component} + } +} +\description{ +Interface to compute probabilistic scores (Brier Score, Brier Skill Score) +from the forecast and observational data anomalies. It provides six types +to choose from.
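+For example, the default \code{thr = c(0.05, 0.95)} with 'quantile = TRUE' +defines three bins (below the 5th percentile, between the 5th and the 95th +percentiles, and above the 95th percentile), while \code{thr = c(1/3, 2/3)} +would score tercile events.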
+} +\examples{ + \dontshow{ +startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), + c('observation'), startDates, + leadtimemin = 1, + leadtimemax = 4, + output = 'lonlat', + latmin = 27, latmax = 48, + lonmin = -12, lonmax = 40) + } +sampleData$mod <- Season(sampleData$mod, monini = 11, moninf = 12, monsup = 2) +sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) +clim <- Clim(sampleData$mod, sampleData$obs) +exp <- Ano(sampleData$mod, clim$clim_exp) +obs <- Ano(sampleData$obs, clim$clim_obs) +bs <- UltimateBrier(exp, obs) +bss <- UltimateBrier(exp, obs, type = 'BSS') + +} diff --git a/man/clim.palette.Rd b/man/clim.palette.Rd index d912f47a346c3e9a0d3255fe7fe9f1774cc1a58c..5d17947af606686801b830d29048be5a5df41790 100644 --- a/man/clim.palette.Rd +++ b/man/clim.palette.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/clim.palette.R \name{clim.palette} -\alias{clim.colors} \alias{clim.palette} +\alias{clim.colors} \title{Generate Climate Color Palettes} \usage{ clim.palette(palette = "bluered") @@ -30,4 +30,3 @@ cols <- clim.colors(20) ColorBar(lims, cols) } - diff --git a/man/s2dv-package.Rd b/man/s2dv-package.Rd index cb522141db9a5c9aa5fa068ba61e4b65c80a5bc7..557692115e5ac3f458a52161ca4323658bfb8a25 100644 --- a/man/s2dv-package.Rd +++ b/man/s2dv-package.Rd @@ -4,20 +4,46 @@ \name{s2dv-package} \alias{s2dv} \alias{s2dv-package} -\title{A Set of Common Tools for Seasonal to Decadal Verification} +\title{s2dv: A Set of Common Tools for Seasonal to Decadal Verification} \description{ -The advanced version of package 's2dverification'. It is -intended for 'seasonal to decadal' (s2d) climate forecast verification, but -it can also be used in other kinds of forecasts or general climate analysis. -This package is specially designed for the comparison between the experimental -and observational datasets. The functionality of the included functions covers -from data retrieval, data post-processing, skill scores against observation, -to visualization. Compared to 's2dverification', 's2dv' is more compatible -with the package 'startR', able to use multiple cores for computation and -handle multi-dimensional arrays with a higher flexibility. +The advanced version of package 's2dverification'. It is + intended for 'seasonal to decadal' (s2d) climate forecast verification, but + it can also be used in other kinds of forecasts or general climate analysis. + This package is specially designed for the comparison between the experimental + and observational datasets. The functionality of the included functions covers + from data retrieval, data post-processing, skill scores against observation, + to visualization. Compared to 's2dverification', 's2dv' is more compatible + with the package 'startR', able to use multiple cores for computation and + handle multi-dimensional arrays with a higher flexibility. 
} \references{ \url{https://earth.bsc.es/gitlab/es/s2dv/} } -\keyword{internal} +\seealso{ +Useful links: +\itemize{ + \item \url{https://earth.bsc.es/gitlab/es/s2dv/} + \item Report bugs at \url{https://earth.bsc.es/gitlab/es/s2dv/-/issues} +} + +} +\author{ +\strong{Maintainer}: An-Chi Ho \email{an.ho@bsc.es} + +Authors: +\itemize{ + \item BSC-CNS [copyright holder] + \item Nuria Perez-Zanon \email{nuria.perez@bsc.es} +} + +Other contributors: +\itemize{ + \item Roberto Bilbao \email{roberto.bilbao@bsc.es} [contributor] + \item Carlos Delgado \email{carlos.delgado@bsc.es} [contributor] + \item Llorenç Lledó \email{llorenc.lledo@bsc.es} [contributor] + \item Andrea Manrique \email{andrea.manrique@bsc.es} [contributor] + \item Deborah Verfaillie \email{deborah.verfaillie@bsc.es} [contributor] +} +} +\keyword{internal} diff --git a/man/sampleDepthData.Rd b/man/sampleDepthData.Rd index 869af86eff4767dc415ecc948852362bdd7ed76e..77e4a7a290855556aa7b36f8a6c46af2e2791ca7 100644 --- a/man/sampleDepthData.Rd +++ b/man/sampleDepthData.Rd @@ -28,4 +28,3 @@ variable 'tos', i.e. sea surface temperature, from the decadal climate prediction experiment run at IC3 in the context of the CMIP5 project.\cr Its name within IC3 local database is 'i00k'. } - diff --git a/man/sampleMap.Rd b/man/sampleMap.Rd index 651d18597c5dbbf40a55f411d4cd2c39c6bc6fcf..eaf8aa5a686f589db7e223ccc651af29266203e6 100644 --- a/man/sampleMap.Rd +++ b/man/sampleMap.Rd @@ -43,4 +43,3 @@ sampleData <- Load('tos', list(exp), list(obs), startDates, } Check the documentation on 'Load()' in the package 's2dv' for more information. } - diff --git a/man/sampleTimeSeries.Rd b/man/sampleTimeSeries.Rd index 280277eb9ccc2e2ae7bf6e37439d01d716b8e3d7..05a8e7980116c649d6b156a4746f16b2c45fea4e 100644 --- a/man/sampleTimeSeries.Rd +++ b/man/sampleTimeSeries.Rd @@ -47,4 +47,3 @@ sampleData <- Load('tos', list(exp), list(obs), startDates, } Check the documentation on 'Load()' in the package 's2dv' for more information. } - diff --git a/s2dv-manual.pdf b/s2dv-manual.pdf index 8c9d1cac06d9fffc9d59bc28e9427d11643c27f3..de5dc4cb860f84e21a44781861245233d13c7fdf 100644 Binary files a/s2dv-manual.pdf and b/s2dv-manual.pdf differ diff --git a/tests/testthat/test-ACC.R b/tests/testthat/test-ACC.R new file mode 100644 index 0000000000000000000000000000000000000000..7631401d0ae4864e0aa418daa1e9b116542719cc --- /dev/null +++ b/tests/testthat/test-ACC.R @@ -0,0 +1,199 @@ +context("s2dv::ACC tests") + +############################################## +##NOTE: bootstrap is not tested because sample() is used inside. + + # dat1 + set.seed(1) + exp1 <- array(rnorm(60), dim = c(dataset = 1, member = 2, sdate = 5, + ftime = 1, lat = 2, lon = 3)) + + set.seed(2) + obs1 <- array(rnorm(30), dim = c(dataset = 1, member = 1, sdate = 5, + ftime = 1, lat = 2, lon = 3)) + # dat2 + set.seed(1) + exp2 <- array(rnorm(60), dim = c(dataset = 2, sdate = 5, + ftime = 1, lat = 2, lon = 3)) + set.seed(2) + obs2 <- array(rnorm(30), dim = c(dataset = 1, sdate = 5, + ftime = 1, lat = 2, lon = 3)) + set.seed(2) + na <- floor(runif(2, min = 1, max = 30)) + obs2[na] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + ACC(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + ACC(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." 
+ ) + expect_error( + ACC(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must have at least dimensions ", + "dat_dim and space_dim.") + ) + expect_error( + ACC(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + ACC(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have same dimension name" + ) + expect_error( + ACC(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + ACC(exp1, obs1, dat_dim = 'a'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + ACC(exp1, obs1, space_dim = c('lon')), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + ACC(exp1, obs1, space_dim = c('lon', 'lev')), + "Parameter 'space_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + ACC(exp1, obs1, avg_dim = 1), + "Parameter 'avg_dim' must be a character string." + ) + expect_error( + ACC(exp1, obs1, avg_dim = c('lev')), + "Parameter 'avg_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + ACC(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + ACC(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + ACC(exp1, obs1, lat = c(1, 2, 3)), + paste0("Parameter \'lat\' must be a numeric vector with the same ", + "length as the latitude dimension of \'exp\' and \'obs\'.") + ) + expect_error( + ACC(exp1, obs1, lon = c(1, 3)), + paste0("Parameter \'lon\' must be a numeric vector with the same ", + "length as the longitude dimension of \'exp\' and \'obs\'.") + ) + expect_error( + ACC(exp1, obs1, lonlatbox = c(-90, 90)), + "Parameter 'lonlatbox' must be a numeric vector of 4." + ) + expect_error( + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3)), + paste0("Parameters 'lon', 'lat', and 'lonlatbox' must be used or be ", + "NULL at the same time.") + ) + expect_error( + ACC(exp1, obs1, conf = 1), + "Parameter 'conf' must be one logical value." + ) + expect_error( + ACC(exp1, obs1, conftype = 'a'), + "Parameter \'conftype\' must be either \'parametric\' or \'bootstrap\'." + ) + expect_error( + ACC(exp1, obs1, memb_dim = NULL, conftype = 'bootstrap'), + "Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'." + ) + expect_error( + ACC(exp1, obs1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." + ) + expect_error( + ACC(exp1, obs1, pval = 'TRUE'), + "Parameter 'pval' must be one logical value." + ) + expect_error( + ACC(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + ACC(exp = array(1:10, dim = c(dataset = 5, member = 1, lat = 2, lon = 1)), + obs = array(1:4, dim = c(dataset = 2, member = 2, lat = 1, lon = 1)), + avg_dim = NULL), + "Parameter 'exp' and 'obs' must have same length of all the dimensions expect 'dat_dim' and 'memb_dim'." + ) + +}) + +############################################## +test_that("2. 
Output checks: dat1", { + + expect_equal( + dim(ACC(exp1, obs1)$acc), + c(nexp = 1, nobs = 1, sdate = 5, ftime = 1) + ) + expect_equal( + names(ACC(exp1, obs1)), + c("acc", "conf.lower", "conf.upper", "p.val", "macc") + ) + expect_equal( + mean(ACC(exp1, obs1)$acc), + -0.2001352, + tolerance = 0.00001 + ) + expect_equal( + as.vector(ACC(exp1, obs1)$p.val), + c(0.7292993, 0.7292993, 0.7292993, 0.7292993, 0.7292993), + tolerance = 0.00001 + ) + expect_equal( + as.vector(ACC(exp1, obs1)$conf.lower), + c(-0.8595534, -0.9644555, -0.9408508, -0.6887500, -0.7619374), + tolerance = 0.00001 + ) + expect_equal( + as.vector(ACC(exp1, obs1)$conf.upper), + c(0.7493799, 0.2515608, 0.4759707, 0.8890967, 0.8517117), + tolerance = 0.00001 + ) + expect_equal( + names(ACC(exp1, obs1, avg_dim = NULL)), + c("acc", "conf.lower", "conf.upper", "p.val") + ) + expect_equal( + dim(ACC(exp1, obs1, dat_dim = 'member', memb_dim = NULL)$acc), + c(nexp = 2, nobs = 1, sdate = 5, dataset = 1, ftime = 1) + ) + expect_equal( + names(ACC(exp1, obs1, conf = FALSE)), + c("acc", "p.val", "macc") + ) + expect_equal( + names(ACC(exp1, obs1, pval = FALSE)), + c("acc", "conf.lower", "conf.upper", "macc") + ) + expect_equal( + names(ACC(exp1, obs1, conf = FALSE, pval = FALSE)), + c("acc", "macc") + ) + expect_equal( + as.vector(ACC(exp1, obs1, conf = FALSE, avg_dim = NULL, conf.lev = 0.9)$p.val), + c(0.6083998, 0.6083998, 0.6083998, 0.6083998, 0.6083998), + tolerance = 0.00001 + ) + expect_equal( + mean(ACC(exp1, obs1, lat = c(10, 20), lon = c(20, 30, 40), lonlatbox = c(20, 30, 10, 20))$acc), + -0.1681097, + tolerance = 0.00001 + ) +}) + + + + + diff --git a/tests/testthat/test-Ano.R b/tests/testthat/test-Ano.R index 4d64ce6cd6e4e37655dd28cfccb529e760f155b5..a74c7c946778287ac84451319457c36782362401 100644 --- a/tests/testthat/test-Ano.R +++ b/tests/testthat/test-Ano.R @@ -3,11 +3,14 @@ context("s2dv::Ano test") ############################################## # dat1 set.seed(1) - dat1 <- array(rnorm(72), c(dat = 1,member = 3, sdate = 4, ftime = 6)) + dat1 <- array(rnorm(72), c(dat = 1, member = 3, sdate = 4, ftime = 6)) set.seed(2) - clim1 <- array(rnorm(12), c(dat = 1,member = 3, sdate = 4)) + clim1 <- array(rnorm(12), c(dat = 1, member = 3, sdate = 4)) #dat2 + set.seed(1) + dat2 <- array(rnorm(72), c(dat = 1, sdate = 4, ftime = 6, member = 3)) + clim2 <- clim1 ############################################## @@ -75,5 +78,39 @@ test_that("2. Output checks: dat1", { c(-0.24416258, -0.08427184, 0.79636122, -0.05306879), tolerance = 0.0001 ) + expect_equal( + Ano(dat1, clim1, ncores = 1), + Ano(dat1, clim1, ncores = 2) + ) }) + +test_that("3. 
Output checks: dat2", { + + expect_equal( + dim(Ano(dat2, clim2)), + dim(dat2) + ) + expect_equal( + mean(Ano(dat2, clim2)), + -0.1434844, + tolerance = 0.0001 + ) + expect_equal( + min(Ano(dat2, clim2)), + -3.789433, + tolerance = 0.0001 + ) + expect_equal( + Ano(dat2, clim2)[1, 2, , 3], + c(0.74868744, -1.26178338, -1.17655491, -0.17166029, 0.05637202, 2.04019139), + tolerance = 0.0001 + ) + expect_equal( + Ano(dat2, clim2, ncores = 1), + Ano(dat2, clim2, ncores = 2) + ) + +}) + + diff --git a/tests/testthat/test-Ano_CrossValid.R b/tests/testthat/test-Ano_CrossValid.R new file mode 100644 index 0000000000000000000000000000000000000000..60bff8c70a1752c795664cc2acebed6dc9c81413 --- /dev/null +++ b/tests/testthat/test-Ano_CrossValid.R @@ -0,0 +1,143 @@ +context("s2dv::Ano_CrossValid tests") + +############################################## + # dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(dataset = 2, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(dataset = 1, member = 2, sdate = 5, ftime = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(30), dim = c(member = 3, ftime = 2, sdate = 5)) +set.seed(2) +obs2 <- array(rnorm(20), dim = c(ftime = 2, member = 2, sdate = 5)) + + +############################################## +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + Ano_CrossValid(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Ano_CrossValid(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Ano_CrossValid(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must have at least dimensions ", + "time_dim and dat_dim.") + ) + expect_error( + Ano_CrossValid(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + # time_dim + expect_error( + Ano_CrossValid(exp1, obs1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Ano_CrossValid(exp1, obs1, time_dim = c('a', 'sdate')), + "Parameter 'time_dim' must be a character string." + ) + # dat_dim + expect_error( + Ano_CrossValid(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character vector." + ) + expect_error( + Ano_CrossValid(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb + expect_error( + Ano_CrossValid(exp1, obs1, memb = 'member'), + "Parameter 'memb' must be one logical value." + ) + # memb_dim + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, memb_dim = 'ftime'), + "Parameter 'memb_dim' must be one element in parameter 'dat_dim'." + ) + # ncores + expect_error( + Ano_CrossValid(exp1, obs1, memb = FALSE, ncores = -1), + "Parameter 'ncores' must be a positive integer." + ) + # exp and obs (2) + expect_error( + Ano_CrossValid(exp1, array(1:20, dim = c(dataset = 1, member = 2, sdate = 4, ftime = 2))), + paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.") + ) + + +}) +############################################## +test_that("2. 
dat1", { + + expect_equal( + names(Ano_CrossValid(exp1, obs1)), + c("exp", "obs") + ) + expect_equal( + dim(Ano_CrossValid(exp1, obs1)$exp), + c(sdate = 5, dataset = 2, member = 3, ftime = 2) + ) + expect_equal( + Ano_CrossValid(exp1, obs1)$exp[, 1, 2, 2], + c(0.2771331, 1.1675753, -1.0684010, 0.2901759, -0.6664833), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp1, obs1)$obs[, 1, 2, 2], + c(1.7024193, -0.8243579, -2.4136080, 0.5199868, 1.0155598), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp1, obs1, memb = FALSE)$exp[, 1, 2, 2], + c(0.1229714, 0.8496518, -0.9531644, 0.1548713, -0.5264025), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. dat2", { + expect_equal( + names(Ano_CrossValid(exp2, obs2, dat_dim = 'member')), + c("exp", "obs") + ) + expect_equal( + dim(Ano_CrossValid(exp2, obs2, dat_dim = 'member')$exp), + c(sdate = 5, member = 3, ftime = 2) + ) + expect_equal( + Ano_CrossValid(exp2, obs2, dat_dim = 'member')$exp[, 2, 2], + c(0.05650631, 1.53434806, -0.37561623, -0.26217217, -0.95306597), + tolerance = 0.0001 + ) + expect_equal( + Ano_CrossValid(exp2, obs2, dat_dim = 'member', memb = FALSE)$exp[, 2, 2], + c(0.34489635, 1.56816273, -0.01926901, -0.09646066, -0.68236823), + tolerance = 0.0001 + ) + +}) + + + + + diff --git a/tests/testthat/test-BrierScore.R b/tests/testthat/test-BrierScore.R new file mode 100644 index 0000000000000000000000000000000000000000..5668a089972352b29db2d553acb7134a3ede2203 --- /dev/null +++ b/tests/testthat/test-BrierScore.R @@ -0,0 +1,269 @@ +context("s2dv::BrierScore tests") + +############################################## +# dat1 +set.seed(1) +exp1 <- array(runif(10), dim = c(dataset = 1, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(round(runif(10)), dim = c(dataset = 1, sdate = 5, ftime = 2)) + +# dat2 +set.seed(1) +exp2 <- runif(10) +set.seed(2) +obs2 <- round(runif(10)) + +# dat3 +set.seed(1) +exp3 <- array(sample(c(0, 1), 60, replace = T), + dim = c(dataset = 2, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs3 <- array(sample(c(0, 1), 10, replace = T), + dim = c(dataset = 1, sdate = 5, ftime = 2)) + +############################################## +test_that("1. Input checks", { + # exp and obs + expect_error( + BrierScore(exp1, c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + BrierScore(c('b'), obs1), + "Parameter 'exp' and 'obs' must be a numeric vector or a numeric array." + ) + expect_error( + BrierScore(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + BrierScore(exp = 1:10, obs = obs1), + "Parameter 'exp' must be within \\[0, 1\\] range." + ) + expect_error( + BrierScore(exp = array(exp1, dim = dim(exp3)), obs = obs3, memb_dim = 'member'), + "Parameter 'exp' must be 0 or 1 if it has memb_dim." + ) + expect_error( + BrierScore(exp1, runif(10)), + "Parameter 'obs' must be binary events \\(0 or 1\\)." + ) + expect_error( + BrierScore(exp3, obs3), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") + ) + # thresholds + expect_error( + BrierScore(exp2, obs2, thresholds = TRUE), + "Parameter 'thresholds' must be a numeric vector." + ) + expect_error( + BrierScore(exp2, obs2, thresholds = seq(0, 1, length.out = 4)), + "Parameter 'thresholds' must be between 0 and 1 as the bin-breaks." 
+ ) + # time_dim + expect_error( + BrierScore(exp2, obs2, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + BrierScore(exp1, obs1, memb_dim = 'member', time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' and 'obs' dimension." + ) + # dat_dim + expect_error( + BrierScore(exp1, obs1, dat_dim = 2), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + BrierScore(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' and 'obs' dimension." + ) + # memb_dim + expect_error( + BrierScore(exp2, obs2, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + BrierScore(exp2, obs2, memb_dim = 'ensemble'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + BrierScore(exp3, array(1, dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)), memb_dim = 'member'), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + # ncores + expect_error( + BrierScore(exp2, obs2, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +length(BrierScore(exp1, obs1)), +16 +) +expect_equal( +names(BrierScore(exp1, obs1)), +c('rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', + 'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', + 'unc_bias_corrected', 'bss_bias_corrected', 'nk', 'fkbar', 'okbar') +) +expect_equal( +dim(BrierScore(exp1, obs1)$rel), +c(dataset = 1, ftime = 2) +) +expect_equal( +BrierScore(exp1, obs1)$rel[1, ], +c(0.3086934, 0.3650011), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1)$res[1, ], +c(0.14, 0.14), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1)$bs[1, ], +c(0.4218661, 0.4587647), +tolerance = 0.0001 +) +expect_equal( +dim(BrierScore(exp1, obs1)$okbar), +c(bin = 10, dataset = 1, ftime = 2) +) +expect_equal( +BrierScore(exp1, obs1)$okbar[, 1, 1], +c(NaN, NaN, 0.5,1.0, NaN, 1.0, NaN, NaN, NaN, 0.0) +) +expect_equal( +BrierScore(exp1, obs1)$fkbar[, 1, 1], +c(NaN, NaN, 0.2335953, 0.3721239, NaN, 0.5728534, NaN, NaN, NaN, 0.9082078), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp1, obs1)$nk[, 1, 1], +c(0, 0, 2, 1, 0, 1, 0, 0, 0, 1) +) + +expect_equal( +dim(BrierScore(exp1, obs1, dat_dim = 'dataset')$rel), +c(nexp = 1, nobs = 1, ftime = 2) +) +expect_equal( +dim(BrierScore(exp1, obs1, dat_dim = 'dataset')$nk), +c(nexp = 1, nobs = 1, bin = 10, ftime = 2) +) +expect_equal( +as.vector(BrierScore(exp1, obs1, dat_dim = 'dataset')$nk), +as.vector(BrierScore(exp1, obs1)$nk) +) +expect_equal( +as.vector(BrierScore(exp1, obs1, dat_dim = 'dataset')$bs), +as.vector(BrierScore(exp1, obs1)$bs) +) + +}) + +############################################## +test_that("3. 
Output checks: dat2", { +expect_equal( +length(BrierScore(exp2, obs2)), +16 +) +expect_equal( +dim(BrierScore(exp2, obs2))$bss, +NULL +) +expect_equal( +length(BrierScore(exp2, obs2)$bss_res), +1 +) +expect_equal( +dim(BrierScore(exp2, obs2))$nk, +NULL +) +expect_equal( +length(BrierScore(exp2, obs2)$nk), +10 +) +expect_equal( +BrierScore(exp2, obs2)$bs, +0.4403154, +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp2, obs2)$gres_bias_corrected, +0.06313199, +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp2, obs2)$bss_bias_corrected, +-0.6511828, +tolerance = 0.0001 +) +expect_equal( +as.vector(BrierScore(exp2, obs2)$nk), +c(1, 0, 2, 1, 0, 1, 2, 0, 1, 2) +) + +}) +############################################## +test_that("4. Output checks: dat3", { + +expect_equal( +length(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), +16 +) +expect_equal( +names(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), +c('rel', 'res', 'unc', 'bs', 'bs_check_res', 'bss_res', 'gres', + 'bs_check_gres', 'bss_gres', 'rel_bias_corrected', 'gres_bias_corrected', + 'unc_bias_corrected', 'bss_bias_corrected', 'nk', 'fkbar', 'okbar') +) +expect_equal( +dim(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$rel), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$rel[, 1, 2], +c(0.3555556, 0.2222222), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$res[1, 1, ], +c(0.0000000, 0.1066667), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$bs[2, 1, ], +c(0.3555556, 0.4222222), +tolerance = 0.0001 +) +expect_equal( +dim(BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$okbar), +c(nexp = 2, nobs = 1, bin = 10, ftime = 2) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$okbar[, 1, 1, 1], +c(NaN, 1) +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', thresholds = 1:2/3)$fkbar[2, 1, , 1], +c(0.0000000, 0.3333333, 0.6666667), +tolerance = 0.0001 +) +expect_equal( +BrierScore(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', thresholds = 1:2/3)$nk[1, 1, , 1], +c(0, 5, 0) +) + +}) + + diff --git a/tests/testthat/test-Cluster.R b/tests/testthat/test-Cluster.R new file mode 100644 index 0000000000000000000000000000000000000000..9071cd8e5e479e0ef2ccee2b6b74208815e604c8 --- /dev/null +++ b/tests/testthat/test-Cluster.R @@ -0,0 +1,127 @@ +context("s2dv::Cluster tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(100), + dim = c(sdate = 50, space = 2)) + weights1 <- array(c(0.9, 1.1), dim = c(space = 2)) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(300), + dim = c(sdate = 50, lat = 2, lon = 3)) + weights2 <- array(c(0.9, 1.1), dim = c(lat = 2, lon = 3)) + +############################################## + +test_that("1. Input checks", { + # data + expect_error( + Cluster(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Cluster(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Cluster(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # weights + expect_error( + Cluster(dat1, weights = 'lat'), + "Parameter 'weights' must be a numeric array." + ) + expect_error( + Cluster(dat1, weights = 2), + "Parameter 'weights' must have dimension names." 
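+    # Note: a valid 'weights' is a numeric array whose named dimensions all
+    # appear in 'data', presumably weighting each grid point in the
+    # clustering; an illustrative sketch mirroring the setup above:
+    #   Cluster(dat1, weights = array(c(0.9, 1.1), dim = c(space = 2)))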
+ ) + expect_error( + Cluster(dat1, weights = array(2, dim = c(lat = 2))), + "Parameter 'weights' must have dimensions that can be found in 'data' dimensions." + ) + # time_dim + expect_error( + Cluster(dat1, weights1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Cluster(array(c(1:25), dim = c(dat = 1, time = 5, space = 2)), weights1), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Cluster(dat1, weights1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Cluster(dat1, weights1, time_dim = c('a', 'sdate')), + "Parameter 'time_dim' must be a character string." + ) + # nclusters + expect_error( + Cluster(dat1, weights1, ncluster = 1), + "Parameter 'nclusters' must be an integer bigger than 1." + ) + # index + expect_error( + Cluster(dat1, weights1, index = 1), + "Parameter 'index' should be a character strings accepted as 'index' by the function NbClust::NbClust." + ) + # ncores + expect_error( + Cluster(dat1, weights1, ncore = 0), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. Output checks: dat1", { +# The output is random. Only check dimensions. + expect_equal( + length(Cluster(dat1, weights1)$cluster), + 50 + ) + expect_equal( + length(Cluster(dat1)$cluster), + 100 + ) + expect_equal( + dim(Cluster(dat1, weights1)$centers), + c(8, 2) + ) + expect_equal( + dim(Cluster(dat1, weights1, nclusters = 3)$centers), + c(3, 2) + ) + +}) + + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + length(Cluster(dat2, weights2)$cluster), + 50 + ) + expect_equal( + length(Cluster(dat2)$cluster), + 300 + ) + expect_equal( + length(Cluster(dat2, space_dim = c('lon', 'lat'))$cluster), + 50 + ) + expect_equal( + dim(Cluster(dat2, weights2)$centers), + c(7, 6) + ) + expect_equal( + dim(Cluster(dat2, weights2, nclusters = 5)$centers), + c(5, 6) + ) + +}) + diff --git a/tests/testthat/test-Consist_Trend.R b/tests/testthat/test-Consist_Trend.R new file mode 100644 index 0000000000000000000000000000000000000000..2dd2214a8ea31e851fae8564921c66266d54342c --- /dev/null +++ b/tests/testthat/test-Consist_Trend.R @@ -0,0 +1,166 @@ +context("s2dv::Consist_Trend tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(30), dim = c(dataset = 2, sdate = 5, ftime = 3)) + set.seed(2) + obs1 <- array(rnorm(15), dim = c(dataset = 1, sdate = 5, ftime = 3)) + # dat2 + exp2 <- exp1 + set.seed(1) + exp2[1, 1, 1] <- NA + obs2 <- obs1 + obs2[1, 2, 3] <- NA + + +############################################## +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + Consist_Trend(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + Consist_Trend(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + Consist_Trend(c(1:10), c(2:4)), + paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") + ) + expect_error( + Consist_Trend(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + Consist_Trend(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have the same dimension names." 
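+    # Note: to pass these checks, 'exp' and 'obs' need identical dimension
+    # names and identical lengths everywhere except 'dat_dim', as in the
+    # dat1 setup above:
+    #   exp1 <- array(rnorm(30), dim = c(dataset = 2, sdate = 5, ftime = 3))
+    #   obs1 <- array(rnorm(15), dim = c(dataset = 1, sdate = 5, ftime = 3))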
+ ) + # time_dim + expect_error( + Consist_Trend(exp1, obs1, time_dim = c('sdate', 'ftime')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + Consist_Trend(exp1, obs1, time_dim = 'asd'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # dat_dim + expect_error( + Consist_Trend(exp1, obs1, dat_dim = c(1, 2)), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + Consist_Trend(exp1, obs1, dat_dim = c('member')), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + Consist_Trend(array(1:10, dim = c(dataset = 2, member = 5, sdate = 4, ftime = 3)), + array(1:4, dim = c(dataset = 2, member = 2, sdate = 5, ftime = 3))), + paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension expect 'dat_dim'.") + ) + # interval + expect_error( + Consist_Trend(exp1, obs1, interval = 0), + "Parameter 'interval' must be a positive number." + ) + # ncores + expect_error( + Consist_Trend(exp1, obs1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +names(Consist_Trend(exp1, obs1)), +c('trend', 'conf.lower', 'conf.upper', 'detrended_exp', 'detrended_obs') +) +expect_equal( +dim(Consist_Trend(exp1, obs1)$trend), +c(stats = 2, dataset = 3, ftime = 3) +) +expect_equal( +dim(Consist_Trend(exp1, obs1)$conf.lower), +c(stats = 2, dataset = 3, ftime = 3) +) +expect_equal( +dim(Consist_Trend(exp1, obs1)$conf.upper), +c(stats = 2, dataset = 3, ftime = 3) +) +expect_equal( +dim(Consist_Trend(exp1, obs1)$detrended_exp), +c(dataset = 2, sdate = 5, ftime = 3) +) +expect_equal( +dim(Consist_Trend(exp1, obs1)$detrended_obs), +c(dataset = 1, sdate = 5, ftime = 3) +) +expect_equal( +Consist_Trend(exp1, obs1)$trend[, 2, 1], +c(0.8287843, -0.1835020), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp1, obs1)$conf.lower[, 2, 2], +c(-5.449028, -0.943639), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp1, obs1)$conf.upper[, 2, 2], +c(3.176215, 1.656969), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp1, obs1)$detrended_exp[, 2, 1], +c(-0.449003, 1.133500), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp1, obs1)$detrended_obs[, 2, 1], +c(0.2836287), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. 
Output checks: dat2", { + +expect_equal( +Consist_Trend(exp2, obs2)$trend[, 2, 1], +c(1.7520623, -0.4143214), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp2, obs2)$detrended_exp[1, , 1], +c(NA, -0.3160783, 0.4098429, 0.1285491, -0.2223137), +tolerance = 0.0001 +) +expect_equal( +Consist_Trend(exp2, obs2)$detrended_obs[1, , 1], +c(NA, -0.4826962, 1.2716524, -1.0952163, 0.3062600), +tolerance = 0.0001 +) +expect_equal( +mean(Consist_Trend(exp2, obs2)$detrended_obs, na.rm = TRUE)*10^18, +2.118364, +tolerance = 0.0001 +) +expect_equal( +mean(Consist_Trend(exp2, obs2)$trend, na.rm = TRUE), +0.1662461, +tolerance = 0.0001 +) + +}) + + diff --git a/tests/testthat/test-Corr.R b/tests/testthat/test-Corr.R index 4a06f8247fc378cfba5b28abc51f536be8ccfdf0..9d5d4a3653ca55366919e8cf0bb87676972bf4b9 100644 --- a/tests/testthat/test-Corr.R +++ b/tests/testthat/test-Corr.R @@ -1,7 +1,7 @@ context("s2dv::Corr tests") ############################################## - # dat1 + # dat1: memb_dim is NULL set.seed(1) exp1 <- array(rnorm(240), dim = c(member = 1, dataset = 2, sdate = 5, ftime = 3, lat = 2, lon = 4)) @@ -13,6 +13,33 @@ context("s2dv::Corr tests") na <- floor(runif(10, min = 1, max = 120)) obs1[na] <- NA + # dat2: memb_dim = member + set.seed(1) + exp2 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5, + lat = 2, lon = 3)) + + set.seed(2) + obs2 <- array(rnorm(30), dim = c(member = 1, dataset = 1, sdate = 5, + lat = 2, lon = 3)) + + # dat3: memb_dim = member, obs has multiple memb + set.seed(1) + exp3 <- array(rnorm(180), dim = c(member = 3, dataset = 2, sdate = 5, + lat = 2, lon = 3)) + + set.seed(2) + obs3 <- array(rnorm(120), dim = c(member = 2, dataset = 2, sdate = 5, + lat = 2, lon = 3)) + + # dat4: exp and obs have dataset = 1 (to check the return array by small func) + set.seed(1) + exp4 <- array(rnorm(10), dim = c(member = 1, dataset = 1, sdate = 5, + lat = 2)) + + set.seed(2) + obs4 <- array(rnorm(10), dim = c(member = 1, dataset = 1, sdate = 5, + lat = 2)) + ############################################## test_that("1. Input checks", { @@ -79,6 +106,18 @@ test_that("1. Input checks", { "Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'." ) expect_error( + Corr(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + Corr(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + Corr(exp2, obs2, memb_dim = 'member', memb = 1), + "Parameter 'memb' must be one logical value." + ) + expect_error( Corr(exp1, obs1, conf = 1), "Parameter 'conf' must be one logical value." ) @@ -105,61 +144,252 @@ test_that("1. Input checks", { ############################################## test_that("2. 
Output checks: dat1", { - +suppressWarnings( expect_equal( dim(Corr(exp1, obs1)$corr), c(nexp = 2, nobs = 1, member = 1, ftime = 3, lat = 2, lon = 4) ) +) +suppressWarnings( expect_equal( Corr(exp1, obs1)$corr[1:6], c(0.11503859, -0.46959987, -0.64113021, 0.09776572, -0.32393603, 0.27565829), tolerance = 0.001 ) +) +suppressWarnings( expect_equal( length(which(is.na(Corr(exp1, obs1)$p.val))), 2 ) +) +suppressWarnings( expect_equal( max(Corr(exp1, obs1)$conf.lower, na.rm = T), 0.6332941, tolerance = 0.001 ) +) +suppressWarnings( expect_equal( length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime')$corr))), 6 ) +) +suppressWarnings( expect_equal( length(which(is.na(Corr(exp1, obs1, comp_dim = 'ftime', limits = c(2, 3))$corr))), 2 ) +) +suppressWarnings( expect_equal( min(Corr(exp1, obs1, conf.lev = 0.99)$conf.upper, na.rm = TRUE), 0.2747904, tolerance = 0.0001 ) +) +suppressWarnings( expect_equal( length(Corr(exp1, obs1, conf = FALSE, pval = FALSE)), 1 ) +) +suppressWarnings( expect_equal( length(Corr(exp1, obs1, conf = FALSE)), 2 ) +) +suppressWarnings( expect_equal( length(Corr(exp1, obs1, pval = FALSE)), 3 ) +) +suppressWarnings( expect_equal( Corr(exp1, obs1, method = 'spearman')$corr[1:6], c(-0.3, -0.4, -0.6, 0.3, -0.3, 0.2) ) +) +suppressWarnings( expect_equal( range(Corr(exp1, obs1, method = 'spearman', comp_dim = 'ftime')$p.val, na.rm = T), c(0.0, 0.5), tolerance = 0.001 ) +) }) ############################################## +test_that("3. Output checks: dat2", { + # individual member + expect_equal( + dim(Corr(exp2, obs2, memb_dim = 'member')$corr), + c(nexp = 2, nobs = 1, exp_memb = 3, obs_memb = 1, lat = 2, lon = 3) + ) + expect_equal( + names(Corr(exp2, obs2, memb_dim = 'member')), + c("corr", "p.val", "conf.lower", "conf.upper") + ) + expect_equal( + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)), + c("corr") + ) + expect_equal( + names(Corr(exp2, obs2, memb_dim = 'member', conf = FALSE)), + c("corr", "p.val") + ) + expect_equal( + names(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)), + c("corr", "conf.lower", "conf.upper") + ) + expect_equal( + mean(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.01645575, + tolerance = 0.0001 + ) + expect_equal( + median(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.03024513, + tolerance = 0.0001 + ) + expect_equal( + max(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.9327993, + tolerance = 0.0001 + ) + expect_equal( + min(Corr(exp2, obs2, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + -0.9361258, + tolerance = 0.0001 + ) + expect_equal( + Corr(exp2, obs2, memb_dim = 'member', conf = FALSE)$p.val[1:5], + c(0.24150854, 0.21790352, 0.04149139, 0.49851332, 0.19859843), + tolerance = 0.0001 + ) + expect_equal( + Corr(exp2, obs2, memb_dim = 'member', pval = FALSE)$conf.lower[1:5], + c(-0.9500121, -0.9547642, -0.9883400, -0.8817478, -0.6879465), + tolerance = 0.0001 + ) + # ensemble mean + expect_equal( + dim(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE)$corr), + c(nexp = 2, nobs = 1, lat = 2, lon = 3) + ) + expect_equal( + mean(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + 0.02939929, + tolerance = 0.0001 + ) + expect_equal( + median(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + 0.03147432, + tolerance = 0.0001 + ) + expect_equal( + max(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + 
0.8048901, + tolerance = 0.0001 + ) + expect_equal( + min(Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + -0.6839388, + tolerance = 0.0001 + ) + expect_equal( + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, conf = FALSE)$p.val[1:5], + c(0.1999518, 0.2776874, 0.3255444, 0.2839667, 0.1264518), + tolerance = 0.0001 + ) + expect_equal( + Corr(exp2, obs2, memb_dim = 'member', memb = FALSE, pval = FALSE)$conf.lower[1:5], + c(-0.9582891, -0.7668065, -0.9316879, -0.9410621, -0.5659657), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("4. Output checks: dat3", { + # individual member + expect_equal( + dim(Corr(exp3, obs3, memb_dim = 'member')$corr), + c(nexp = 2, nobs = 2, exp_memb = 3, obs_memb = 2, lat = 2, lon = 3) + ) + expect_equal( + names(Corr(exp3, obs3, memb_dim = 'member')), + c("corr", "p.val", "conf.lower", "conf.upper") + ) + expect_equal( + mean(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.006468017, + tolerance = 0.0001 + ) + expect_equal( + median(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.03662394, + tolerance = 0.0001 + ) + expect_equal( + max(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + 0.9798228, + tolerance = 0.0001 + ) + expect_equal( + min(Corr(exp3, obs3, memb_dim = 'member', pval = FALSE, conf = FALSE)$corr), + -0.9464891, + tolerance = 0.0001 + ) + + # ensemble mean + expect_equal( + dim(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE)$corr), + c(nexp = 2, nobs = 2, lat = 2, lon = 3) + ) + expect_equal( + mean(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + -0.01001896, + tolerance = 0.0001 + ) + expect_equal( + median(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + -0.01895816, + tolerance = 0.0001 + ) + expect_equal( + max(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + 0.798233, + tolerance = 0.0001 + ) + expect_equal( + min(Corr(exp3, obs3, memb_dim = 'member', memb = FALSE, pval = FALSE, conf = FALSE)$corr), + -0.6464809, + tolerance = 0.0001 + ) +}) + +############################################## +test_that("5. 
Output checks: dat4", { + # no member + expect_equal( + dim(Corr(exp4, obs4)$corr), + c(nexp = 1, nobs = 1, member = 1, lat = 2) + ) + # individual member + expect_equal( + dim(Corr(exp4, obs4, memb_dim = 'member')$corr), + c(nexp = 1, nobs = 1, exp_memb = 1, obs_memb = 1, lat = 2) + ) + # ensemble + expect_equal( + dim(Corr(exp4, obs4, memb_dim = 'member', memb = FALSE)$corr), + c(nexp = 1, nobs = 1, lat = 2) + ) + +}) +############################################## diff --git a/tests/testthat/test-EOF.R b/tests/testthat/test-EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..7966518dec3472f37d267f36c9a9589ac089c43d --- /dev/null +++ b/tests/testthat/test-EOF.R @@ -0,0 +1,267 @@ +context("s2dv::EOF tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + + # dat2 + set.seed(1) + dat2 <- array(rnorm(240), dim = c(lat = 6, lon = 2, sdate = 20)) + lat2 <- seq(-10, 10, length.out = 6) + lon2 <- c(-10, -12) + + # dat3 + set.seed(1) + dat3 <- array(rnorm(480), dim = c(dat = 2, lat = 6, lon = 2, sdate = 20)) + lat3 <- seq(10, 30, length.out = 6) + lon3 <- c(10, 12) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + EOF(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + EOF(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EOF(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EOF(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + EOF(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + EOF(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + EOF(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + EOF(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat + expect_error( + EOF(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + EOF(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." + ) + # lon + expect_error( + EOF(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_warning( + EOF(dat1, lat = lat1, lon = c(350, 370)), + "Some 'lon' is out of the range \\[-360, 360\\]." + ) + # neofs + expect_error( + EOF(dat1, lat = lat1, lon = lon1, neofs = -1), + "Parameter 'neofs' must be a positive integer." + ) + # corr + expect_error( + EOF(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + EOF(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. 
dat1", { + res1 <- EOF(dat1, lon = lon1, lat = lat1, neofs = 10) + + expect_equal( + names(res1), + c("EOFs", "PCs", "var", "tot_var", "mask", "wght") + ) + expect_equal( + dim(res1$EOFs), + c(mode = 10, lat = 6, lon = 2) + ) + expect_equal( + dim(res1$PCs), + c(sdate = 10, mode = 10) + ) + expect_equal( + dim(res1$var), + c(mode = 10) + ) + expect_equal( + dim(res1$mask), + c(lat = 6, lon = 2) + ) + expect_equal( + dim(res1$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + res1$EOFs[1:5], + c(-0.2888168, 0.2792765, 0.1028387, 0.1883640, -0.2896943), + tolerance = 0.0001 + ) + expect_equal( + mean(res1$EOFs), + 0.01792716, + tolerance = 0.0001 + ) + expect_equal( + res1$PCs[1:5], + c(3.2061306, -0.1692669, 0.5420990, -2.5324441, -0.6143680), + tolerance = 0.0001 + ) + expect_equal( + mean(res1$PCs), + 0.08980279, + tolerance = 0.0001 + ) + expect_equal( + res1$var[1:5], + array(c(29.247073, 25.364840, 13.247046, 11.121006, 8.662517), dim = c(mode = 5)), + tolerance = 0.0001 + ) + expect_equal( + sum(res1$mask), + 12 + ) + expect_equal( + res1$wght[1:5], + c(0.9923748, 0.9850359, 0.9752213, 0.9629039, 0.9480475), + tolerance = 0.0001 + ) + expect_equal( + res1$tot_var, + 88.20996, + tolerance = 0.0001 + ) + + # rebuild the field + latlon_eof <- array(res1$EOFs, dim = c(mode = 10, latlon = 12)) + field <- res1$PCs %*% latlon_eof + latlon_dat1<- array(dat1, dim = c(sdate = 10, laton = 12)) + expect_equal( + as.vector(latlon_dat1), + as.vector(field) + ) + + dat1_1 <- dat1 + dat1_1[, 2, 1] <- NA + res1_1 <- EOF(dat1_1, lon = lon1, lat = lat1, neofs = 10) + expect_equal( + mean(res1_1$EOFs, na.rm = T), + 0.02270081, + tolerance = 0.0001 + ) + expect_equal( + mean(res1_1$PCs, na.rm = T), + 0.1092327, + tolerance = 0.0001 + ) + # rebuild the field + latlon_eof <- array(res1_1$EOFs, dim = c(mode = 10, latlon = 12)) + field <- res1_1$PCs %*% latlon_eof + latlon_dat1<- array(dat1_1, dim = c(sdate = 10, laton = 12)) + expect_equal( + as.vector(latlon_dat1), + as.vector(field) + ) + + dat1_2 <- dat1 + dat1_2[2:5, 2, 1] <- NA + expect_error( + EOF(dat1_2, lon = lon1, lat = lat1, neofs = 10), + "Detect certain grid points have NAs but not consistent across time dimension. If the grid point is NA, it should have NA at all time step." + ) + + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(EOF(dat2, lon = lon2, lat = lat2)$EOFs), + c(mode = 12, lat = 6, lon = 2) + ) + expect_equal( + dim(EOF(dat2, lon = lon2, lat = lat2)$PCs), + c(sdate = 20, mode = 12) + ) + expect_equal( + EOF(dat2, lon = lon2, lat = lat2)$EOFs[1:5], + c(0.33197201, 0.18837900, -0.19697143, 0.08305805, -0.51297585), + tolerance = 0.0001 + ) + expect_equal( + mean(EOF(dat2, lon = lon2, lat = lat2)$EOFs), + 0.02720393, + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. 
dat3", { + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$EOFs), + c(mode = 12, lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$PCs), + c(sdate = 20, mode = 12, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$var), + c(mode = 12, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$mask), + c(lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(EOF(dat3, lon = lon3, lat = lat3)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + mean(EOF(dat3, lon = lon3, lat = lat3)$EOFs), + 0.01214845, + tolerance = 0.0001 + ) + expect_equal( + EOF(dat3, lon = lon3, lat = lat3)$EOFs[1:5], + c(0.3292733, 0.1787016, -0.3801986, 0.1957160, -0.4377031), + tolerance = 0.0001 + ) + expect_equal( + EOF(dat3, lon = lon3, lat = lat3)$tot_var, + array(c(213.2422, 224.4203), dim = c(dat = 2)), + tolerance = 0.0001 + ) + +}) +############################################## diff --git a/tests/testthat/test-EuroAtlanticTC.R b/tests/testthat/test-EuroAtlanticTC.R new file mode 100644 index 0000000000000000000000000000000000000000..6e3ac4bad6b2173bdc033dec4e92ab697b9b8354 --- /dev/null +++ b/tests/testthat/test-EuroAtlanticTC.R @@ -0,0 +1,308 @@ +context("s2dv::EuroAtlanticTC tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(480), dim = c(sdate = 10, lat = 6, lon = 8)) + lat1 <- seq(20, 80, length.out = 6) + lon1 <- seq(-90, 60, length.out = 8) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(800), dim = c(dat = 2, lat = 8, lon = 15, sdate = 5)) + lat2 <- seq(10, 90, length.out = 8) + lon2 <- seq(-100, 70, length.out = 15) + + # dat3 + set.seed(2) + dat3 <- array(rnorm(1520), dim = c(dat = 2, lat = 8, lon = 19, sdate = 5)) + lat3 <- seq(10, 90, length.out = 8) + lon3 <- c(seq(0, 70, length.out = 8), seq(250, 350, length.out = 11)) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + EuroAtlanticTC(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + EuroAtlanticTC(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EuroAtlanticTC(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + EuroAtlanticTC(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + EuroAtlanticTC(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + EuroAtlanticTC(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + EuroAtlanticTC(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + EuroAtlanticTC(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat and lon + expect_error( + EuroAtlanticTC(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + EuroAtlanticTC(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." 
+ ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(300, 370, length.out = 8)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(-190, -10, length.out = 8)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = seq(30, 80, length.out = 6), lon = lon1), + "The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).", + fixed = TRUE + ) + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = seq(-80, 20, length.out = 8)), + "The provided data does not cover the EuroAtlantic region (20N-80N, 90W-60E).", + fixed = TRUE + ) + # ntrunc + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = lon1, ntrunc = 0), + "Parameter 'ntrunc' must be a positive integer." + ) + # corr + expect_error( + EuroAtlanticTC(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + EuroAtlanticTC(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. dat1", { + + expect_equal( + names(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 8) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$patterns), + c(mode = 4, lat = 6, lon = 8) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 9)$indices), + c(sdate = 10, mode = 4) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var), + c(mode = 10) + ) + expect_equal( + dim(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$wght), + c(lat = 6, lon = 8) + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4)$patterns[, 2, 3], + c(-0.019905033, -0.048926441, -0.330219176, 0.008138493), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 5)$patterns[, 2, 3], + c(0.01878324, -0.03784923, -0.22820514, -0.21184373), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$indices[2, ], + c(-1.944509, -1.335159, 0.997195, -2.697545), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var), + 10, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$var[1:4]), + c(17.995853, 10.768974, 9.598904, 10.234672), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,1], + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 10)$wght[1,1], + c(0.9693774), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat1, lon = lon1, lat = lat1, ntrunc = 4, corr = T)$patterns[, 2, 3], + c(-0.05850999, 0.03827591, -0.04454523, -0.43713946), + tolerance = 0.0001 + ) +}) + +############################################## +test_that("3. 
dat2", { + + expect_equal( + names(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$patterns), + c(mode = 4, lat = 6, lon = 13, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 13, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$indices), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var), + c(mode = 5, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$wght), + c(lat = 6, lon = 13) + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 4)$patterns[, 2, 3, 2], + c(-0.17289486, -0.07021256, -0.08045222, 0.17330862), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$patterns[, 2, 3, 1], + c(0.1347727, 0.2157945, -0.1024759, 0.1633547), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$indices[2, , 1], + c(2.1975962, 2.9158790, -3.2257169, -0.4055974), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var), + 20, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$var[1:4]), + c(23.06692, 21.98278, 20.22588, 19.51251), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,1], + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat2, lon = lon2, lat = lat2, ntrunc = 5)$wght[1,1], + c(0.964818), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. 
dat3", { + + expect_equal( + names(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)), + c("patterns", "indices", "var", "wght") + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$patterns), + c(mode = 4, lat = 6, lon = 16, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$patterns), + c(mode = 4, lat = 6, lon = 16, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$indices), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var), + c(mode = 5, dat = 2) + ) + expect_equal( + dim(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$wght), + c(lat = 6, lon = 16) + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 4)$patterns[, 2, 3, 2], + c(-0.10653582, -0.22437848, 0.10192633, 0.08331549), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$patterns[, 2, 3, 1], + c(0.25209479, -0.05872688, 0.03186457, -0.02901076), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$indices[2, , 1], + c(2.940060, 5.036896, 4.188896, 2.816158), + tolerance = 0.0001 + ) + expect_equal( + mean(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var), + 20, + tolerance = 0.0001 + ) + expect_equal( + as.vector(EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$var[1:4]), + c(24.38583, 22.57439, 20.19659, 17.95064), + tolerance = 0.0001 + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,1], + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,2] + ) + expect_equal( + EuroAtlanticTC(dat3, lon = lon3, lat = lat3, ntrunc = 5)$wght[1,1], + c(0.964818), + tolerance = 0.0001 + ) + +}) + diff --git a/tests/testthat/test-Filter.R b/tests/testthat/test-Filter.R new file mode 100644 index 0000000000000000000000000000000000000000..cf271e1aa87f86bdaa9e787ab52c0bf63ba30fd8 --- /dev/null +++ b/tests/testthat/test-Filter.R @@ -0,0 +1,101 @@ +context("s2dv::Filter tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(10), dim = c(dat = 1, ftime = 5, sdate = 2)) + freq1 <- 0.015 + # dat2 + set.seed(10) + dat2 <- c(1:10) + rnorm(10) + freq2 <- freq1 + + # dat3 + dat3 <- dat2 + dat3[2] <- NA + freq3 <- freq1 +############################################## + +test_that("1. Input checks", { + + # data + expect_error( + Filter(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Filter(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Filter(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # freq + expect_error( + Filter(dat1, c()), + "Parameter 'freq' cannot be NULL." + ) + expect_error( + Filter(dat1, c(0.1, 0.2)), + "Parameter 'freq' must be a number." + ) + # time_dim + expect_error( + Filter(array(c(1:25), dim = c(dat = 1, date = 5, sdate = 5)), freq1), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Filter(dat1, freq1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # ncores + expect_error( + Filter(dat1, freq1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. 
Output checks: dat1", { + + expect_equal( + dim(Filter(dat1, freq1)), + c(ftime = 5, dat = 1, sdate = 2) + ) + expect_equal( + Filter(dat1, freq1)[, 1, 2], + c(-0.080093110, 0.141328669, -0.105230299, -0.004168101, 0.048162841), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(Filter(dat2, freq2)), + c(ftime = 10) + ) + expect_equal( + as.vector(Filter(dat2, freq2)[2:5]), + c(0.1215244, -1.0229749, -0.2053940, 0.7375181), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. Output checks: dat3", { + expect_equal( + dim(Filter(dat3, freq3)), + c(ftime = 10) + ) + expect_equal( + as.vector(Filter(dat3, freq3)[2:5]), + c(NA, -0.9414294, -0.1265448, 0.7910344), + tolerance = 0.0001 + ) + +}) diff --git a/tests/testthat/test-Histo2Hindcast.R b/tests/testthat/test-Histo2Hindcast.R new file mode 100644 index 0000000000000000000000000000000000000000..025f0035677e712d259954b11fe009353bb19626 --- /dev/null +++ b/tests/testthat/test-Histo2Hindcast.R @@ -0,0 +1,183 @@ +context("s2dv::Histo2Hindcast tests") + +############################################## +# dat1 +set.seed(1) +dat1 <- array(rnorm(24), dim = c(sdate = 1, ftime = 24)) +sdatesin1 <- '19901101' +sdatesout1 <- c('19901101', '19911101') +nleadtimesout1 <- 12 + +# dat2 +set.seed(1) +dat2 <- array(rnorm(288), dim = c(dat = 1, member = 2, sdate = 1, ftime = 24, lat = 2, lon = 3)) +sdatesin2 <- '19901101' +sdatesout2 <- c('19901101', '19911101') +nleadtimesout2 <- 12 + +############################################## +test_that("1. Input checks", { + + # dat + expect_error( + Histo2Hindcast(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Histo2Hindcast(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + # sdatesin + expect_error( + Histo2Hindcast(dat1, c()), + "Parameter 'sdatesin' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = '1999'), + paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = c('19991101', '19991201')), + paste0("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") + ) + # sdatesout + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, c()), + "Parameter 'sdatesout' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = 1999:2000), + paste0("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.") + ) + # nleadtimesout + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, c()), + "Parameter 'nleadtimesout' cannot be NULL." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = c(10, 12)), + "Parameter 'nleadtimesout' must be a positive integer." + ) + # sdate_dim + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, sdate_dim = 1), + "Parameter 'sdate_dim' must be a character string." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, sdate_dim = 'time'), + "Parameter 'sdate_dim' is not found in 'data' dimension." 
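+    # Note: Histo2Hindcast recuts one long historical run into a
+    # hindcast-shaped array; with the dat1 setup above, 24 (presumably
+    # monthly) time steps from '19901101' become two start dates with 12
+    # lead times each, as the dimension checks below confirm:
+    #   Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1,
+    #                  nleadtimesout = nleadtimesout1)
+    #   # -> dim c(sdate = 2, ftime = 12)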
+ ) + expect_error( + Histo2Hindcast(array(1:10, dim = c(sdate = 2, ftime = 5)), + sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1), + "The dimension length of sdate_dim of 'data' must be 1." + ) + # ftime_dim + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ftime_dim = 2), + "Parameter 'ftime_dim' must be a character string." + ) + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ftime_dim = 'time'), + "Parameter 'ftime_dim' is not found in 'data' dimension." + ) + # ncores + expect_error( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + dim(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + c(sdate = 2, ftime = 12) + ) + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + 0.1498669, + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 5:7], + c(0.3295078, -0.8204684, 0.4874291), + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 5:7], + c(-0.01619026, 0.94383621, 0.82122120), + tolerance = 0.00001 + ) + +sdatesout1 <- c('19901101', '19910101') +nleadtimesout1 <- 6 + + expect_equal( + dim(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + c(sdate = 2, ftime = 6) + ) + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)), + 0.1100272, + tolerance = 0.00001 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 3:5], + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 1:3], + tolerance = 0.00001 + ) + + +sdatesout1 <- c('19901101', '19911101') +nleadtimesout1 <- 15 + + expect_equal( + mean(Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1), na.rm = T), + 0.06984426, + tolerance = 0.00001 + ) + expect_equal( + length(which(is.na(Histo2Hindcast(dat1, sdatesin = sdatesin1, + sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)))), + 3 + ) + expect_equal( + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[1, 13:15], + Histo2Hindcast(dat1, sdatesin = sdatesin1, sdatesout = sdatesout1, + nleadtimesout = nleadtimesout1)[2, 1:3] + ) + +}) +############################################## +test_that("3. dat2", { + + expect_equal( + dim(Histo2Hindcast(dat2, sdatesin = sdatesin2, sdatesout = sdatesout2, + nleadtimesout = nleadtimesout2)), + c(sdate = 2, ftime = 12, dat = 1, member = 2, lat = 2, lon = 3) + ) + +}) diff --git a/tests/testthat/test-InsertDim.R b/tests/testthat/test-InsertDim.R index c4d3226ff66a795cf937fb21431f5158e6835028..f3cfab801f33fd29e44e805ae65d6f8c52d9dad7 100644 --- a/tests/testthat/test-InsertDim.R +++ b/tests/testthat/test-InsertDim.R @@ -42,14 +42,14 @@ test_that("1. 
Input checks", { InsertDim(1:10, posdim = 1, lendim = 1, name = 1), "Parameter 'name' must be a character string." ) - expect_error( - InsertDim(1:10, posdim = 1, lendim = 1, ncores = 'a'), - "Parameter 'ncores' must be a positive integer." - ) - expect_error( - InsertDim(1:10, posdim = 1, lendim = 1, ncores = 0), - "Parameter 'ncores' must be a positive integer." - ) +# expect_error( +# InsertDim(1:10, posdim = 1, lendim = 1, ncores = 'a'), +# "Parameter 'ncores' must be a positive integer." +# ) +# expect_error( +# InsertDim(1:10, posdim = 1, lendim = 1, ncores = 0), +# "Parameter 'ncores' must be a positive integer." +# ) }) diff --git a/tests/testthat/test-NAO.R b/tests/testthat/test-NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..f3c6d21f5574be54e0b05513d887e927aa6f7a87 --- /dev/null +++ b/tests/testthat/test-NAO.R @@ -0,0 +1,246 @@ +context("s2dv::NAO tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(144), dim = c(dataset = 1, member = 2, sdate = 3, ftime = 4, lat = 2, lon = 3)) + set.seed(2) + obs1 <- array(rnorm(72), dim = c(dataset = 1, member = 1, sdate = 3, ftime = 4, lat = 2, lon = 3)) + lat1 <- c(20, 80) + lon1 <- c(40, 280, 350) + + # dat2 + set.seed(1) + exp2 <- array(rnorm(216), dim = c(sdate = 3, ftime = 4, member = 2, lat = 3, lon = 3)) + set.seed(2) + obs2 <- array(rnorm(108), dim = c(sdate = 3, ftime = 4, lat = 3, lon = 3)) + lat2 <- c(80, 50, 20) + lon2 <- c(-80, 0, 40) + +############################################## +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + NAO(c(), c()), + "Parameter 'exp' and 'obs' cannot both be NULL." + ) + expect_error( + NAO(exp = c(NA, NA)), + "Parameter 'exp' must be a numeric array." + ) + expect_error( + NAO(exp = c(1:10)), + paste0("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + ) + expect_error( + NAO(array(1:10, dim = c(2, 5))), + "Parameter 'exp' must have dimension names." + ) + expect_error( + NAO(exp = exp1, obs = c(NA, NA)), + "Parameter 'obs' must be a numeric array." + ) + expect_error( + NAO(exp = exp1, obs = c(1:10)), + paste0("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + ) + expect_error( + NAO(exp = exp1, obs = array(1:10, dim = c(2, 5))), + "Parameter 'obs' must have dimension names." + ) + # time_dim + expect_error( + NAO(exp1, obs1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + NAO(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + NAO(exp1, obs1, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(member = 10, sdate = 3, ftime = 4, lat = 2, lon = 2))), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + expect_error( + NAO(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + # space_dim + expect_error( + NAO(exp1, obs1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + NAO(exp1, obs1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'exp' or 'obs' dimension." + ) + # ftime_dim + expect_error( + NAO(exp1, obs1, ftime_dim = 2), + "Parameter 'ftime_dim' must be a character string." 
+ ) + expect_error( + NAO(exp1, obs1, ftime_dim = 'a'), + "Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(member = 1, sdate = 3, ftime = 4, lat = 2, lon = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'memb_dim'.") + ) + expect_error( + NAO(exp1, array(rnorm(10), dim = c(dataset = 1, member = 1, sdate = 3, ftime = 4, lat = 1, lon = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'memb_dim'.") + ) + # ftime_avg + expect_error( + NAO(exp1, obs1, ftime_avg = T), + "Parameter 'ftime_avg' must be an integer vector." + ) + expect_error( + NAO(exp1, obs1, ftime_avg = 1:10), +"Parameter 'ftime_avg' must be within the range of ftime_dim length." + ) + # sdate >= 2 + expect_error( + NAO(exp = array(rnorm(10), dim = c(member = 10, sdate = 1, ftime = 4, lat = 2, lon = 2)), + obs = array(rnorm(10), dim = c(member = 1, sdate = 1, ftime = 4, lat = 2, lon = 2))), + "The length of time_dim must be at least 2." + ) + # lat and lon + expect_error( + NAO(exp1, obs1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + ) + expect_error( + NAO(exp1, obs1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + ) + expect_error( + NAO(exp1, obs1, lat = c(1, 2), lon = lon1), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + expect_error( + NAO(exp1, obs1, lat = c(-10, -5), lon = lon1), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + expect_error( + NAO(exp1, obs1, lat = lat1, lon = c(40, 50, 60)), + paste0("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + ) + # obsproj + expect_error( + NAO(exp1, obs1, lat = lat1, lon = lon1, obsproj = 1), + "Parameter 'obsproj' must be either TRUE or FALSE." + ) + expect_error( + NAO(exp = exp1, lat = lat1, lon = lon1), + "Parameter 'obsproj' set to TRUE but no 'obs' provided." + ) + # ncores + expect_error( + NAO(exp1, obs1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. 
dat1", { + + expect_equal( + names(NAO(exp1, obs1, lat = lat1, lon = lon1)), + c("exp", "obs") + ) + expect_equal( + dim(NAO(exp1, obs1, lat = lat1, lon = lon1)$exp), + c(sdate = 3, member = 2, dataset = 1) + ) + expect_equal( + dim(NAO(exp1, obs1, lat = lat1, lon = lon1)$obs), + c(sdate = 3, member = 1, dataset = 1) + ) + expect_equal( + NAO(exp1, obs1, lat = lat1, lon = lon1)$exp[1:5], + c(-0.1995564, -0.2996030, 0.7340010, -0.2747980, -0.3606155), + tolerance = 0.0001 + ) + expect_equal( + NAO(exp1, obs1, lat = lat1, lon = lon1)$obs[1:3], + c(-0.1139683, 0.1056687, 0.1889449), + tolerance = 0.0001 + ) + expect_equal( + mean(NAO(exp1, obs1, lat = lat1, lon = lon1, obsproj = FALSE)$exp), + -0.2263239, + tolerance = 0.0001 + ) + expect_equal( + names(NAO(exp = exp1, lat = lat1, lon = lon1, obsproj = FALSE)), + c("exp") + ) + suppressWarnings( + expect_equal( + names(NAO(obs = obs1, lat = lat1, lon = lon1)), + c("obs") + ) + ) + expect_equal( + dim(NAO(obs = obs1, lat = lat1, lon = lon1, obsproj = FALSE)$obs), + c(sdate = 3, member = 1, dataset = 1) + ) + expect_equal( + as.vector(NAO(obs = obs1, lat = lat1, lon = lon1, obsproj = FALSE)$obs), + c(-0.1139683, 0.1056687, 0.1889449), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(NAO(exp2, obs2, lat = lat2, lon = lon2)$exp), + c(sdate = 3, member = 2) + ) + expect_equal( + dim(NAO(exp2, obs2, lat = lat2, lon = lon2)$obs), + c(sdate = 3) + ) + expect_equal( + mean(NAO(exp2, obs2, lat = lat2, lon = lon2)$exp), + 0.006805087, + tolerance = 0.00001 + ) + expect_equal( + NAO(exp2, obs2, lat = lat2, lon = lon2)$exp[2:4], + c(0.07420822, 0.09383927, -0.17372708), + tolerance = 0.00001 + ) + expect_equal( + NAO(exp2, obs2, lat = lat2, lon = lon2, ftime_avg = 1:3)$exp[2:4], + c(0.01652294, -0.63365859, -0.74297551), + tolerance = 0.00001 + ) + expect_equal( + as.vector(NAO(exp = exp2, lat = lat2, lon = lon2, obsproj = FALSE)$exp), + c(-0.3529993, 0.4702901, 0.2185340, 0.1525028, 0.3759627, -0.4451322), + tolerance = 0.00001 + ) + +}) + +############################################## diff --git a/tests/testthat/test-Persistence.R b/tests/testthat/test-Persistence.R index a75c51830d7164fa4131309b5272ad15563d83fb..2ce2dec47fe1872a183c46338b3fbcf946bef5b5 100644 --- a/tests/testthat/test-Persistence.R +++ b/tests/testthat/test-Persistence.R @@ -1,9 +1,23 @@ context("s2dv::Persistence tests") ############################################## - set.seed(1) - dat1 <- array(rnorm(540), dim = c(member = 1, time = 90, lat = 2, lon = 3)) - dates1 <- seq(1920, 2009, 1) +#dat1: year +set.seed(1) +dat1 <- rnorm(1 * 70 * 6 * 7) +dim(dat1) <- c(member = 1, time = 70, lat = 6, lon = 7) +dates1 <- seq(1920, 1989, 1) +start1 <- 1961 +end1 <- 1990 +res <- Persistence(obs1, dates = dates1, start = 1961, end = 1990, ft_start = 1, + nmemb = 40) + +#dat2: day +dates2 <- seq(as.Date(ISOdate(1990, 1, 1)), as.Date(ISOdate(1990, 4, 1)) ,1) +set.seed(2) +dat2 <- rnorm(1 * length(dates2) * 6 * 7) +dim(dat2) <- c(member = 1, time = length(dates2), lat = 6, lon = 7) +start2 <- as.Date(ISOdate(1990, 2, 15)) +end2 <- as.Date(ISOdate(1990, 4, 1)) ############################################## test_that("1. Input checks", { @@ -21,10 +35,6 @@ test_that("1. Input checks", { "Parameter 'data' must have dimension names." ) expect_error( - Persistence(data = dat1, dates = seq(1900, 2009, 1)), - "Parameter 'dates' must have the same length as in 'time_dim'." 
- ) - expect_error( Persistence(data = dat1, dates = dates1, time_dim = 12), "Parameter 'time_dim' must be a character string." ) @@ -33,34 +43,76 @@ test_that("1. Input checks", { "Parameter 'time_dim' is not found in 'data' dimension." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 0.5), + Persistence(data = dat1, dates = c(1:10)), + paste0("Parameter 'dates' must be a sequence of integer \\(YYYY\\) or ", + "string \\(YYYY-MM-DD\\) in class 'Date'.") + ) + expect_error( + Persistence(data = dat1, dates = seq(1900, 2009, 1)), + "Parameter 'dates' must have the same length as in 'time_dim'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end2), + "Parameter 'dates', 'start', and 'end' should be the same format." + ) + # start + expect_error( + Persistence(data = dat1, dates = dates1, start = 1800, end = end1), + paste0("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1851, end = end1), + "Parameter 'start' must be one of the values of 'dates'." + ) + expect_error( + Persistence(data = dat1, dates = dates1, start = 1921, end = end1), + paste0("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.") + ) + # end + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(2021, 1, 1))), + paste0("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") + ) + expect_error( + Persistence(data = dat2, dates = dates2, start = start2, end = as.Date(ISOdate(1990, 4, 3))), + paste0("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.") + ) + # ft_start + expect_error( + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 0.5), "Parameter 'ft_start' must be a positive integer." ) + # ft_end expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12), "Parameter 'ft_end' must be a positive integer below 'max_ft'." ) + # max_ft expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ft_end = 12, max_ft = 13.5), "Parameter 'max_ft' must be a positive integer." ) + # nmemb expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, - nmemb = 0), + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, nmemb = 0), "Parameter 'nmemb' must be a positive integer." ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, na.action = T), paste0("Parameter 'na.action' must be a function for NA values or ", "a numeric indicating the number of NA values allowed ", "before returning NA.") ) expect_error( - Persistence(data = dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1, + Persistence(data = dat1, dates = dates1, start = start1, end = end1, ft_start = 1, ncores = 0), "Parameter 'ncores' must be a positive integer." ) @@ -69,7 +121,7 @@ test_that("1. Input checks", { ############################################## test_that("2. 
Output checks: dat1", { - res <- Persistence(dat1, dates = dates1, start = 1961, end = 2005, ft_start = 1) + res <- Persistence(dat1, dates = dates1, start = start1, end = end1, ft_start = 1) expect_equal( names(res), @@ -78,26 +130,25 @@ test_that("2. Output checks: dat1", { ) expect_equal( dim(res$persistence), - c(realization = 1, time = 45, member = 1, lat = 2, lon = 3) + c(realization = 1, time = 30, member = 1, lat = 6, lon = 7) ) expect_equal( dim(res$persistence.mean), - c(45, member = 1, lat = 2, lon = 3) - ) - expect_equal( - mean(res$persistence), - 0.03481641, - tolerance = 0.00001 + c(30, member = 1, lat = 6, lon = 7) ) +}) + +############################################## +test_that("3. Output checks: dat2", { + res <- Persistence(dat2, dates = dates2, start = start2, end = end2, ft_start = 1) + expect_equal( - range(res$persistence), - c(-1.025059, 1.042929), - tolerance = 0.0001 + names(res), + c('persistence', 'persistence.mean', 'persistence.predint', 'AR.slope', + 'AR.intercept', 'AR.lowCI', 'AR.highCI') ) expect_equal( - range(res$AR.slope), - c(-0.2636489, 0.2334777), - tolerance = 0.0001 + dim(res$persistence), + c(realization = 1, time = 46, member = 1, lat = 6, lon = 7) ) }) - diff --git a/tests/testthat/test-ProbBins.R b/tests/testthat/test-ProbBins.R new file mode 100644 index 0000000000000000000000000000000000000000..4b3d0ecde379c9543d2485e054bc15153d22d90c --- /dev/null +++ b/tests/testthat/test-ProbBins.R @@ -0,0 +1,172 @@ +context("s2dv::ProbBins tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(24), dim = c(dataset = 1, member = 3, sdate = 4, ftime = 2)) +############################################## + +test_that("1. Input checks", { + + expect_error( + ProbBins(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + ProbBins(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + ProbBins(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # thr + expect_error( + ProbBins(dat1, thr = c()), + "Parameter 'thr' cannot be NULL." + ) + expect_error( + ProbBins(dat1, thr = TRUE), + "Parameter 'thr' must be a numeric vector." + ) + expect_error( + ProbBins(dat1, thr = 1:10), + "Parameter 'thr' must be within the range \\[0, 1\\] if 'quantile' is TRUE." + ) + # time_dim + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), time_dim = 'time'), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # memb_dim + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), memb_dim = 'ens'), + "Parameter 'memb_dim' is not found in 'data' dimension." + ) + # fcyr + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 'sdate'), + "Parameter 'fcyr' must be a numeric vector or 'all'." + ) + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2:6), + "Parameter 'fcyr' must be the indices of 'time_dim' within the range \\[1, 4\\]." + ) + # quantile + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), quantile = 0.9), + "Parameter 'quantile' must be one logical value." + ) + # compPeriod + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), compPeriod = TRUE), + "Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', or 'Cross-validation'."
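+ # A sketch (commented out) of the behaviour these checks imply: with the
+ # default quantile = TRUE, 'thr' is read as quantile thresholds, so
+ # thr = c(1/3, 2/3) yields tercile occurrence (0/1) per member and sdate.
+ # pb <- ProbBins(dat1, thr = c(1/3, 2/3))
+ # dim(pb)      # c(bin = 3, sdate = 4, member = 3, dataset = 1, ftime = 2)
+ # sum(pb == 1) # 24: exactly one bin fires per member/sdate/ftime cell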
+ ) + # ncores + expect_error( + ProbBins(dat1, thr = c(1/3, 2/3), ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3))), +c(bin = 3, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75))), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75), compPeriod = 'Cross-validation')), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +dim(ProbBins(dat1, thr = c(0.25, 0.5, 0.75), compPeriod = 'Without fcyr')), +c(bin = 4, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3)) == 0)), +48 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3)) == 1)), +24 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3)) == 1), +c(1, 6, 8, 10, 14, 17, 21, 24, 25, 28, 33, 35, 37, 40, 45, 47, 49, 53, 56, 59, 63, 66, 69, 70) +) +expect_equal( +all(is.na(ProbBins(dat1, thr = c(1/3, 2/3), compPeriod = 'Without fcyr'))), +TRUE +) +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr')), +c(bin = 3, sdate = 1, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 0)), +12 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 1)), +6 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Without fcyr') == 1), +c(3, 5, 7, 11, 14, 18) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 0)), +12 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 1)), +6 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), fcyr = 2, compPeriod = 'Cross-validation') == 1), +c(3, 5, 7, 11, 14, 18) +) + +expect_equal( +dim(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE)), +c(bin = 3, sdate = 4, member = 3, dataset = 1, ftime = 2) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 0)), +48 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 1)), +24 +) +expect_equal( +which(ProbBins(dat1, thr = c(1/3, 2/3), quantile = FALSE) == 1), +c(1, 6, 8, 10, 13, 16, 21, 24, 25, 28, 32, 35, 37, 40, 45, 48, 49, 52, 56, 58, 63, 66, 69, 70) +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1:3), quantile = FALSE) == 0)), +72 +) +expect_equal( +length(which(ProbBins(dat1, thr = c(1:3), quantile = FALSE) == 1)), +24 +) + + +}) diff --git a/tests/testthat/test-ProjectField.R b/tests/testthat/test-ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..f3f05ce618df9777393a911eaae2f02c9e428b9e --- /dev/null +++ b/tests/testthat/test-ProjectField.R @@ -0,0 +1,303 @@ +context("s2dv::ProjectField tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + eof1 <- EOF(dat1, lat1, lon1, neofs = 10) + reof1 <- REOF(dat1, lat1, lon1, ntrunc = 3) + + # dat2 + set.seed(1) + dat2 <- array(rnorm(48), dim = c(dat = 1, memb = 1, sdate = 6, ftime = 1, lat = 4, lon = 2)) + lat2 <- seq(10, 30, length.out = 4) + 
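+ # An illustrative aside on the EOF -> ProjectField pipeline exercised below
+ # (sketch names are ours): ProjectField() projects the anomaly field onto
+ # the modes stored in an EOF()/REOF() result, returning the
+ # principal-component time series of each mode.
+ pc_sketch_one <- ProjectField(dat1, eof = eof1, mode = 1) # dim: c(sdate = 10)
+ pc_sketch_all <- ProjectField(dat1, eof = eof1)           # dim: c(mode = 10, sdate = 10)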
lon2 <- c(-5, 5) + eof2 <- EOF(dat2, lat2, lon2, neofs = 6) + + # dat3 + dat3 <- dat2 + dat3[1, 1, 1, 1, , ] <- NA + names(dim(dat3)) <- names(dim(dat2)) + lat3 <- lat2 + lon3 <- lon2 + eof3 <- eof2 + + # dat4 + set.seed(1) + dat4 <- array(rnorm(288*2), dim = c(dat = 2, memb = 2, sdate = 6, ftime = 3, lat = 4, lon = 2)) + lat4 <- seq(-10, -30, length.out = 4) + lon4 <- c(350, 355) + set.seed(2) + tmp <- array(rnorm(144), dim = c(dat = 2, sdate = 6, lat = 4, lon = 2)) + eof4 <- EOF(tmp, lat4, lon4, neofs = 6) + reof4 <- REOF(tmp, lat4, lon4, ntrunc = 6) + + # dat5 + set.seed(1) + dat5 <- array(rnorm(144*3), dim = c(dat = 1, memb = 2, sdate = 6, ftime = 3, lat = 4, lon = 3)) + lat5 <- seq(-10, 10, length.out = 4) + lon5 <- c(0, 5, 10) + set.seed(2) + tmp <- array(rnorm(72), dim = c(sdate = 6, lat = 4, lon = 3)) + eof5 <- EOF(tmp, lat5, lon5, neofs = 6) + + # dat6 + set.seed(1) + dat6 <- array(rnorm(480), dim = c(sdate = 10, lat = 6, lon = 8)) + lat6 <- seq(20, 80, length.out = 6) + lon6 <- seq(-90, 60, length.out = 8) + reof6 <- EuroAtlanticTC(dat6, lat6, lon6, ntrunc = 10) + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + ProjectField(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + ProjectField(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + ProjectField(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + ProjectField(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # eof + expect_error( + ProjectField(dat1, c()), + "Parameter 'eof' cannot be NULL." + ) + expect_error( + ProjectField(dat1, c(1, 2)), + "Parameter 'eof' must be a list generated by EOF() or REOF().", + fixed = TRUE + ) + expect_error( + ProjectField(dat1, list(a = 1)), + paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC()."), + fixed = TRUE + ) + eof_fake <- list(EOFs = 'a', wght = 1:10) + expect_error( + ProjectField(dat1, eof_fake), + "The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array." + ) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(mode = 1, lat = 2, lon = 5)), + wght = c(1:10)) + expect_error( + ProjectField(dat1, eof_fake), + "The component 'wght' of parameter 'eof' must be a numeric array." + ) + # time_dim + expect_error( + ProjectField(dat1, eof1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + ProjectField(dat1, eof1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + ProjectField(dat1, eof1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + ProjectField(dat1, eof1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." 
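+ # Inferred from the checks around this point (a sketch, not the documented
+ # API): the minimal 'eof' structure ProjectField() accepts is a list with an
+ # 'EOFs' (or 'REOFs'/'patterns') array dimensioned (mode, space_dim) plus a
+ # 'wght' array dimensioned (space_dim). 'eof_min' is a hypothetical example:
+ # eof_min <- list(EOFs = array(rnorm(12), dim = c(mode = 1, lat = 6, lon = 2)),
+ #                 wght = array(1, dim = c(lat = 6, lon = 2)))
+ # ProjectField(dat1, eof_min, mode = 1)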
+ ) + # eof (2) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(lat = 2, lon = 5)), + wght = array(rnorm(10), dim = c(lat = 2, lon = 5))) + expect_error( + ProjectField(dat1, eof_fake), + paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + ) + eof_fake <- list(EOFs = array(rnorm(10), dim = c(mode = 1, lat = 6, lon = 2)), + wght = array(rnorm(10), dim = c(level = 6, lon = 2))) + expect_error( + ProjectField(dat1, eof_fake), + paste0("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + ) + # mode + expect_error( + ProjectField(dat1, eof1, mode = -1), + "Parameter 'mode' must be NULL or a positive integer." + ) + expect_error( + ProjectField(dat1, eof1, mode = 15), + paste0("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + ) + # ncores + expect_error( + ProjectField(dat1, eof1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) +############################################## +test_that("2. dat1", { + + expect_equal( + dim(ProjectField(dat1, eof = eof1, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat1, eof = eof1)), + c(mode = 10, sdate = 10) + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 1))[1:5], + c(3.2061306, -0.1692669, 0.5420990, -2.5324441, -0.6143680), + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 10))[1:5], + c(-0.24848299, -0.19311118, -0.16951195, -0.10000207, 0.04554693), + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat1, eof1, mode = 1)), + as.vector(ProjectField(dat1, eof1)[1, ]) + ) + # reof + expect_equal( + dim(ProjectField(dat1, eof = reof1, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat1, eof = reof1)), + c(mode = 3, sdate = 10) + ) + expect_equal( + as.vector(ProjectField(dat1, reof1, mode = 1))[1:5], + c(3.1567219, -0.1023512, 0.6339372, -0.7998676, -1.3727226), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. dat2", { + expect_equal( + dim(ProjectField(dat2, eof2, mode = 1)), + c(sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + dim(ProjectField(dat2, eof2)), + c(mode = 6, sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + ProjectField(dat2, eof2, mode = 1)[1:6], + c(0.00118771, -1.20872474, -0.00821559, -2.06064916, -0.19245169, 2.26026937), + tolerance = 0.0001 + ) + expect_equal( + mean(ProjectField(dat2, eof2, mode = 6)), + 0.1741076, + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat2, eof2, mode = 1)), + as.vector(ProjectField(dat2, eof2)[1, , , , ]) + ) + expect_equal( + as.vector(ProjectField(dat2, eof2, mode = 5)), + as.vector(ProjectField(dat2, eof2)[5, , , , ]) + ) + +}) + +############################################## +test_that("4. dat3", { + expect_equal( + dim(ProjectField(dat3, eof3, mode = 1)), + c(sdate = 6, dat = 1, memb = 1, ftime = 1) + ) + expect_equal( + ProjectField(dat3, eof3, mode = 1)[1:6], + c(NA, -1.20872474, -0.00821559, -2.06064916, -0.19245169, 2.26026937), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("5. 
dat4", { + expect_equal( + dim(ProjectField(dat4, eof4, mode = 1)), + c(sdate = 6, dat = 2, memb = 2, ftime = 3) + ) + expect_equal( + mean(ProjectField(dat4, eof4, mode = 1)), + 0.078082, + tolerance = 0.0001 + ) + expect_equal( + ProjectField(dat4, eof4, mode = 1)[, 1, 2, 2], + c(0.28137048, -0.17616154, -0.39155370, 0.08288953, 1.18465521, 0.81850535), + tolerance = 0.0001 + ) + # reof + expect_equal( + dim(ProjectField(dat4, reof4)), + c(mode = 6, sdate = 6, dat = 2, memb = 2, ftime = 3) + ) + expect_equal( + ProjectField(dat4, reof4, mode = 1)[, 1, 2, 2], + c(-1.6923627, -0.4080116, 0.3044336, -0.7853220, -0.2670783, 0.6940482), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("6. dat5", { + expect_equal( + dim(ProjectField(dat5, eof5, mode = 1)), + c(sdate = 6, dat = 1, memb = 2, ftime = 3) + ) + expect_equal( + mean(ProjectField(dat5, eof5, mode = 1)), + 0.0907149, + tolerance = 0.0001 + ) + expect_equal( + ProjectField(dat5, eof5, mode = 1)[, 1, 2, 2], + c(0.60881970, 0.93588392, 0.01982465, 0.82376024, -0.33147699, -1.35488289), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("7. dat6", { + expect_equal( + dim(ProjectField(dat6, reof6, mode = 1)), + c(sdate = 10) + ) + expect_equal( + dim(ProjectField(dat6, reof6)), + c(mode = 4, sdate = 10) + ) + expect_equal( + mean(ProjectField(dat6, reof6)), + 0.3080207, + tolerance = 0.0001 + ) + expect_equal( + as.vector(ProjectField(dat6, reof6)[, 1]), + c(4.6114959, 0.8241051, 1.4160364, -0.9601872), + tolerance = 0.0001 + ) + +}) + diff --git a/tests/testthat/test-REOF.R b/tests/testthat/test-REOF.R new file mode 100644 index 0000000000000000000000000000000000000000..9f4bb480a5dc5bb3de121041fa9f72afe1cea72c --- /dev/null +++ b/tests/testthat/test-REOF.R @@ -0,0 +1,173 @@ +context("s2dv::REOF tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(120), dim = c(sdate = 10, lat = 6, lon = 2)) + lat1 <- seq(10, 30, length.out = 6) + lon1 <- c(10, 12) + + # dat2 + set.seed(2) + dat2 <- array(rnorm(120), dim = c(dat = 2, lat = 6, lon = 2, sdate = 5)) + lat2 <- lat1 + lon2 <- lon1 + +############################################## +test_that("1. Input checks", { + + # ano + expect_error( + REOF(c()), + "Parameter 'ano' cannot be NULL." + ) + expect_error( + REOF(c(NA, NA)), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + REOF(list(a = array(rnorm(50), dim = c(dat = 5, sdate = 10)), b = c(1:4))), + "Parameter 'ano' must be a numeric array." + ) + expect_error( + REOF(array(1:10, dim = c(2, 5))), + "Parameter 'ano' must have dimension names." + ) + # time_dim + expect_error( + REOF(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + REOF(dat1, time_dim = c('a','sdate')), + "Parameter 'time_dim' must be a character string." + ) + # space_dim + expect_error( + REOF(dat1, space_dim = 'lat'), + "Parameter 'space_dim' must be a character vector of 2." + ) + expect_error( + REOF(dat1, space_dim = c('latitude', 'longitude')), + "Parameter 'space_dim' is not found in 'ano' dimension." + ) + # lat + expect_error( + REOF(dat1, lat = 1:10), + paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + ) + expect_error( + REOF(dat1, lat = seq(-100, -80, length.out = 6)), + "Parameter 'lat' must contain values within the range \\[-90, 90\\]." 
+ ) + # lon + expect_error( + REOF(dat1, lat = lat1, lon = c('a', 'b')), + paste0("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + ) + expect_error( + REOF(dat1, lat = lat1, lon = c(350, 370)), + "Parameter 'lon' must be within the range [-180, 180] or [0, 360].", + fixed = TRUE + ) + # ntrunc + expect_error( + REOF(dat1, lat = lat1, lon = lon1, ntrunc = 0), + "Parameter 'ntrunc' must be a positive integer." + ) + # corr + expect_error( + REOF(dat1, lat = lat1, lon = lon1, corr = 0.1), + "Parameter 'corr' must be one logical value." + ) + # ncores + expect_error( + REOF(dat1, lat1, lon1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. dat1", { + + expect_equal( + names(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 5)), + c("REOFs", "RPCs", "var", "wght") + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1)$REOFs), + c(mode = 10, lat = 6, lon = 2) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 9)$RPCs), + c(sdate = 10, mode = 9) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$var), + c(mode = 1) + ) + expect_equal( + dim(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$REOFs[1:5], + c(-0.28881677, 0.47116712, 0.27298759, 0.32794052, 0.01873475), + tolerance = 0.0001 + ) + expect_equal( + mean(REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$REOFs), + -0.007620167, + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$RPCs[4:8], + c(-0.58817084, -1.86745710, -0.09939452, -1.11012382, 1.89513430), + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 2)$var[1:2], + array(c(28.04203, 26.56988), dim = c(mode = 2)), + tolerance = 0.0001 + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1,1], + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1,2] + ) + expect_equal( + REOF(dat1, lon = lon1, lat = lat1, ntrunc = 1)$wght[1:1], + c(0.9923748), + tolerance = 0.0001 + ) + +}) +############################################## +test_that("3. 
dat2", { + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2)$REOFs), + c(mode = 5, lat = 6, lon = 2, dat = 2) + ) + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2, ntrunc = 4)$RPCs), + c(sdate = 5, mode = 4, dat = 2) + ) + expect_equal( + dim(REOF(dat2, lon = lon2, lat = lat2, ntrunc = 2)$wght), + c(lat = 6, lon = 2) + ) + expect_equal( + REOF(dat2, lon = lon2, lat = lat2, ntrunc = 1)$REOFs[1, 3, 2, 1], + 0.09529009, + tolerance = 0.0001 + ) + expect_equal( + mean(REOF(dat2, lon = lon2, lat = lat2)$REOFs), + 0.01120786, + tolerance = 0.0001 + ) + +}) + diff --git a/tests/testthat/test-RatioRMS.R b/tests/testthat/test-RatioRMS.R new file mode 100644 index 0000000000000000000000000000000000000000..b70d6fb5404998d7d737579a57012dd378383317 --- /dev/null +++ b/tests/testthat/test-RatioRMS.R @@ -0,0 +1,128 @@ +context("s2dv::RatioRMS tests") + +############################################## + # dat1 + set.seed(1) + exp1_1 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3)) + set.seed(2) + exp1_2 <- array(rnorm(120), dim = c(dataset = 1, sdate = 5, ftime = 3)) + set.seed(3) + obs1 <- array(rnorm(80), dim = c(dataset = 1, sdate = 5, ftime = 3)) + + # dat 2: vector + set.seed(4) + exp2_1 <- rnorm(10) + set.seed(5) + exp2_2 <- rnorm(10) + set.seed(6) + obs2 <- rnorm(10) + + +############################################## +test_that("1. Input checks", { + + # exp1, exp2, obs + expect_error( + RatioRMS(c(), exp1_2, c()), + "Parameter 'exp1', 'exp2', and 'obs' cannot be NULL." + ) + expect_error( + RatioRMS(c('b'), c('a'), obs1), + "Parameter 'exp1', 'exp2', and 'obs' must be a numeric array." + ) + expect_error( + RatioRMS(exp1_1, array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp1', 'exp2', and 'obs' must have dimension names." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, array(1:15, dim = c(data = 1, ftime = 3, sdates = 5))), + "Parameter 'exp1', 'exp2', and 'obs' must have same dimension names." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, array(1:12, dim = c(dataset = 1, ftime = 3, sdate = 4))), + "Parameter 'exp1', 'exp2', and 'obs' must have the same length of all the dimensions." + ) + # time_dim + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, time_dim = c('sdate', 'ftime')), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp1', 'exp2', and 'obs' dimensions." + ) + # pval + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, pval = 1), + "Parameter 'pval' must be one logical value." + ) + expect_error( + RatioRMS(exp1_1, exp1_2, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + + +}) + +############################################## +test_that("2. 
Output checks: dat1", { + +expect_equal( +names(RatioRMS(exp1_1, exp1_2, obs1)), +c('ratiorms', 'p.val') +) +expect_equal( +dim(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms), +c(dataset = 1, ftime = 3) +) +expect_equal( +dim(RatioRMS(exp1_1, exp1_2, obs1)$p.val), +c(dataset = 1, ftime = 3) +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1)$p.val), +c(0.1811868, 0.4758232, 0.7473213), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1)$ratiorms), +c(2.0944471, 0.6814573, 1.1873955), +tolerance = 0.0001 +) +expect_equal( +names(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE)), +c('ratiorms') +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1, pval = FALSE, time_dim = 'ftime')$ratiorms), +c(2.0832571, 0.7292987, 0.6031437, 1.1885930, 0.8542696), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioRMS(exp1_1, exp1_2, obs1, time_dim = 'ftime')$p.val), +c(0.3745346, 0.6944118, 0.5334904, 0.8289285, 0.8437813), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +names(RatioRMS(exp2_1, exp2_2, obs2)), +c('ratiorms', 'p.val') +) +expect_equal( +RatioRMS(exp2_1, exp2_2, obs2)$p.val, +0.7418331, +tolerance = 0.0001 +) +expect_equal( +RatioRMS(exp2_1, exp2_2, obs2)$ratiorms, +0.8931399, +tolerance = 0.0001 +) + +}) + diff --git a/tests/testthat/test-RatioSDRMS.R b/tests/testthat/test-RatioSDRMS.R new file mode 100644 index 0000000000000000000000000000000000000000..4b93fafa213dcac0e831b7ac59d7646064f8f856 --- /dev/null +++ b/tests/testthat/test-RatioSDRMS.R @@ -0,0 +1,165 @@ +context("s2dv::RatioSDRMS tests") + +############################################## + # dat1 + set.seed(1) + exp1 <- array(rnorm(40), dim = c(dataset = 2, member = 2, sdate = 5, ftime = 2)) + set.seed(2) + obs1 <- array(rnorm(10), dim = c(dataset = 1, member = 1, sdate = 5, ftime = 2)) + + # dat2 + exp2 <- exp1 + obs2 <- obs1 + exp2[1] <- NA + + # dat 3 + set.seed(3) + exp3 <- array(rnorm(10), dim = c(member = 2, sdate = 5)) + set.seed(4) + obs3 <- array(rnorm(5), dim = c(member = 1, sdate = 5)) + + +############################################## +test_that("1. Input checks", { + + expect_error( + RatioSDRMS(c(), c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + RatioSDRMS(c('b'), c('a')), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + RatioSDRMS(c(1:10), c(2:4)), + "Parameter 'exp' and 'obs' must be array with as least two dimensions memb_dim and time_dim." + ) + expect_error( + RatioSDRMS(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + RatioSDRMS(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'exp' and 'obs' must have the same dimension names." + ) + expect_error( + RatioSDRMS(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + RatioSDRMS(exp1, obs1, dat_dim = 'a'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RatioSDRMS(exp1, obs1, memb_dim = 'a'), + "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, time_dim = c('sdate', 'a')), + "Parameter 'time_dim' must be a character string." 
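+ # A sketch (commented out) of the call under test: as its name suggests,
+ # RatioSDRMS() returns the ratio between the ensemble spread (standard
+ # deviation) and the RMS error, a value near 1 meaning spread consistent
+ # with error, shaped as the output checks below verify.
+ # r <- RatioSDRMS(exp1, obs1)
+ # names(r)     # "ratio" "p.val"
+ # dim(r$ratio) # c(nexp = 2, nobs = 1, ftime = 2)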
+ ) + expect_error( + RatioSDRMS(exp1, obs1, time_dim = 'a'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RatioSDRMS(exp1, obs1, pval = 1), + "Parameter 'pval' must be one logical value." + ) + expect_error( + RatioSDRMS(exp1, obs1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + expect_error( + RatioSDRMS(exp = exp3, + obs = array(1:2, dim = c(member = 1, sdate = 2)), dat_dim = NULL), + "Parameter 'exp' and 'obs' must have same length of all the dimensions expect 'dat_dim' and 'memb_dim'." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { +expect_equal( +names(RatioSDRMS(exp1, obs1)), +c('ratio', 'p.val') +) +expect_equal( +dim(RatioSDRMS(exp1, obs1)$ratio), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +dim(RatioSDRMS(exp1, obs1)$p.val), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$ratio), +c(0.7198164, 0.6525068, 0.6218262, 0.6101527), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$p.val), +c(0.8464094, 0.8959219, 0.9155102, 0.9224119), +tolerance = 0.0001 +) +expect_equal( +names(RatioSDRMS(exp1, obs1, pval = F)), +c('ratio') +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$ratio), +as.vector(RatioSDRMS(exp1, obs1, pval = F)$ratio) +) + +}) + +############################################## +test_that("3. Output checks: dat2", { +expect_equal( +dim(RatioSDRMS(exp2, obs2)$ratio), +c(nexp = 2, nobs = 1, ftime = 2) +) +expect_equal( +as.vector(RatioSDRMS(exp2, obs2)$ratio), +c(0.7635267, 0.6525068, 0.6218262, 0.6101527), +tolerance = 0.0001 +) +expect_equal( +as.vector(RatioSDRMS(exp1, obs1)$p.val), +c(0.8464094, 0.8959219, 0.9155102, 0.9224119), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("4. Output checks: dat3", { +expect_equal( +names(RatioSDRMS(exp3, obs3, dat_dim = NULL)), +c('ratio', 'p.val') +) +expect_equal( +dim(RatioSDRMS(exp3, obs3, dat_dim = NULL)$ratio), +NULL +) +expect_equal( +dim(RatioSDRMS(exp3, obs3, dat_dim = NULL)$p.val), +NULL +) +expect_equal( +as.numeric(RatioSDRMS(exp3, obs3, dat_dim = NULL)$ratio), +0.8291582, +tolerance = 0.0001 +) +expect_equal( +as.numeric(RatioSDRMS(exp3, obs3, dat_dim = NULL)$p.val), +0.7525497, +tolerance = 0.0001 +) + +}) diff --git a/tests/testthat/test-Season.R b/tests/testthat/test-Season.R index 36c3027043245125cb1b960802da3b0164788905..0b009a6e8aeffda408123303e13f97977b84ff30 100644 --- a/tests/testthat/test-Season.R +++ b/tests/testthat/test-Season.R @@ -11,6 +11,10 @@ context("s2dv::Season tests") na <- floor(runif(30, min = 1, max = 144*3)) dat2[na] <- NA + # dat3 + set.seed(1) + dat3 <- array(rnorm(12), dim = c(ftime = 12)) + ############################################## test_that("1. Input checks", { @@ -161,4 +165,26 @@ test_that("3. Output checks: dat2", { }) ############################################## +test_that("4.
Output checks: dat3", { + expect_equal( + as.numeric(Season(dat3, monini = 10, moninf = 12, monsup = 2)), + 0.3630533, + tolerance = 0.0001 + ) + expect_equal( + dim(Season(dat3, monini = 10, moninf = 12, monsup = 2)), + c(ftime = 1) + ) +# expect_equal( +# Season(dat3, monini = 10, moninf = 12, monsup = 2, ncores = 2), +# 0.3630533, +# tolerance = 0.0001 +# ) +# expect_equal( +# dim(Season(dat3, monini = 10, moninf = 12, monsup = 2, ncores = 2)), +# c(ftime = 1) +# ) + +}) +############################################### diff --git a/tests/testthat/test-Spectrum.R b/tests/testthat/test-Spectrum.R new file mode 100644 index 0000000000000000000000000000000000000000..caf53d395d7a3a3c1d374fa6ab1abbd80decbbc9 --- /dev/null +++ b/tests/testthat/test-Spectrum.R @@ -0,0 +1,111 @@ +context("s2dv::Spectrum tests") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(10), dim = c(dat = 1, ftime = 5, sdate = 2)) + + # dat2 + set.seed(10) + dat2 <- c(1:10) + rnorm(10) + + # dat3 + dat3 <- dat2 + dat3[2] <- NA +############################################## + +test_that("1. Input checks", { + + # data + expect_error( + Spectrum(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Spectrum(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Spectrum(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # time_dim + expect_error( + Spectrum(array(c(1:25), dim = c(dat = 1, date = 5, sdate = 5))), + "Parameter 'time_dim' is not found in 'data' dimension." + ) + expect_error( + Spectrum(dat1, time_dim = 2), + "Parameter 'time_dim' must be a character string." + ) + # conf.lev + expect_error( + Spectrum(dat1, conf.lev = -1), + "Parameter 'conf.lev' must be a numeric number between 0 and 1.", + fixed = T + ) + # ncores + expect_error( + Spectrum(dat1, ncore = 3.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## +test_that("2. Output checks: dat1", { + + expect_equal( + dim(Spectrum(dat1)), + c(ftime = 2, stats = 3, dat = 1, sdate = 2) + ) + expect_equal( + Spectrum(dat1)[, 1, 1, 2], + c(0.2, 0.4), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat1)[, 2, 1, 2], + c(0.89583007, 0.05516983), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("3. Output checks: dat2", { + + expect_equal( + dim(Spectrum(dat2)), + c(ftime = 5, stats = 3) + ) + expect_equal( + Spectrum(dat2)[, 1], + c(0.1, 0.2, 0.3, 0.4, 0.5), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat2)[, 2], + c(0.1767994, 1.0113808, 0.3341372, 0.1807377, 1.0594528), + tolerance = 0.0001 + ) + +}) + +############################################## +test_that("4. 
Output checks: dat3", { + expect_equal( + dim(Spectrum(dat3)), + c(ftime = 4, stats = 3) + ) + expect_equal( + Spectrum(dat3)[, 1], + c(0.1111111, 0.2222222, 0.3333333, 0.4444444), + tolerance = 0.0001 + ) + expect_equal( + Spectrum(dat3)[, 2], + c(0.7204816, 0.6529411, 0.2605188, 0.7009824), + tolerance = 0.0001 + ) + +}) diff --git a/tests/testthat/test-Spread.R b/tests/testthat/test-Spread.R new file mode 100644 index 0000000000000000000000000000000000000000..1d299a6f41149744d600648699f6eea70d718dae --- /dev/null +++ b/tests/testthat/test-Spread.R @@ -0,0 +1,164 @@ +context("s2dv::Spread test") + +############################################## + # dat1 + set.seed(1) + dat1 <- array(rnorm(240), c(member = 3, sdate = 4, ftime = 20)) + + # dat2 + set.seed(2) + dat2 <- rnorm(20) + +############################################## + +test_that("1. Input checks", { + + # data + expect_error( + Spread(c()), + "Parameter 'data' cannot be NULL." + ) + expect_error( + Spread(c(NA, NA)), + "Parameter 'data' must be a numeric array." + ) + expect_error( + Spread(array(1:10, dim = c(2, 5))), + "Parameter 'data' must have dimension names." + ) + # compute_dim + expect_error( + Spread(dat1, compute_dim = 1), + "Parameter 'compute_dim' must be a character vector." + ) + expect_error( + Spread(dat1, compute_dim = 'memb'), + "Parameter 'compute_dim' has some element not in 'data' dimension names." + ) + # na.rm + expect_error( + Spread(dat1, na.rm = 1), + "Parameter 'na.rm' must be one logical value." + ) + # conf + expect_error( + Spread(dat1, conf = 0.1), + "Parameter 'conf' must be one logical value." + ) + # conf.lev + expect_error( + Spread(dat1, conf.lev = c(0.05, 0.95)), + "Parameter 'conf.lev' must be a numeric number between 0 and 1." + ) + # ncores + expect_error( + Spread(dat1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + +}) + +############################################## +test_that("2. dat1", { + +res1_1 <- Spread(dat1) + +expect_equal( +names(res1_1), +c('iqr', 'maxmin', 'sd', 'mad') +) +expect_equal( +dim(res1_1$iqr), +c(stats = 3, sdate = 4, ftime = 20) +) +expect_equal( +dim(res1_1$maxmin), +c(stats = 3, sdate = 4, ftime = 20) +) +expect_equal( +dim(res1_1$sd), +c(stats = 3, sdate = 4, ftime = 20) +) +expect_equal( +dim(res1_1$mad), +c(stats = 3, sdate = 4, ftime = 20) +) + +res1_2 <- Spread(dat1, conf = F) + +expect_equal( +names(res1_2), +c('iqr', 'maxmin', 'sd', 'mad') +) +expect_equal( +dim(res1_2$iqr), +c(stats = 1, sdate = 4, ftime = 20) +) +expect_equal( +dim(res1_2$iqr), +dim(res1_2$maxmin) +) +expect_equal( +dim(res1_2$iqr), +dim(res1_2$sd) +) +expect_equal( +dim(res1_2$iqr), +dim(res1_2$mad) +) + +}) + +############################################## + +test_that("3. 
dat2", { + +res2_1 <- Spread(dat2) + +expect_equal( +names(res2_1), +c('iqr', 'maxmin', 'sd', 'mad') +) +expect_equal( +dim(res2_1$iqr), +c(stats = 3) +) +expect_equal( +dim(res2_1$maxmin), +c(stats = 3) +) +expect_equal( +dim(res2_1$sd), +c(stats = 3) +) +expect_equal( +dim(res2_1$mad), +c(stats = 3) +) + +res2_2 <- Spread(dat2, conf = F) + +expect_equal( +names(res2_2), +c('iqr', 'maxmin', 'sd', 'mad') +) +expect_equal( +dim(res2_2$iqr), +c(stats = 1) +) +expect_equal( +dim(res2_2$maxmin), +c(stats = 1) +) +expect_equal( +dim(res2_2$sd), +c(stats = 1) +) +expect_equal( +dim(res2_2$mad), +c(stats = 1) +) + +}) + + diff --git a/tests/testthat/test-StatSeasAtlHurr.R b/tests/testthat/test-StatSeasAtlHurr.R new file mode 100644 index 0000000000000000000000000000000000000000..82ef308e03d777bedeebd7334ca5c42386b0c812 --- /dev/null +++ b/tests/testthat/test-StatSeasAtlHurr.R @@ -0,0 +1,110 @@ +context("s2dv::StatSeaAtlHurr tests") + +############################################## + # dat1 + set.seed(1) + atlano1 <- array(runif(30, -1, 1), + dim = c(dat = 2, sdate = 5, ftime = 3)) + + set.seed(2) + tropano1 <- array(runif(30, -1, 1), + dim = c(dat = 2, sdate = 5, ftime = 3)) + + # dat2 + atlano2 <- atlano1 + tropano2 <- tropano1 + atlano2[1, 1, 1] <- NA + tropano2[1, 1, 1:2] <- NA + +############################################## +test_that("1. Input checks", { + + expect_error( + StatSeasAtlHurr(c(), c()), + "Parameter 'atlano' and 'tropano' cannot be NULL." + ) + expect_error( + StatSeasAtlHurr(c('b'), c('a')), + "Parameter 'atlano' and 'tropano' must be a numeric array." + ) + expect_error( + StatSeasAtlHurr(atlano1, array(1:4, dim = c(2, 2))), + "Parameter 'atlano' and 'tropano' must have dimension names." + ) + expect_error( + StatSeasAtlHurr(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), + "Parameter 'atlano' and 'tropano' must have same dimension names." + ) + expect_error( + StatSeasAtlHurr(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(c = 5, a = 3))), + "Parameter 'atlano' and 'tropano' must have the same length of all the dimensions." + ) + expect_error( + StatSeasAtlHurr(atlano1, tropano1, hrvar = 1), + "The parameter 'hrvar' must be either 'HR', 'TC', or 'PDI'." + ) + expect_error( + StatSeasAtlHurr(atlano1, tropano1, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) + + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +names(StatSeasAtlHurr(atlano1, tropano1)), +c('mean', 'var') +) +expect_equal( +dim(StatSeasAtlHurr(atlano1, tropano1)$mean), +c(dat = 2, sdate = 5, ftime = 3) +) +expect_equal( +dim(StatSeasAtlHurr(atlano1, tropano1)$var), +c(dat = 2, sdate = 5, ftime = 3) +) +expect_equal( +StatSeasAtlHurr(atlano1, tropano1)$mean, +StatSeasAtlHurr(atlano1, tropano1)$var +) +expect_equal( +StatSeasAtlHurr(atlano1, tropano1)$mean[1, 1:2, 2], +c(3.032203, 5.119961), +tolerance = 0.0001 +) +expect_equal( +StatSeasAtlHurr(atlano1, tropano1, hrvar = 'TC')$mean, +StatSeasAtlHurr(atlano1, tropano1, hrvar = 'TC')$var +) +expect_equal( +StatSeasAtlHurr(atlano1, tropano1, hrvar = 'PDI')$mean[1, 1:2, 2], +c(0.5664659, 1.7475613), +tolerance = 0.0001 +) +expect_equal( +StatSeasAtlHurr(atlano1, tropano1, hrvar = 'PDI')$var[1, 1:2, 2], +c(0.1042551, 0.9922350), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. 
Output checks: dat2", { + +expect_equal( +StatSeasAtlHurr(atlano2, tropano2)$mean[1, 1:2, 2], +c(NA, 5.119961), +tolerance = 0.0001 +) +expect_equal( +StatSeasAtlHurr(atlano2, tropano2)$mean[1, 1, ], +c(NA, NA, 10.84862), +tolerance = 0.0001 +) + + +}) diff --git a/tests/testthat/test-UltimateBrier.R b/tests/testthat/test-UltimateBrier.R new file mode 100644 index 0000000000000000000000000000000000000000..654aad020d5d7df65981316ab1918b3cece94c51 --- /dev/null +++ b/tests/testthat/test-UltimateBrier.R @@ -0,0 +1,240 @@ +context("s2dv::UltimateBrier tests") + +############################################## +# dat1 +set.seed(1) +exp1 <- array(rnorm(30), dim = c(dataset = 1, member = 3, sdate = 5, ftime = 2)) +set.seed(2) +obs1 <- array(round(rnorm(10)), dim = c(dataset = 1, sdate = 5, ftime = 2)) + + +############################################## +test_that("1. Input checks", { + # exp and obs + expect_error( + UltimateBrier(exp1, c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + UltimateBrier(c('b'), obs1), + "Parameter 'exp' and 'obs' must be a vector or a numeric array." + ) + expect_error( + UltimateBrier(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), + "Parameter 'exp' and 'obs' must have dimension names." + ) + # dat_dim + expect_error( + UltimateBrier(exp1, obs1, dat_dim = 2), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 2), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 'ensemble'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, member = 2, sdate = 5, ftime = 2))), + "The length of parameter 'memb_dim' in 'obs' must be 1." + ) + # time_dim + expect_error( + UltimateBrier(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + UltimateBrier(exp1, obs1, memb_dim = 'member', time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 6, ftime = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") + ) + expect_error( + UltimateBrier(exp1, array(1:10, dim = c(dataset = 1, sdate = 5, time = 2))), + paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions expect 'dat_dim' and 'memb_dim'.") + ) + # quantile + expect_error( + UltimateBrier(exp1, obs1, quantile = c(0.05, 0.95)), + "Parameter 'quantile' must be one logical value." + ) + expect_error( + UltimateBrier(exp1, obs1, quantile = FALSE, thr = 1:3, type = 'FairEnsembleBS'), + "Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'." + ) + # thr + expect_error( + UltimateBrier(exp1, obs1, thr = TRUE), + "Parameter 'thr' must be a numeric vector." + ) + expect_error( + UltimateBrier(exp1, obs1, quantile = TRUE, thr = 1:3), + "Parameter 'thr' must be between 0 and 1 when quantile is TRUE." + ) + # type + expect_error( + UltimateBrier(exp1, obs1, type = 'UltimateBrier'), + "Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'." 
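+ # A sketch (commented out) of the 'type' variants listed in the check above:
+ # the 'BS'-family types return the decomposition as a list (bs, rel, res,
+ # unc) unless decomposition = FALSE, while the 'BSS'-family types return a
+ # plain array, as the output checks below show.
+ # bs  <- UltimateBrier(exp1, obs1, type = 'BS')
+ # bss <- UltimateBrier(exp1, obs1, type = 'BSS')
+ # names(bs) # "bs" "rel" "res" "unc"
+ # dim(bss)  # c(nexp = 1, nobs = 1, bin = 3, ftime = 2)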
+ ) + # decomposition + expect_error( + UltimateBrier(exp1, obs1, decomposition = 1), + "Parameter 'decomposition' must be one logical value." + ) + # ncores + expect_error( + UltimateBrier(exp1, obs1, ncores = 0), + "Parameter 'ncores' must be a positive integer." + ) + + +}) + +############################################## +test_that("2. Output checks: dat1", { + +# 'BS' +expect_equal( +is.list(UltimateBrier(exp1, obs1)), +TRUE +) +expect_equal( +names(UltimateBrier(exp1, obs1)), +c('bs', 'rel', 'res', 'unc') +) +expect_equal( +is.list(UltimateBrier(exp1, obs1, decomposition = FALSE)), +FALSE +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE)), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE, thr = c(0.25, 0.5, 0.75))), +c(nexp = 1, nobs = 1, bin = 4, ftime = 2) +) +expect_equal( +UltimateBrier(exp1, obs1)$bs, +UltimateBrier(exp1, obs1, decomposition = FALSE) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$bs), +c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$rel), +c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$res), +c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1)$unc), +c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), +tolerance = 0.0001 +) + +# 'BSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'BSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'BSS')), +c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), +tolerance = 0.0001 +) + +# 'FairStartDatesBS' +expect_equal( +is.list(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), +TRUE +) +expect_equal( +names(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')), +c('bs', 'rel', 'res', 'unc') +) +expect_equal( +is.list(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), +FALSE +) +expect_equal( +dim(UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs, +UltimateBrier(exp1, obs1, decomposition = FALSE, type = 'FairStartDatesBS') +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$bs), +c(0.42222222, 0.44444444, 0.02222222, 0.48888889, 0.37777778, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$rel), +c(0.22222222, 0.31111111, 0.02222222, 0.28888889, 0.24444444, 0.02222222), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$res), +c(0.0400000, 0.1066667, 0.0000000, 0.0400000, 0.1066667, 0.0000000), +tolerance = 0.0001 +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBS')$unc), +c(0.24, 0.24, 0.00, 0.24, 0.24, 0.00), +tolerance = 0.0001 +) + +# 'FairStartDatesBSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairStartDatesBSS')), +c(-0.7592593, -0.8518519, -Inf, -1.0370370, -0.5740741, -Inf), +tolerance = 0.0001 +) +# 'FairEnsembleBS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 
'FairEnsembleBS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBS')), +c(0.1333333, 0.2000000, 0.2000000, 0.1333333, 0.4000000, 0.2000000), +tolerance = 0.0001 +) +# 'FairEnsembleBSS' +expect_equal( +dim(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), +c(nexp = 1, nobs = 1, bin = 3, ftime = 2) +) +expect_equal( +as.vector(UltimateBrier(exp1, obs1, type = 'FairEnsembleBSS')), +c(-0.1111111, -0.6666667, -0.6666667, 0.2592593, -1.2222222, -0.6666667), +tolerance = 0.0001 +) + +}) + +
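+
+############################################## +
+# A closing sanity-check sketch: the components verified above satisfy the
+# classic Brier decomposition, score = reliability - resolution + uncertainty,
+# which the reference numbers in section 2 reproduce exactly.
+d <- UltimateBrier(exp1, obs1)  # default type = 'BS'
+all.equal(as.vector(d$bs), as.vector(d$rel - d$res + d$unc))  # TRUE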