diff --git a/DESCRIPTION b/DESCRIPTION index f2577bfeebb720f2d76e89bde01f267e54ade2ae..e8382cd0b7f07c91ba1223f52002a40090724ab1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: s2dv Title: A Set of Common Tools for Seasonal to Decadal Verification -Version: 1.1.0 +Version: 1.2.0 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut"), person("Roberto", "Bilbao", , "roberto.bilbao@bsc.es", role = "ctb"), + person("Josep", "Cos", , "josep.cos@bsc.es", role = "ctb"), person("Carlos", "Delgado", , "carlos.delgado@bsc.es", role = "ctb"), person("Llorenç", "Lledó", , "llorenc.lledo@bsc.es", role = "ctb"), person("Andrea", "Manrique", , "andrea.manrique@bsc.es", role = "ctb"), @@ -37,9 +38,9 @@ Imports: NbClust, multiApply (>= 2.1.1), SpecsVerification (>= 0.5.0), - easyNCDF + easyNCDF, + easyVerification Suggests: - easyVerification, testthat License: Apache License 2.0 URL: https://earth.bsc.es/gitlab/es/s2dv/ @@ -47,4 +48,4 @@ BugReports: https://earth.bsc.es/gitlab/es/s2dv/-/issues LazyData: true SystemRequirements: cdo Encoding: UTF-8 -RoxygenNote: 7.0.1 +RoxygenNote: 7.2.0 diff --git a/NAMESPACE b/NAMESPACE index ca5d500368ca97734ff3b1c4bbc51b3b547e948c..032ebf98b3d409665b45f43ceea18477573f25fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(ConfigShowSimilarEntries) export(ConfigShowTable) export(Consist_Trend) export(Corr) +export(DiffCorr) export(EOF) export(Eno) export(EuroAtlanticTC) @@ -54,12 +55,15 @@ export(ProjectField) export(REOF) export(RMS) export(RMSSS) +export(RPS) +export(RPSS) export(RandomWalkTest) export(RatioPredictableComponents) export(RatioRMS) export(RatioSDRMS) export(Regression) export(Reorder) +export(ResidualCorr) export(SPOD) export(Season) export(SignalNoiseRatio) @@ -91,6 +95,7 @@ importFrom(ClimProjDiags,WeightedMean) importFrom(abind,abind) importFrom(abind,adrop) 
importFrom(easyNCDF,ArrayToNc) +importFrom(easyVerification,convert2prob) importFrom(grDevices,bmp) importFrom(grDevices,col2rgb) importFrom(grDevices,colorRampPalette) diff --git a/NEWS.md b/NEWS.md index 71993f0e554bb260e19b2678f9c455d5e0923f2f..d65a8240a76842468a0a509707745d2587b0f28f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +# s2dv 1.2.0 (Release date: 2022-06-22) +- Cluster(): Fix a bug of calculating nclusters ("K"): the function didn't use the whole data to calculate "K" if parameter "nclusters" is NULL.; Add missing output dimension names +- Clim(): Correct the output dimensions for some cases; allow dat_dim to be NULL; obs doesn't need to have dat_dim. +- MeanDims(): if the result is a number and drop = T, return a numeric instead of an array +- Load(): Bugfix for R >= 4.0.0 regarding list and vector confusion +- PlotLayout(): Bugfix when param "var" is a list +- ACC(): Add area-weighting into the calculation and ensure the data has a spatial mean of zero. "space_dim" is deprecated and replaced by "lat_dim" and "lon_dim". "dat_dim" can be NULL. +- PlotEquiMap(): Add useRaster = TRUE in image() if possible (i.e., latitude and longitude are regularly spaced.) +- PlotEquiMap(): New parameters xlonshft ylatshft xlabels ylabels for self-defined axis +- PlotEquiMap(): Flexible map longitude range +- New function: WeightCells, DiffCorr, ResidualCorr, RPS, RPSS +- Clim() and MeanDims() efficiency improvement +- CDORemap(): Add arbitrary time metadata to avoid cdo warning like "Warning (find_time_vars): Time variable >time< not found!" +- CDORemap(): Stop printing messages from cdo command. 
+ # s2dv 1.1.0 (Release date: 2021-12-14) - New functions: RatioPredictableComponents, SignalNoiseRatio - CDORemap(): Able to interpolate irregular grid to regular grid; include new cdo methods 'con2', 'laf' and 'nn' diff --git a/R/ACC.R b/R/ACC.R index 0f1040cec8741e1f482ca7e95b769ad91e898d61..44c724c927675295ee54f26d7768380aaf96fc7a 100644 --- a/R/ACC.R +++ b/R/ACC.R @@ -1,37 +1,47 @@ -#'Compute the anomaly correlation coefficient between the forecast and corresponding observation +#'Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation #' -#'Calculate the anomaly correlation coefficient for the ensemble mean of each -#'model and the corresponding references over a spatial domain. It can return a -#'forecast time series if the data contain forest time dimension, and also the -#'start date mean if the data contain start date dimension. -#'The domain of interest can be specified by providing the list -#'of longitudes/latitudes (lon/lat) of the data together with the corners -#'of the domain: lonlatbox = c(lonmin, lonmax, latmin, latmax). +#'Calculate the spatial anomaly correlation coefficient (ACC) for the ensemble +#'mean of each model and the corresponding references over a spatial domain. It +#'can return a forecast time series if the data contain forecast time dimension, +#'and also the ACC mean over one dimension, e.g., start date dimension. +#'The domain of interest can be specified by providing the list of longitudes/ +#'latitudes of the data together with the corners of the domain: lonlatbox = +#'c(lonmin, lonmax, latmin, latmax). The data will be adjusted to have a spatial +#'mean of zero, then area weighting is applied. The formula is referenced from +#'Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7). #' #'@param exp A numeric array of experimental anomalies with named dimensions. -#' It must have at least 'dat_dim' and 'space_dim'. 
+#' The dimension must have at least 'lat_dim' and 'lon_dim'. #'@param obs A numeric array of observational anomalies with named dimensions. -#' It must have the same dimensions as 'exp' except the length of 'dat_dim' +#' The dimension should be the same as 'exp' except the length of 'dat_dim' #' and 'memb_dim'. #'@param dat_dim A character string indicating the name of dataset (nobs/nexp) -#' dimension. The default value is 'dataset'. +#' dimension. The default value is 'dataset'. If there is no dataset +#' dimension, set NULL. +#'@param lat_dim A character string indicating the name of the latitude +#' dimension of 'exp' and 'obs' along which ACC is computed. The default value +#' is 'lat'. +#'@param lon_dim A character string indicating the name of the longitude +#' dimension of 'exp' and 'obs' along which ACC is computed. The default value +#' is 'lon'. #'@param space_dim A character string vector of 2 indicating the name of the #' latitude and longitude dimensions (in order) along which ACC is computed. -#' The default value is c('lat', 'lon'). +#' The default value is c('lat', 'lon'). This argument has been deprecated. +#' Use 'lat_dim' and 'lon_dim' instead. #'@param avg_dim A character string indicating the name of the dimension to be -#' averaged. It must be one of 'time_dim'. The mean ACC is calculated along -#' averaged. If no need to calculate mean ACC, set as NULL. The default value -#' is 'sdate'. +#' averaged, which is usually the time dimension. If no need to calculate mean +#' ACC, set as NULL. The default value is 'sdate'. #'@param memb_dim A character string indicating the name of the member #' dimension. If the data are not ensemble ones, set as NULL. The default #' value is 'member'. -#'@param lat A vector of the latitudes of the exp/obs grids. Only required when -#' the domain of interested is specified. The default value is NULL. +#'@param lat A vector of the latitudes of the exp/obs grids. 
It is used for +#' area weighting and when the domain of interest 'lonlatbox' is specified. #'@param lon A vector of the longitudes of the exp/obs grids. Only required when -#' the domain of interested is specified. The default value is NULL. +#' the domain of interest 'lonlatbox' is specified. The default value is +#' NULL. #'@param lonlatbox A numeric vector of 4 indicating the corners of the domain of -#' interested: c(lonmin, lonmax, latmin, latmax). Only required when the domain -#' of interested is specified. The default value is NULL. +#' interest: c(lonmin, lonmax, latmin, latmax). The default value is NULL +#' and the whole data will be used. #'@param conf A logical value indicating whether to retrieve the confidence #' intervals or not. The default value is TRUE. #'@param conftype A charater string of "parametric" or "bootstrap". @@ -54,8 +64,9 @@ #'A list containing the numeric arrays:\cr #'\item{acc}{ #' The ACC with the dimensions c(nexp, nobs, the rest of the dimension except -#' space_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in -#' exp), and nobs is the number of observation (i.e., dat_dim in obs). +#' lat_dim, lon_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in +#' exp), and nobs is the number of observation (i.e., dat_dim in obs). If +#' dat_dim is NULL, nexp and nobs are omitted. #'} #'\item{conf.lower (if conftype = "parametric") or acc_conf.lower (if #' conftype = "bootstrap")}{ @@ -73,8 +84,9 @@ #'} #'\item{macc}{ #' The mean anomaly correlation coefficient with dimensions -#' c(nexp, nobs, the rest of the dimension except space_dim, memb_dim, and -#' avg_dim). Only present if 'avg_dim' is not NULL. +#' c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and +#' avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp +#' and nobs are omitted. #'} #'\item{macc_conf.lower}{ #' The lower confidence interval of MACC with the same dimensions as MACC. 
@@ -101,8 +113,8 @@ #'clim <- Clim(sampleData$mod, sampleData$obs) #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'acc <- ACC(ano_exp, ano_obs) -#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap') +#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) +#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', lat = sampleData$lat) #'# Combine acc results for PlotACC #'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), #' dim = c(dim(acc$acc), 4)) @@ -113,14 +125,15 @@ #'PlotACC(res_bootstrap, startDates) #' } #'@references Joliffe and Stephenson (2012). Forecast Verification: A -#' Practitioner's Guide in Atmospheric Science. Wiley-Blackwell. +#' Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.; +#' Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7). #'@import multiApply #'@importFrom abind abind #'@importFrom stats qt qnorm quantile #'@importFrom ClimProjDiags Subset #'@export -ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), - avg_dim = 'sdate', memb_dim = 'member', +ACC <- function(exp, obs, dat_dim = 'dataset', lat_dim = 'lat', lon_dim = 'lon', + space_dim = c('lat', 'lon'), avg_dim = 'sdate', memb_dim = 'member', lat = NULL, lon = NULL, lonlatbox = NULL, conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE, ncores = NULL) { @@ -135,7 +148,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), } if (is.null(dim(exp)) | is.null(dim(obs))) { stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ", - "dat_dim and space_dim.")) + "lat_dim and lon_dim.")) } if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { @@ -146,18 +159,34 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), stop("Parameter 'exp' and 'obs' must have same dimension names.") } ## 
dat_dim - if (!is.character(dat_dim) | length(dat_dim) > 1) { - stop("Parameter 'dat_dim' must be a character string.") + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + "Set it as NULL if there is no dataset dimension.") + } + } + ## space_dim (deprecated) + if (!missing("space_dim")) { + warning("Parameter 'space_dim' is deprecated. Use 'lat_dim' and 'lon_dim' instead.") + lat_dim <- space_dim[1] + lon_dim <- space_dim[2] + } + ## lat_dim + if (!is.character(lat_dim) | length(lat_dim) != 1) { + stop("Parameter 'lat_dim' must be a character string.") } - if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + if (!lat_dim %in% names(dim(exp)) | !lat_dim %in% names(dim(obs))) { + stop("Parameter 'lat_dim' is not found in 'exp' or 'obs' dimension.") } - ## space_dim - if (!is.character(space_dim) | length(space_dim) != 2) { - stop("Parameter 'space_dim' must be a character vector of 2.") + ## lon_dim + if (!is.character(lon_dim) | length(lon_dim) != 1) { + stop("Parameter 'lon_dim' must be a character string.") } - if (any(!space_dim %in% names(dim(exp))) | any(!space_dim %in% names(dim(obs)))) { - stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + if (!lon_dim %in% names(dim(exp)) | !lon_dim %in% names(dim(obs))) { + stop("Parameter 'lon_dim' is not found in 'exp' or 'obs' dimension.") } ## avg_dim if (!is.null(avg_dim)) { @@ -174,19 +203,21 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' 
dimension.") + stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.", + "Set it as NULL if there is no member dimension.") } } ## lat - if (!is.null(lat)) { - if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.")) - } + if (is.null(lat)) { + stop("Parameter 'lat' cannot be NULL. It is required for area weighting.") + } + if (!is.numeric(lat) | length(lat) != dim(exp)[lat_dim]) { + stop(paste0("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.")) } ## lon if (!is.null(lon)) { - if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + if (!is.numeric(lon) | length(lon) != dim(exp)[lon_dim]) { stop(paste0("Parameter 'lon' must be a numeric vector with the same ", "length as the longitude dimension of 'exp' and 'obs'.")) } @@ -196,15 +227,12 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), if (!is.numeric(lonlatbox) | length(lonlatbox) != 4) { stop("Parameter 'lonlatbox' must be a numeric vector of 4.") } - } - ## lat, lon, and lonlatbox - if (!is.null(lon) & !is.null(lat) & !is.null(lonlatbox)) { + if (is.null(lon)) { + stop("Parameter 'lat' and 'lon' are required if 'lonlatbox' is specified.") + } select_lonlat <- TRUE - } else if (is.null(lon) & is.null(lat) & is.null(lonlatbox)) { - select_lonlat <- FALSE } else { - stop(paste0("Parameters 'lon', 'lat', and 'lonlatbox' must be used or be ", - "NULL at the same time.")) + select_lonlat <- FALSE } ## conf if (!is.logical(conf) | length(conf) > 1) { @@ -219,7 +247,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), stop("Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'.") } ## conf.lev - if (!is.numeric(conf.lev) | conf.lev < 0 | conf.lev > 1 | length(conf.lev) > 1) { + if (!is.numeric(conf.lev) | 
any(conf.lev < 0) | any(conf.lev > 1) | + length(conf.lev) > 1) { stop("Parameter 'conf.lev' must be a numeric number between 0 and 1.") } } @@ -229,21 +258,23 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), } ## ncores if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 | - length(ncores) > 1) { + if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) | + length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == dat_dim)] - name_obs <- name_obs[-which(name_obs == dat_dim)] + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } if (!is.null(memb_dim)) { name_exp <- name_exp[-which(name_exp == memb_dim)] name_obs <- name_obs[-which(name_obs == memb_dim)] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { stop(paste0("Parameter 'exp' and 'obs' must have same length of ", "all the dimensions expect 'dat_dim' and 'memb_dim'.")) } @@ -273,8 +304,8 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), (lonlatbox[1] > lonlatbox[2] & (lon > lonlatbox[1] | lon < lonlatbox[2]))) indlat <- which(lat >= lonlatbox[3] & lat <= lonlatbox[4]) - exp <- ClimProjDiags::Subset(exp, space_dim, list(indlat, indlon), drop = FALSE) - obs <- ClimProjDiags::Subset(obs, space_dim, list(indlat, indlon), drop = FALSE) + exp <- ClimProjDiags::Subset(exp, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE) + obs <- ClimProjDiags::Subset(obs, c(lat_dim, lon_dim), list(indlat, indlon), drop = FALSE) } # Ensemble mean @@ -287,84 +318,68 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), obs <- MeanDims(obs, memb_dim, na.rm = TRUE) } - if (is.null(avg_dim)) { - res <- Apply(list(exp, 
obs), - target_dims = list(c(space_dim, dat_dim), - c(space_dim, dat_dim)), - fun = .ACC, - dat_dim = dat_dim, avg_dim = avg_dim, - conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores_input = ncores, - ncores = ncores) - - if (conftype == 'bootstrap') { - res_conf <- Apply(list(exp_ori, obs_ori), - target_dims = list(c(memb_dim, dat_dim, space_dim), - c(memb_dim, dat_dim, space_dim)), - fun = .ACC_bootstrap, - dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, - conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores_input = ncores, - ncores = ncores) - #NOTE: pval? - res <- list(acc = res$acc, - acc_conf.lower = res_conf$acc_conf.lower, - acc_conf.upper = res_conf$acc_conf.upper, - macc = res$macc, - macc_conf.lower = res_conf$macc_conf.lower, - macc_conf.upper = res_conf$macc_conf.upper) - } + if (is.null(avg_dim)) { + target_dims <- list(c(lat_dim, lon_dim, dat_dim), c(lat_dim, lon_dim, dat_dim)) } else { - res <- Apply(list(exp, obs), - target_dims = list(c(space_dim, avg_dim, dat_dim), - c(space_dim, avg_dim, dat_dim)), + target_dims <- list(c(lat_dim, lon_dim, avg_dim, dat_dim), c(lat_dim, lon_dim, avg_dim, dat_dim)) + } + res <- Apply(list(exp, obs), + target_dims = target_dims, fun = .ACC, dat_dim = dat_dim, avg_dim = avg_dim, + lat = lat, conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores_input = ncores, ncores = ncores) - if (conftype == 'bootstrap') { - res_conf <- Apply(list(exp_ori, obs_ori), - target_dims = list(c(memb_dim, dat_dim, avg_dim, space_dim), - c(memb_dim, dat_dim, avg_dim, space_dim)), - fun = .ACC_bootstrap, - dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, - conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, - ncores_input = ncores, - ncores = ncores) - res <- list(acc = res$acc, - acc_conf.lower = res_conf$acc_conf.lower, - acc_conf.upper = res_conf$acc_conf.upper, - macc = res$macc, - macc_conf.lower = res_conf$macc_conf.lower, - 
macc_conf.upper = res_conf$macc_conf.upper) - + # If bootstrap, calculate confidence level + if (conftype == 'bootstrap') { + if (is.null(avg_dim)) { + target_dims_bs <- list(c(memb_dim, dat_dim, lat_dim, lon_dim), + c(memb_dim, dat_dim, lat_dim, lon_dim)) + } else { + target_dims_bs <- list(c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim), + c(memb_dim, dat_dim, avg_dim, lat_dim, lon_dim)) } - + res_conf <- Apply(list(exp_ori, obs_ori), + target_dims = target_dims_bs, + fun = .ACC_bootstrap, + dat_dim = dat_dim, memb_dim = memb_dim, avg_dim = avg_dim, + lat = lat, + conftype = conftype, pval = pval, conf = conf, conf.lev = conf.lev, + ncores = ncores) + #NOTE: pval? + res <- list(acc = res$acc, + acc_conf.lower = res_conf$acc_conf.lower, + acc_conf.upper = res_conf$acc_conf.upper, + macc = res$macc, + macc_conf.lower = res_conf$macc_conf.lower, + macc_conf.upper = res_conf$macc_conf.upper) } - return(res) + return(res) } -.ACC <- function(exp, obs, dat_dim = 'dataset', #space_dim = c('lat', 'lon'), - avg_dim = 'sdate', #memb_dim = NULL, - lon = NULL, lat = NULL, lonlatbox = NULL, - conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE, - ncores_input = NULL) { - -# if (is.null(avg_dim)) - # exp: [space_dim, dat_exp] - # obs: [space_dim, dat_obs] -# if (!is.null(avg_dim)) - # exp: [space_dim, avg_dim, dat_exp] - # obs: [space_dim, avg_dim, dat_obs] - +.ACC <- function(exp, obs, dat_dim = 'dataset', avg_dim = 'sdate', lat, + conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE) { # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs]. - - nexp <- as.numeric(dim(exp)[length(dim(exp))]) - nobs <- as.numeric(dim(obs)[length(dim(obs))]) + # If dat_dim = NULL, it returns a number. 
+ + # if (is.null(avg_dim)) + ## exp: [lat, lon, (dat_exp)] + ## obs: [lat, lon, (dat_obs)] + # if (!is.null(avg_dim)) + ## exp: [lat, lon, avg_dim, (dat_exp)] + ## obs: [lat, lon, avg_dim, (dat_obs)] + + # Add dat_dim temporarily if dat_dim = NULL + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[length(dim(exp))]) + nobs <- as.numeric(dim(obs)[length(dim(obs))]) + } if (is.null(avg_dim)) { acc <- array(dim = c(nexp = nexp, nobs = nobs)) @@ -372,34 +387,56 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), if (conf) { conf.upper <- array(dim = c(nexp = nexp, nobs = nobs)) conf.lower <- array(dim = c(nexp = nexp, nobs = nobs)) - if (conftype == 'bootstrap') { - ndraw <- 100 - acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw)) - } } } else { - acc <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) - names(dim(acc))[3] <- avg_dim - macc <- array(dim = c(nexp = nexp, nobs = nobs)) - if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) + acc <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) + names(dim(acc))[3] <- avg_dim + macc <- array(dim = c(nexp = nexp, nobs = nobs)) + if (pval) p.val <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) if (conf) { - conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) - conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1])) - if (conftype == 'bootstrap') { - ndraw <- 100 - acc_draw <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[length(dim(exp)) - 1], ndraw)) - macc_draw <- array(dim = c(nexp = nexp, nobs = nobs, ndraw)) - } + conf.upper <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) + conf.lower <- array(dim = c(nexp = nexp, nobs = nobs, dim(exp)[avg_dim])) } } + # centralize & area weighted + ## spatial centralization for each [avg_dim, dat] + dim_exp <- dim(exp) + dim_obs <- 
dim(obs) + wt <- cos(lat * pi/180) + wt <- rep(wt, times = prod(dim_exp[2:length(dim_exp)])) + + if (is.null(avg_dim) & is.null(dat_dim)) { #[lat, lon] + # turn exp and obs into vector, first latitudes and then longitudes + exp <- as.vector(exp) + obs <- as.vector(obs) + exp <- array(sqrt(wt) * (exp - mean(exp, na.rm = TRUE)), dim = dim_exp) + obs <- array(sqrt(wt) * (obs - mean(obs, na.rm = TRUE)), dim = dim_obs) + } else { # [lat, lon, dat], [lat, lon, avg_dim], or [lat, lon, avg_dim, dat] + # exp + exp <- array(exp, dim = c(prod(dim_exp[1:2]), dim_exp[3:length(dim_exp)])) + mean_exp <- apply(exp, 2:length(dim(exp)), mean, na.rm = TRUE) # [avg_dim, (dat)] + mean_exp <- rep(as.vector(mean_exp), each = prod(dim_exp[1:2])) + exp <- array(sqrt(wt) * (as.vector(exp) - mean_exp), dim = dim_exp) + # obs + obs <- array(obs, dim = c(prod(dim_obs[1:2]), dim_obs[3:length(dim_obs)])) + mean_obs <- apply(obs, 2:length(dim(obs)), mean, na.rm = TRUE) # [avg_dim, (dat)] + mean_obs <- rep(as.vector(mean_obs), each = prod(dim_obs[1:2])) + obs <- array(sqrt(wt) * (as.vector(obs) - mean_obs), dim = dim_obs) + } + # Per-paired exp and obs. 
NAs should be in the same position in both exp and obs for (iobs in 1:nobs) { for (iexp in 1:nexp) { - exp_sub <- ClimProjDiags::Subset(exp, dat_dim, iexp, drop = 'selected') - obs_sub <- ClimProjDiags::Subset(obs, dat_dim, iobs, drop = 'selected') - # dim: [space_dim] + if (!is.null(dat_dim)) { + exp_sub <- ClimProjDiags::Subset(exp, dat_dim, iexp, drop = 'selected') + obs_sub <- ClimProjDiags::Subset(obs, dat_dim, iobs, drop = 'selected') + } else { + exp_sub <- exp + obs_sub <- obs + } + # dim: [lat, lon, (avg_dim)] # Variance(iexp) should not take into account any point # that is not available in iobs and therefore not accounted for @@ -409,18 +446,18 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), if (is.null(avg_dim)) { # ACC - top <- sum(exp_sub*obs_sub, na.rm = TRUE) #a number + top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) acc[iexp, iobs] <- top/bottom #a number # handle bottom = 0 if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA + # pval and conf if (pval | conf) { if (conftype == "parametric") { - # calculate effective sample size along space_dim - # combine space_dim into one dim first - obs_tmp <- array(obs_sub, dim = c(space = length(obs_sub))) - eno <- Eno(obs_tmp, 'space', ncores = ncores_input) # a number + # calculate effective sample size + eno <- .Eno(as.vector(obs_sub), na.action = na.pass) + if (pval) { t <- qt(conf.lev, eno - 2) # a number p.val[iexp, iobs] <- sqrt(t^2 / (t^2 + eno - 2)) @@ -433,18 +470,21 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), } } else { #avg_dim is not NULL + # exp_sub and obs_sub: [lat, lon, avg_dim] + # MACC - top <- sum(exp_sub*obs_sub, na.rm = TRUE) #a number + top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) macc[iexp, iobs] <- top/bottom #a number + # handle bottom = 0 if 
(is.infinite(macc[iexp, iobs])) macc[iexp, iobs] <- NA + # ACC - for (i in 1:dim(acc)[3]) { #NOTE: use sapply!!! - exp_sub_i <- ClimProjDiags::Subset(exp_sub, avg_dim, i, drop = 'selected') - obs_sub_i <- ClimProjDiags::Subset(obs_sub, avg_dim, i, drop = 'selected') - #dim: [space_dim] - top <- sum(exp_sub_i*obs_sub_i, na.rm = TRUE) #a number + for (i in 1:dim(acc)[3]) { + exp_sub_i <- exp_sub[, , i] + obs_sub_i <- obs_sub[, , i] + top <- sum(exp_sub_i * obs_sub_i, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub_i^2, na.rm = TRUE) * sum(obs_sub_i^2, na.rm = TRUE)) acc[iexp, iobs, i] <- top/bottom #a number # handle bottom = 0 @@ -454,18 +494,21 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), # pval and conf if (pval | conf) { if (conftype == "parametric") { - # calculate effective sample size along space_dim - # combine space_dim into one dim first - obs_tmp <- array(obs_sub, dim = c(space = prod(dim(obs_sub)[-length(dim(obs_sub))]), - dim(obs_sub)[length(dim(obs_sub))])) - eno <- Eno(obs_tmp, 'space', ncores = ncores_input) # a vector of avg_dim + # calculate effective sample size along lat_dim and lon_dim + # combine lat_dim and lon_dim into one dim first + obs_tmp <- array(obs_sub, + dim = c(space = prod(dim(obs_sub)[1:2]), + dim(obs_sub)[3])) + eno <- apply(obs_tmp, 2, .Eno, na.action = na.pass) # a vector of avg_dim if (pval) { t <- qt(conf.lev, eno - 2) # a vector of avg_dim p.val[iexp, iobs, ] <- sqrt(t^2 / (t^2 + eno - 2)) } if (conf) { - conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3)) - conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3)) + conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + + qnorm(1 - (1 - conf.lev) / 2) / sqrt(eno - 3)) + conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + + qnorm((1 - conf.lev) / 2) / sqrt(eno - 3)) } } } @@ -476,8 +519,29 @@ ACC <- function(exp, obs, dat_dim = 
'dataset', space_dim = c('lat', 'lon'), } #------------------------------------------------ - - + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { + if (is.null(avg_dim)) { + acc <- as.vector(acc) + if (conf) { + conf.lower <- as.vector(conf.lower) + conf.upper <- as.vector(conf.upper) + } + if (pval) { + p.val <- as.vector(p.val) + } + } else { + dim(acc) <- dim(acc)[3:length(dim(acc))] + macc <- as.vector(macc) + if (conf) { + dim(conf.lower) <- dim(conf.lower)[3:length(dim(conf.lower))] + dim(conf.upper) <- dim(conf.upper)[3:length(dim(conf.upper))] + } + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + } + } # Return output if (is.null(avg_dim)) { @@ -509,17 +573,16 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), } -.ACC_bootstrap <- function(exp, obs, dat_dim = 'dataset', #space_dim = c('lat', 'lon'), - avg_dim = 'sdate', memb_dim = NULL, - lon = NULL, lat = NULL, lonlatbox = NULL, - conf = TRUE, conftype = "parametric", conf.lev = 0.95, pval = TRUE, - ncores_input = NULL) { +.ACC_bootstrap <- function(exp, obs, dat_dim = 'dataset', + avg_dim = 'sdate', memb_dim = NULL, lat, + conf = TRUE, conftype = "parametric", conf.lev = 0.95, + pval = TRUE) { # if (is.null(avg_dim)) - # exp: [memb_exp, dat_exp, space_dim] - # obs: [memb_obs, dat_obs, space_dim] + # exp: [memb_exp, dat_exp, lat, lon] + # obs: [memb_obs, dat_obs, lat, lon] # if (!is.null(avg_dim)) - # exp: [memb_exp, dat_exp, avg_dim, space_dim] - # obs: [memb_obs, dat_obs, avg_dim, space_dim] + # exp: [memb_exp, dat_exp, avg_dim, lat, lon] + # obs: [memb_obs, dat_obs, avg_dim, lat, lon] nexp <- as.numeric(dim(exp)[2]) nobs <- as.numeric(dim(obs)[2]) @@ -571,7 +634,7 @@ ACC <- function(exp, obs, dat_dim = 'dataset', space_dim = c('lat', 'lon'), #calculate the ACC of the randomized field tmpACC <- .ACC(drawexp, drawobs, conf = FALSE, pval = FALSE, avg_dim = avg_dim, - ncores_input = ncores_input) + lat = lat) if (is.null(avg_dim)) { 
acc_draw[, , jdraw] <- tmpACC$acc } else { diff --git a/R/AMV.R b/R/AMV.R index a51a819def8e8b163e10b01eb36cad4673692b13..e14897b291de60fa2c6ad5051e1e070eed72f094 100644 --- a/R/AMV.R +++ b/R/AMV.R @@ -98,10 +98,10 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'data' must be a numeric array.") } # data_lats and data_lons part1 - if (!(class(data_lats) == 'numeric' | class(data_lats) == 'integer')) { + if (!(inherits(data_lats, 'numeric') | inherits(data_lats, 'integer'))) { stop("Parameter 'data_lats' must be a numeric vector.") } - if (!(class(data_lons) == 'numeric' | class(data_lons) == 'integer')) { + if (!(inherits(data_lons, 'numeric') | inherits(data_lons, 'integer'))) { stop("Parameter 'data_lons' must be a numeric vector.") } # type diff --git a/R/CDORemap.R b/R/CDORemap.R index 00824de207b5649e541451b009da43eafa4ab321..ce3ed353d894d5bdb836b8ff37c93187c093df55 100644 --- a/R/CDORemap.R +++ b/R/CDORemap.R @@ -42,18 +42,35 @@ #' for the price of writing more intermediate files (whis usually is #' unconvenient) by setting the parameter \code{avoid_writes = TRUE}. #'@param crop Whether to crop the data after interpolation with -#' 'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole -#' world as CDO does by default (FALSE). If \code{crop = TRUE} then the -#' longitude and latitude borders which to crop at are taken as the limits of -#' the cells at the borders ('lons' and 'lats' are perceived as cell centers), -#' i.e. the resulting array will contain data that covers the same area as -#' the input array. This is equivalent to specifying \code{crop = 'preserve'}, -#' i.e. preserving area. If \code{crop = 'tight'} then the borders which to -#' crop at are taken as the minimum and maximum cell centers in 'lons' and -#' 'lats', i.e. the area covered by the resulting array may be smaller if -#' interpolating from a coarse grid to a fine grid. 
The parameter 'crop' also -#' accepts a numeric vector of custom borders which to crop at: -#' c(western border, eastern border, southern border, northern border). +#' 'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole +#' world as CDO does by default (FALSE). The default value is TRUE.\cr +#' \itemize{ +#' \item{ +#' If \code{crop = TRUE}, the longitude and latitude borders to be cropped +#' at are taken as the limits of the cells at the borders (not the values +#' of 'lons' and 'lats', which are perceived as cell centers), i.e., the +#' resulting array will contain data that covers the same area as the input +#' array. This is equivalent to specifying \code{crop = 'preserve'}, i.e., +#' preserving area. Notice that the longitude range of returning array will +#' follow the original data 'lons' instead of the target grid 'grid'. +#' } +#' \item{ +#' If \code{crop = FALSE}, the returning array is not cropped, i.e., a +#' global domain, and the longitude range will be the same as the target +#' grid 'grid'. +#' } +#' \item{ +#' If \code{crop = 'tight'}, the borders to be cropped at are taken as the +#' minimum and maximum cell centers in 'lons' and 'lats', i.e., the area +#' covered by the resulting array may be smaller if interpolating from a +#' coarse grid to a fine grid. +#' } +#' \item{ +#' The parameter 'crop' also accepts a numeric vector of customized borders +#' to be cropped at:\cr +#' c(western border, eastern border, southern border, northern border). +#' } +#' } #'@param force_remap Whether to force remapping, even if the input data array #' is already on the target grid. 
#'@param write_dir Path to the directory where to create the intermediate @@ -222,10 +239,10 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (!is.numeric(lons) || !is.numeric(lats)) { stop("Expected numeric 'lons' and 'lats'.") } - if (any(is.na(lons > 0))) { + if (anyNA(lons > 0)) { stop("Found invalid values in 'lons'.") } - if (any(is.na(lats > 0))) { + if (anyNA(lats > 0)) { stop("Found invalid values in 'lats'.") } if (is.null(dim(lons))) { @@ -414,7 +431,6 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } if (is.logical(crop)) { if (crop) { - warning("Parameter 'crop' = 'TRUE'. The output grid range will follow the input lons and lats.") if (length(lons) == 1 || length(lats) == 1) { stop("CDORemap cannot remap if crop = TRUE and values for only one ", "longitude or one latitude are provided. Either a) provide ", @@ -525,8 +541,6 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } ###--- } - } else if (crop == FALSE) { - warning("Parameter 'crop' = 'FALSE'. The output grid range will follow parameter 'grid'.") } } else if (is.numeric(crop)) { if (length(crop) != 4) { @@ -697,6 +711,8 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (!is.null(unlimited_dim)) { # This will make ArrayToNc create this dim as unlimited. names(dim(data_array))[unlimited_dim] <- 'time' + # create time variable. 
The value is random since CDORemap() doesn't support time remapping now and we just want to avoid cdo warning + time_attr <- array(c(1:dim(data_array)[unlimited_dim]), dim = c(dim(data_array)[unlimited_dim])) } if (length(dim(lons)) == 1) { names(dim(lons)) <- lon_dim @@ -758,7 +774,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } # dims_before_crop <- dim(subset) # Make sure subset goes along with metadata - easyNCDF::ArrayToNc(setNames(list(subset, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + if (is.null(unlimited_dim)) { + easyNCDF::ArrayToNc(setNames(list(subset, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } else { + easyNCDF::ArrayToNc(setNames(list(subset, lons, lats, time_attr), c('var', lon_var_name, lat_var_name, 'time')), tmp_file) + } } else { if (is_irregular) { pos_lon <- which(names(dim(data_array)) == lon_dim) @@ -774,7 +794,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } } # dims_before_crop <- dim(data_array) + if (is.null(unlimited_dim)) { + easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + } else { + easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats, time_attr), c('var', lon_var_name, lat_var_name, 'time')), tmp_file) + } } sellonlatbox <- '' if (crop) { @@ -784,11 +808,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, ',', format(lat_extremes[2], scientific = FALSE), ' -') } err <- try({ - system(paste0("cdo -s ", sellonlatbox, "remap", method, ",", grid, " ", tmp_file, " ", tmp_file2)) + system(paste0("cdo -s ", sellonlatbox, "remap", method, ",", grid, " ", tmp_file, " ", tmp_file2), ignore.stdout = TRUE, ignore.stderr = TRUE) }) file.remove(tmp_file) - if (('try-error' %in% class(err)) || err > 0) { - stop("CDO remap failed.") + if (is(err, 'try-error') || err > 0) 
{ + stop("CDO remap failed. Possible problem: parameter 'grid'.") } ncdf_remapped <- nc_open(tmp_file2) if (!lons_lats_taken) { diff --git a/R/Clim.R b/R/Clim.R index d01cf5d91ec219dd7b9124abccabb392d1f35215..ce9ba2ed17b283d9a137df32e8b06275e7691f2e 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -3,31 +3,38 @@ #'This function computes per-pair climatologies for the experimental #'and observational data using one of the following methods: #'\enumerate{ -#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} -#' \item{Kharin method (Karin et al, GRL, 2012)} -#' \item{Fuckar method (Fuckar et al, GRL, 2014)} +#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 https://doi.org/10.1007/s00382-012-1413-1)} +#' \item{Kharin method (Kharin et al, GRL, 2012 https://doi.org/10.1029/2012GL052647)} +#' \item{Fuckar method (Fuckar et al, GRL, 2014 https://doi.org/10.1002/2014GL060815)} #'} #'Per-pair climatology means that only the startdates covered by the #'whole experiments/observational dataset will be used. In other words, the #'startdates which are not all available along 'dat_dim' dimension of both #'the 'exp' and 'obs' are excluded when computing the climatologies. +#'Kharin method is the linear trend bias correction method, and Fuckar method +#'is the initial condition bias correction method. The two methods both do the +#'per-pair correction beforehand. #' -#'@param exp A named numeric array of experimental data, with at least two -#' dimensions 'time_dim' and 'dat_dim'. -#'@param obs A named numeric array of observational data, same dimensions as -#' parameter 'exp' except along 'dat_dim'. +#'@param exp A named numeric array of experimental data with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data that has the same +#' dimension as 'exp' except 'dat_dim'. #'@param time_dim A character string indicating the name of dimension along #' which the climatologies are computed. The default value is 'sdate'. 
#'@param dat_dim A character vector indicating the name of the dataset and #' member dimensions. If data at one startdate (i.e., 'time_dim') are not #' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. -#' The default value is "c('dataset', 'member')". -#'@param method A character string indicating the method to be used. The -#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'. +#' If there is no dataset dimension, it can be NULL, however, it will be more +#' efficient to simply use mean() to do the calculation. The default value is +#' "c('dataset', 'member')". +#'@param method A character string indicating the method to be used. The +#' options include 'clim' (per-pair method), 'kharin' (Kharin method), and +#' 'NDV' (Fuckar method). The default value is 'clim'. #'@param ftime_dim A character string indicating the name of forecast time #' dimension. Only used when method = 'NDV'. The default value is 'ftime'. #'@param memb A logical value indicating whether to remain 'memb_dim' dimension -#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE. +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is +#' TRUE. #'@param memb_dim A character string indicating the name of the member #' dimension. Only used when parameter 'memb' is FALSE. It must be one element #' in 'dat_dim'. The default value is 'member'. @@ -46,7 +53,7 @@ #' dimension 'memb_dim' is also removed. #'} #'\item{$clim_obs}{ -#' A numeric array with the same dimensions as parameter 'exp' +#' A numeric array with the same dimensions as parameter 'obs' #' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, #' dimension 'memb_dim' is also removed. 
#'} @@ -82,14 +89,10 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", "containing time_dim and dat_dim.")) } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { - stop("Parameter 'exp' and 'obs' must have same dimension name") - } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") @@ -98,12 +101,26 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") } ## dat_dim - if (!is.character(dat_dim)) { - stop("Parameter 'dat_dim' must be a character vector.") - } - if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) { - stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") - } + if (!is.null(dat_dim)) { + if (!is.character(dat_dim)) { + stop("Parameter 'dat_dim' must be a character vector.") + } + # Check if dat_dim is in exp + if (!all(dat_dim %in% names(dim(exp)))) { + stop("Parameter 'dat_dim' is not found in 'exp' dimensions.") + } + # If dat_dim is not in obs, add it in + if (any(!dat_dim %in% names(dim(obs)))) { + reset_obs_dim <- TRUE + ori_obs_dim <- dim(obs) + dim(obs) <- c(dim(obs), rep(1, length(dat_dim[which(!dat_dim %in% names(dim(obs)))]))) + names(dim(obs)) <- c(names(ori_obs_dim), dat_dim[which(!dat_dim %in% names(dim(obs)))]) + } else { + reset_obs_dim <- FALSE + } + } else { + reset_obs_dim <- FALSE + } ## method if (!(method %in% c("clim", "kharin", 
"NDV"))) { stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.") @@ -122,13 +139,17 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), stop("Parameter 'memb' must be one logical value.") } ## memb_dim - if (!memb) { + if (!is.null(memb_dim) & !memb) { if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.") + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } + if (!memb_dim %in% dat_dim) + stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.") + } else if (is.null(memb_dim) & !memb) { + memb <- TRUE } ## na.rm if (!is.logical(na.rm) | length(na.rm) > 1) { @@ -144,13 +165,15 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - for (i in 1:length(dat_dim)) { - name_exp <- name_exp[-which(name_exp == dat_dim[i])] - name_obs <- name_obs[-which(name_obs == dat_dim[i])] + if (!is.null(dat_dim)) { + for (i in 1:length(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim[i])] + name_obs <- name_obs[-which(name_obs == dat_dim[i])] + } } if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension expect 'dat_dim'.")) + stop(paste0("Parameter 'exp' and 'obs' must have the same dimensions ", + "expect 'dat_dim'.")) } ############################### @@ -165,8 +188,8 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # Calculate Clim #---------------------------------- - # Remove all sdate if not complete along dat_dim - + # Per-pair: Remove all sdate if not complete along dat_dim + if (!is.null(dat_dim)) { pos <- rep(0, length(dat_dim)) for (i in 1:length(dat_dim)) { 
#[dat, sdate] ## dat_dim: [dataset, member] @@ -182,6 +205,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), } exp[which(is.na(outrows_exp))] <- NA obs[which(is.na(outrows_obs))] <- NA + } #----------------------------------- @@ -193,13 +217,6 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), dat_dim = dat_dim, memb_dim = memb_dim, memb = memb, na.rm = na.rm, ncores_input = ncores, ncores = ncores) - # Add member dimension name back - if (memb) { - if(is.null(names(dim(clim$clim_exp))[1])) { - names(dim(clim$clim_exp))[1] <- memb_dim - names(dim(clim$clim_obs))[1] <- memb_dim - } - } } else if (method == 'kharin') { clim <- Apply(list(exp, obs), @@ -218,6 +235,19 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim, memb = memb, na.rm = na.rm, ncores_input = ncores, ncores = ncores) + } + + # Remove dat_dim in obs if obs doesn't have at first place + if (reset_obs_dim) { + clim_obs_dim <- ori_obs_dim[-which(names(ori_obs_dim) == time_dim)] + if (!memb & memb_dim %in% names(clim_obs_dim)) { + clim_obs_dim <- clim_obs_dim[-which(names(clim_obs_dim) == memb_dim)] + } + if (is.integer(clim_obs_dim) & length(clim_obs_dim) == 0) { + clim$clim_obs <- as.vector(clim$clim_obs) + } else { + clim$clim_obs <- array(clim$clim_obs, dim = clim_obs_dim) + } } return(clim) @@ -230,32 +260,43 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), na.rm = TRUE, ncores_input = NULL) { if (method == 'clim') { - # exp: [sdate, dat_dim_exp] - # obs: [sdate, dat_dim_obs] - - clim_exp <- apply(exp, which(names(dim(exp)) != time_dim), - mean, na.rm = na.rm) #average out time_dim - clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), - mean, na.rm = na.rm) #[dat_dim] - - ## member mean - if (!memb) { - if (length(dim(clim_exp)) == 1) { #dim: [member] - clim_exp <- mean(clim_exp, na.rm = TRUE) - 
clim_obs <- mean(clim_obs, na.rm = TRUE) - } else { - pos <- which(names(dim(clim_exp)) == memb_dim) - pos <- c(1:length(dim(clim_exp)))[-pos] - dim_name <- names(dim(clim_exp)) - clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) - clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) - if (is.null(names(dim(as.array(clim_exp))))) { - clim_exp <- as.array(clim_exp) - clim_obs <- as.array(clim_obs) - names(dim(clim_exp)) <- dim_name[pos] - names(dim(clim_obs)) <- dim_name[pos] + if (!is.null(dat_dim)) { + # exp: [sdate, dat_dim_exp] + # obs: [sdate, dat_dim_obs] + + clim_exp <- apply(exp, which(names(dim(exp)) != time_dim), + mean, na.rm = na.rm) #average out time_dim + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) #[dat_dim] + + if (is.null(dim(clim_exp))) { + dim(clim_exp) <- length(clim_exp) + names(dim(clim_exp)) <- dat_dim + dim(clim_obs) <- length(clim_obs) + names(dim(clim_obs)) <- dat_dim + } + + ## ensemble mean + if (!memb) { + if (length(dim(clim_exp)) == 1) { #dim: [member] + clim_exp <- mean(clim_exp, na.rm = TRUE) + clim_obs <- mean(clim_obs, na.rm = TRUE) + } else { + dim_name <- names(dim(clim_exp)) + pos <- c(1:length(dim(clim_exp)))[-which(dim_name == memb_dim)] + clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) + clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) + if (is.null(dim(clim_exp))) { + dim(clim_exp) <- length(clim_exp) + dim(clim_obs) <- length(clim_obs) + names(dim(clim_exp)) <- dim_name[pos] + names(dim(clim_obs)) <- dim_name[pos] + } } } + } else { #dat_dim = NULL + clim_exp <- mean(exp) + clim_obs <- mean(obs) } } else if (method == 'kharin') { @@ -263,27 +304,39 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # obs: [sdate, dat_dim_obs] # obs clim - clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), - mean, na.rm = na.rm) #[dat_dim] + if (!is.null(dat_dim)) { + clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), + mean, na.rm = na.rm) 
#[dat_dim] + if (is.null(dim(clim_obs))) { + dim(clim_obs) <- length(clim_obs) + names(dim(clim_obs)) <- dat_dim + } + } else { + clim_obs <- mean(obs) + } # exp clim ##--- NEW trend ---## - tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, - polydeg = 1, conf = FALSE, ncores = ncores_input)$trend + tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1, + polydeg = 1, conf = FALSE, ncores = ncores_input)$trend tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, polydeg = 1, conf = FALSE, ncores = ncores_input)$trend - # tmp_exp: [stats, dat_dim)] - + # tmp_exp: [stats, dat_dim] tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member) #tmp_obs_mean: [stats = 2] - - intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim] - slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim] - intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim] - slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim] + if (!is.null(dat_dim)) { + intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim] + slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim] + intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim] + slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim] + } else { + intercept_exp <- tmp_exp[1] + slope_exp <- tmp_exp[2] + intercept_obs <- tmp_obs_mean[1] + slope_obs <- tmp_obs_mean[2] + } trend_exp <- list() trend_obs <- list() - for (jdate in 1:dim(exp)[time_dim]) { trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs @@ -291,26 +344,40 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # turn list into array trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1])) trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1])) - len <- length(dim(exp)) - trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1))) - 
trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1))) + if (!is.null(dat_dim)) { + len <- length(dim(exp)) + trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1))) + trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1))) + } - clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number + # average out dat_dim, get a number + if (is.null(dim(clim_obs))) { + clim_obs_mean <- mean(clim_obs) + } else { + clim_obs_mean <- mean(apply(clim_obs, 1, mean)) + } clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line clim_exp <- trend_exp - trend_obs + clim_obs_mean - - ## member mean + + ## member mean if (!memb) { - pos <- which(names(dim(clim_exp)) == memb_dim) - pos <- c(1:length(dim(clim_exp)))[-pos] - dim_name <- names(dim(clim_exp)) - clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) - clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) - if (is.null(names(dim(as.array(clim_exp))))) { + pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] + pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] + tmp_dim_exp <- dim(clim_exp) + tmp_dim_obs <- dim(clim_obs) + clim_exp <- apply(clim_exp, pos_exp, mean, na.rm = TRUE) + if (is.integer(pos_obs) & length(pos_obs) == 0) { + clim_obs <- mean(clim_obs) + } else { + clim_obs <- apply(clim_obs, pos_obs, mean, na.rm = TRUE) + } + if (is.null(dim(clim_exp))) { clim_exp <- as.array(clim_exp) + dim(clim_exp) <- tmp_dim_exp[pos_exp] + } + if (is.null(dim(clim_obs)) & !(is.integer(pos_obs) & length(pos_obs) == 0)) { clim_obs <- as.array(clim_obs) - names(dim(clim_exp)) <- dim_name[pos] - names(dim(clim_obs)) <- dim_name[pos] + dim(clim_obs) <- tmp_dim_obs[pos_obs] } } @@ -319,15 +386,24 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # exp: [sdate, dat_dim, ftime] # obs: [sdate, dat_dim, ftime] - # obs clim + # obs clim clim_obs <- apply(obs, which(names(dim(obs)) != time_dim), - mean, na.rm = na.rm) 
#[dat_dim, ftime] + mean, na.rm = na.rm) #[(dat_dim), ftime] - # exp clim + if (is.null(dim(clim_obs))) { + dim(clim_obs) <- length(clim_obs) + names(dim(clim_obs)) <- c(dat_dim, ftime_dim) + } + + # exp clim pos_ftime <- length(dim(exp)) #a number dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4) - pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime - dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3) + if (!is.null(dat_dim)) { + pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime + dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3) + } else { + dim_dat <- NULL + } # Create initial data set (i.e., only first ftime) tmp <- Subset(exp, ftime_dim, 1, drop = 'selected') @@ -350,14 +426,14 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # Find intercept and slope intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime] slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime] - intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) - #[ftime, dat_dim] exp - intercept_obs <- Reorder(intercept_obs, c(2:length(dim(intercept_obs)), 1)) - #[dat_dim, ftime] exp - slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) - #[ftime, dat_dim] exp - slope_obs <- Reorder(slope_obs, c(2:length(dim(slope_obs)), 1)) - #[dat_dim, ftime] exp + intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp + if (!is.null(dat_dim)) { + intercept_obs <- Reorder(intercept_obs, c(2:length(dim(intercept_obs)), 1)) #[dat_dim, ftime] exp + } + slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp + if (!is.null(dat_dim)) { + slope_obs <- Reorder(slope_obs, c(2:length(dim(slope_obs)), 1)) #[dat_dim, ftime] exp + } trend_exp <- list() trend_obs <- list() @@ -366,7 +442,9 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp 
#[dat_dim, ftime] tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] - tmp <- Reorder(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime] + if (!is.null(dat_dim)) { + tmp <- Reorder(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime] + } trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs } # turn list into array @@ -377,8 +455,11 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1))) trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1))) #trend_: [sdate, dat_dim, ftime] - + if (is.null(dim(clim_obs))) { + clim_obs_mean <- clim_obs #[ftime] + } else { clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean) #average out dat_dim, [ftime] + } clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat)) #[ftime, sdate, dat_dim] len <- length(dim(clim_obs_mean)) @@ -387,18 +468,26 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), clim_exp <- trend_exp - trend_obs + clim_obs_mean - ## member mean + ## member mean if (!memb) { - pos <- which(names(dim(clim_exp)) == memb_dim) - pos <- c(1:length(dim(clim_exp)))[-pos] - dim_name <- names(dim(clim_exp)) - clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) - clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) - if (is.null(names(dim(as.array(clim_exp))))) { + pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] + pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] + + tmp_dim_exp <- dim(clim_exp) + tmp_dim_obs <- dim(clim_obs) + clim_exp <- apply(clim_exp, pos_exp, mean, na.rm = TRUE) + if (is.integer(pos_obs) & length(pos_obs) == 0) { + clim_obs <- mean(clim_obs) + } else { + clim_obs <- apply(clim_obs, pos_obs, mean, na.rm = TRUE) + } + if (is.null(dim(clim_exp))) { clim_exp <- as.array(clim_exp) + dim(clim_exp) <- tmp_dim_exp[pos_exp] + } + if (is.null(dim(clim_obs)) & !(is.integer(pos_obs) & 
length(pos_obs) == 0)) { clim_obs <- as.array(clim_obs) - names(dim(clim_exp)) <- dim_name[pos] - names(dim(clim_obs)) <- dim_name[pos] + dim(clim_obs) <- tmp_dim_obs[pos_obs] } } diff --git a/R/Cluster.R b/R/Cluster.R index 8f401182b93a1d1b62a1041de9779910cbcc8d2d..2fac687a3aa1dc4afcde425fa170a938247f535d 100644 --- a/R/Cluster.R +++ b/R/Cluster.R @@ -37,7 +37,7 @@ #' "ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", #' "hubert", "sdindex", and "sdbw". #' One can also use all of them with the option 'alllong' or almost all indices -# except gap, gamma, gplus and tau with 'all', when the optimal number of +#' except gap, gamma, gplus and tau with 'all', when the optimal number of #' clusters K is detremined by the majority rule (the maximum of histogram of #' the results of all indices with finite solutions). Use of some indices on #' a big and/or unstructured dataset can be computationally intense and/or @@ -53,7 +53,7 @@ #' are same as 'data' without 'space_dim'. #'} #'\item{$centers}{ -#' A nemeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees +#' A numeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees #' of freedom]). The rest dimensions are same as 'data' except 'time_dim' #' and 'space_dim'. 
#'} @@ -214,8 +214,23 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, } ############################### - # Calculate Cluster + # Compute nclusters + if (is.null(nclusters)) { + pdf(file = NULL) + nbclust.results <- NbClust::NbClust(data, distance = 'euclidean', + min.nc = 2, max.nc = 20, + method = 'kmeans', index = index) + dev.off() + if (index == 'all' || index == 'alllong') { + kmc <- hist(nbclust.results$Best.nc[1, ], breaks = seq(0, 20), + plot = FALSE)$counts + nclusters <- which(kmc == max(kmc)) + } else { + nclusters <- nbclust.results$Best.nc[1] + } + } + # Calculate Cluster output <- Apply(list(data), target_dims = c(time_dim, space_dim), fun = .Cluster, @@ -225,7 +240,7 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, return(output) } -.Cluster <- function(data, weights = NULL, nclusters = NULL, index = 'sdindex') { +.Cluster <- function(data, weights = NULL, nclusters, index = 'sdindex') { # data: [time, (lat, lon)] dat_dim <- dim(data) @@ -241,27 +256,22 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, data <- do.call(abind::abind, c(data_list, along = 0)) } } + + kmeans.results <- kmeans(data, centers = nclusters, iter.max = 300, + nstart = 30) - if (!is.null(nclusters)) { - kmeans.results <- kmeans(data, centers = nclusters, iter.max = 300, - nstart = 30) - } else { - pdf(file = NULL) - nbclust.results <- NbClust::NbClust(data, distance = 'euclidean', - min.nc = 2, max.nc = 20, - method = 'kmeans', index = index) - dev.off() - - if (index == 'all' || index == 'alllong') { - kmc <- hist(nbclust.results$Best.nc[1, ], breaks = seq(0, 20), - plot = FALSE)$counts - kmc1 <- which(kmc == max(kmc)) - } else { - kmc1 <- nbclust.results$Best.nc[1] - } +#---------------NEW--------------- + # Add dimension names and shape space_dim back + kmeans.results$cluster <- as.array(kmeans.results$cluster) + names(dim(kmeans.results$cluster)) <- names(dat_dim)[1] + 
kmeans.results$centers <- array(kmeans.results$centers, + dim = c(nclusters, dat_dim[-1])) + names(dim(kmeans.results$centers)) <- c('K', names(dat_dim)[-1]) + kmeans.results$withinss <- as.array(kmeans.results$withinss) + names(dim(kmeans.results$withinss)) <- 'K' + kmeans.results$size <- as.array(kmeans.results$size) + names(dim(kmeans.results$size)) <- 'K' - kmeans.results <- kmeans(data, centers = kmc1, iter.max = 300, - nstart = 30) - } +#----------NEW_END---------------- invisible(kmeans.results) } diff --git a/R/ColorBar.R b/R/ColorBar.R index 206f68dc9c2fba5337f2adb21c77c31d3e582494..286b1152f59dee536bd81d4a31fcfe5e668e0287 100644 --- a/R/ColorBar.R +++ b/R/ColorBar.R @@ -180,7 +180,7 @@ ColorBar <- function(brks = NULL, cols = NULL, vertical = TRUE, if (!is.null(var_limits)) { if (!(is.numeric(var_limits) && (length(var_limits) == 2))) { stop("Parameter 'var_limits' must be a numeric vector of length 2.") - } else if (any(is.na(var_limits))) { + } else if (anyNA(var_limits)) { stop("Parameter 'var_limits' must not contain NA values.") } else if (any(is.infinite(var_limits))) { stop("Parameter 'var_limits' must not contain infinite values.") @@ -210,7 +210,7 @@ ColorBar <- function(brks = NULL, cols = NULL, vertical = TRUE, brks <- length(cols) + 1 } } - if (is.null(bar_limits) || any(is.na(bar_limits))) { + if (is.null(bar_limits) || anyNA(bar_limits)) { # var_limits is defined if (is.null(bar_limits)) { bar_limits <- c(NA, NA) diff --git a/R/DiffCorr.R b/R/DiffCorr.R new file mode 100644 index 0000000000000000000000000000000000000000..c250814ce859739f4ad5a9e79e92aa14d8cd1fb4 --- /dev/null +++ b/R/DiffCorr.R @@ -0,0 +1,256 @@ +#'Compute the correlation difference and its significance +#' +#'Compute the correlation difference between two deterministic forecasts. 
+#'Positive values of the correlation difference indicate that the forecast is +#'more skillful than the reference forecast, while negative values mean that +#'the reference forecast is more skillful. The statistical significance of the +#'correlation differences is computed with a one-sided test for equality of +#'dependent correlation coefficients (Steiger, 1980; Siegert et al., 2017) using +#'effective degrees of freedom to account for the autocorrelation of the time +#'series (von Storch and Zwiers, 1999). +#' +#'@param exp A named numerical array of the forecast data with at least time +#' dimension. +#'@param obs A named numerical array with the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as "exp" except +#' 'memb_dim'. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "obs" except "time_dim" (for a particular N.eff to be used for each case) +#' . The default value is NA. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member +#' dimension to compute the ensemble mean of the forecast and reference +#' forecast. If it is NULL (default), the ensemble mean should be provided +#' directly to the function. +#'@param method A character string indicating the correlation coefficient to be +#' computed ("pearson" or "spearman"). The default value is "pearson". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. If it is a numeric, "sign" will be returned. If NULL, the +#' p-value will be returned instead. 
The default value is NULL. +#'@param handle.na A character string indicating how to handle missing values. +#' If "return.na", NAs will be returned for the cases that contain at least one +#' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +#' steps with no missing values in all "exp", "ref", and "obs" will be used. If +#' "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +#' NA. The default value is "return.na". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$diff.corr}{ +#' A numerical array of the correlation differences with the same dimensions as +#' the input arrays except "time_dim" (and "memb_dim" if provided). +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the correlation +#' differences with the same dimensions as the input arrays except "time_dim" +#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is +#' NULL. 
+#'} +#' +#'@references +#'Steiger, 1980; https://content.apa.org/doi/10.1037/0033-2909.87.2.245 +#'Siegert et al., 2017; https://doi.org/10.1175/MWR-D-16-0037.1 +#'von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336 +#' +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' res <- DiffCorr(exp, obs, ref, memb_dim = 'member') +#' +#'@import multiApply +#'@export +DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', + memb_dim = NULL, method = 'pearson', alpha = NULL, + handle.na = 'return.na', ncores = NULL) { + + # Check inputs + ## exp, ref, and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop(paste0('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".')) + } + } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".')) + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs)) | + !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + ## memb_dim + if 
(!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension.") + } + } + ## exp, ref, and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs]) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", + "all dimensions except 'memb_dim'.")) + } + ## method + if (!method %in% c("pearson", "spearman")) { + stop('Parameter "method" must be "pearson" or "spearman".') + } + if (method == "spearman") { + warning(paste0("The test used in this function is built on Pearson method. 
", + "To verify if Spearman method is reliable, you can run the ", + "Monte-Carlo simulations that are done in Siegert et al., 2017")) + } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | + length(alpha) > 1)) { + stop('Parameter "alpha" must be NULL or a number between 0 and 1.') + } + } + ## handle.na + if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) { + stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".') + } + if (!is.null(ncores)) { + if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1)) { + stop('Parameter "ncores" must be either NULL or a positive integer.') + } + } + + ############################### + + # NA check: na.fail + if (handle.na == "na.fail" & (anyNA(exp) | anyNA(ref) | anyNA(obs))) + stop('The data contain NAs.') + + # Calculate ensemble means + dim_exp <- dim(exp) + dim_ref <- dim(ref) + dim_obs <- dim(obs) + + if (!is.null(memb_dim)) { + exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) + ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) + exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) + if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) + } + + # output_dims + if (is.null(alpha)) { + output_dims <- list(diff.corr = NULL, p.val = NULL) + } else { + output_dims <- list(diff.corr = NULL, sign = NULL) + } + # Correlation difference + if (is.array(N.eff)) { + output <- Apply(data = list(exp = exp, obs = obs, ref = ref, + N.eff = N.eff), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim, N.eff = NULL), + output_dims = output_dims, + fun = .DiffCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } else { + output <- Apply(data = list(exp = exp, obs = obs, 
ref = ref), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim), + output_dims = output_dims, N.eff = N.eff, + fun = .DiffCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } + + return(output) +} + +.DiffCorr <- function(exp, obs, ref, N.eff = NA, method = 'pearson', alpha = NULL, handle.na = 'return.na') { + + .diff.corr <- function(exp, obs, ref, method = 'pearson', N.eff = NA, alpha = NULL) { + + # Correlation difference + cor.exp <- cor(x = exp, y = obs, method = method) + cor.ref <- cor(x = ref, y = obs, method = method) + output <- NULL + output$diff.corr <- cor.exp - cor.ref + + # Significance with one-sided test for equality of dependent correlation coefficients (Steiger, 1980) + r12 <- cor.exp + r13 <- cor.ref + r23 <- cor(exp, ref) + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } + R <- (1 - r12 * r12 - r13 * r13 - r23 * r23) + 2 * r12 * r13 * r23 + t <- (r12 - r13) * sqrt((N.eff - 1) * (1 + r23) / (2 * ((N.eff - 1) / (N.eff - 3)) * R + 0.25 * (r12 + r13)^2 * (1 - r23)^3)) + p.value <- 1 - pt(t, df = N.eff - 3) + if (is.null(alpha)) { + output$p.val <- p.value + } else { + output$sign <- ifelse(!is.na(p.value) & p.value <= alpha, TRUE, FALSE) + } + return(output) + } + + #================================================== + + if (anyNA(exp) | anyNA(obs) | anyNA(ref)) { ## There are NAs + if (handle.na == 'only.complete.triplets') { + nna <- is.na(exp) | is.na(obs) | is.na(ref) # A vector of T/F + if (all(nna)) stop("There is no complete set of forecasts and observations.") + # Remove the incomplete set + exp <- exp[!nna] + obs <- obs[!nna] + ref <- ref[!nna] + + output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, + N.eff = N.eff, alpha = alpha) + + } else if (handle.na == 'return.na') { + # Data contain NA, return NAs directly without passing to .diff.corr + if (is.null(alpha)) { + output <- list(diff.corr = NA, p.val = NA) + } else 
{ + output <- list(diff.corr = NA, sign = NA) + } + } + + } else { ## There is no NA + output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, + N.eff = N.eff, alpha = alpha) + } + return(output) +} diff --git a/R/EOF.R b/R/EOF.R index 8f8d6403f96876039c0087e340866101f4eddeb1..d5c79a630f776f46eee54e932aa78a912a0118fc 100644 --- a/R/EOF.R +++ b/R/EOF.R @@ -200,7 +200,7 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), # Check if all the time steps at one grid point are NA-consistent. # The grid point should have all NAs or no NA along time dim. - if (any(is.na(ano))) { + if (anyNA(ano)) { ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] na_ind <- which(is.na(ano_latlon), arr.ind = T) if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { diff --git a/R/Eno.R b/R/Eno.R index 8c8d16bfdf8aa4998df634b8f18b54942856ab3d..e2324de5a2157e1f3bda77c90ad2653c8315b4e4 100644 --- a/R/Eno.R +++ b/R/Eno.R @@ -58,7 +58,7 @@ Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { as.character(substitute(na.action)) != c("na.fail")) { stop("Parameter 'na.action' must be a function either na.pass or na.fail.") } - if(as.character(substitute(na.action))== c("na.fail") && any(is.na(data))) { + if(as.character(substitute(na.action))== c("na.fail") && anyNA(data)) { stop(paste0("Calculation fails because NA is found in paratemter 'data', ", "which is not accepted when ", "parameter 'na.action' = na.fail.")) diff --git a/R/GMST.R b/R/GMST.R index 0d6c49e8aa3a827d69d5dc9905fab23c4197efd7..85b382d524fc763f166971994067c98cc604777b 100644 --- a/R/GMST.R +++ b/R/GMST.R @@ -125,10 +125,10 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va stop("The dimension of data_tas and data_tos must be identical.") } # data_lats and data_lons part1 - if (!(class(data_lats) == 'numeric' | class(data_lats) == 'integer')) { + if (!(inherits(data_lats, 'numeric') | inherits(data_lats, 
'integer'))) { stop("Parameter 'data_lats' must be a numeric vector.") } - if (!(class(data_lons) == 'numeric' | class(data_lons) == 'integer')) { + if (!(inherits(data_lons, 'numeric') | inherits(data_lons, 'integer'))) { stop("Parameter 'data_lons' must be a numeric vector.") } # lat_dim diff --git a/R/GSAT.R b/R/GSAT.R index 1774bd684533081137b75961907dac3750ad9a21..30975b9b1b7f2678f823f3488226a7d10cff0714 100644 --- a/R/GSAT.R +++ b/R/GSAT.R @@ -94,10 +94,10 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop("Parameter 'data' must be a numeric array.") } # data_lats and data_lons part1 - if (!(class(data_lats) == 'numeric' | class(data_lats) == 'integer')) { + if (!(inherits(data_lats, 'numeric') | inherits(data_lats, 'integer'))) { stop("Parameter 'data_lats' must be a numeric vector.") } - if (!(class(data_lons) == 'numeric' | class(data_lons) == 'integer')) { + if (!(inherits(data_lons, 'numeric') | inherits(data_lons, 'integer'))) { stop("Parameter 'data_lons' must be a numeric vector.") } # type diff --git a/R/Histo2Hindcast.R b/R/Histo2Hindcast.R index 5657457d7a47933153496ada5062cb8a94bc95fa..f910a9a349e53ba62653d511c9a27b15ca522391 100644 --- a/R/Histo2Hindcast.R +++ b/R/Histo2Hindcast.R @@ -80,7 +80,7 @@ Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout, if (!is.character(sdatesout) | !is.vector(sdatesout)) { stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", "format 'YYYYMMDD' or 'YYYYMM'.")) - } else if (!all(nchar(sdatesout) %in% c(6, 8)) | any(is.na(as.numeric(sdatesin)))) { + } else if (!all(nchar(sdatesout) %in% c(6, 8)) | anyNA(as.numeric(sdatesin))) { stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", "format 'YYYYMMDD' or 'YYYYMM'.")) } diff --git a/R/Load.R b/R/Load.R index 955c8942acabdb74a28c38568afcb644a6d0ac1f..e188299aa3cbacc9ae8a4da0f19c6fa19ab29917 100644 --- a/R/Load.R +++ b/R/Load.R @@ -992,7 +992,7 @@ Load <- function(var, exp = 
NULL, obs = NULL, sdates, nmember = NULL, if (is.null(sdates)) { stop("Error: parameter 'sdates' must be provided.") } - if (!is.character(sdates) || !all(nchar(sdates) == 8) || any(is.na(strtoi(sdates)))) { + if (!is.character(sdates) || !all(nchar(sdates) == 8) || anyNA(strtoi(sdates))) { stop("Error: parameter 'sdates' is incorrect. All starting dates should be a character string in the format 'YYYYMMDD'.") } @@ -1007,7 +1007,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } if (length(nmember) != length(exp)) { stop("Error: 'nmember' must contain as many values as 'exp'.") - } else if (any(is.na(nmember))) { + } else if (anyNA(nmember)) { nmember[which(is.na(nmember))] <- max(nmember, na.rm = TRUE) } } @@ -1023,7 +1023,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } if (length(nmemberobs) != length(obs)) { stop("Error: 'nmemberobs' must contain as many values as 'obs'.") - } else if (any(is.na(nmemberobs))) { + } else if (anyNA(nmemberobs)) { nmemberobs[which(is.na(nmemberobs))] <- max(nmemberobs, na.rm = TRUE) } } @@ -1532,7 +1532,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } dims <- dim_exp[na.omit(match(c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon'), names(dim_exp)))] - if (is.null(dims[['member']]) || any(is.na(unlist(dims))) || any(unlist(dims) == 0)) { + if (is.null(dims[['member']]) || anyNA(unlist(dims)) || any(unlist(dims) == 0)) { dims <- 0 dim_exp <- NULL } @@ -1844,7 +1844,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, .warning("no data found in file system for any observational dataset.") } dims <- dim_obs[na.omit(match(c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon'), names(dim_obs)))] - if (is.null(dims[['member']]) || any(is.na(unlist(dims))) || any(unlist(dims) == 0)) { + if (is.null(dims[['member']]) || anyNA(unlist(dims)) || any(unlist(dims) == 0)) { dims <- 0 dim_obs <- NULL } @@ -1864,12 +1864,12 @@ Load <- 
function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, # So [1, 1, 1, 1, 1, 1] will be next to [2, 1, 1, 1, 1, 1] in memory pointer_var_exp <- pointer_var_obs <- NULL if (!is.null(dim_exp) && (length(unlist(dim_exp)) == length(dim_exp)) && - !any(is.na(unlist(dim_exp))) && !any(unlist(dim_exp) == 0)) { + !anyNA(unlist(dim_exp)) && !any(unlist(dim_exp) == 0)) { var_exp <- big.matrix(nrow = prod(unlist(dim_exp)), ncol = 1) pointer_var_exp <- describe(var_exp) } if (!is.null(dim_obs) && (length(unlist(dim_obs)) == length(dim_obs)) && - !any(is.na(unlist(dim_obs))) && !any(unlist(dim_obs) == 0)) { + !anyNA(unlist(dim_obs)) && !any(unlist(dim_obs) == 0)) { var_obs <- big.matrix(nrow = prod(unlist(dim_obs)), ncol = 1) pointer_var_obs <- describe(var_obs) } @@ -1878,11 +1878,11 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, } # We calculate the % of total progress that each work piece represents so # that progress bar can be updated properly - exp_work_piece_percent <- prod(dim_exp) / (prod(dim_obs) + prod(dim_exp)) - obs_work_piece_percent <- prod(dim_obs) / (prod(dim_obs) + prod(dim_exp)) + exp_work_piece_percent <- prod(unlist(dim_exp)) / (prod(unlist(dim_obs)) + prod(unlist(dim_exp))) + obs_work_piece_percent <- prod(unlist(dim_obs)) / (prod(unlist(dim_obs)) + prod(unlist(dim_exp))) # Add some important extra fields in the work pieces before sending - exp_work_pieces <- lapply(exp_work_pieces, function (x) c(x, list(dataset_type = 'exp', dims = dim_exp, out_pointer = pointer_var_exp)))###, progress_amount = exp_work_piece_progress))) - obs_work_pieces <- lapply(obs_work_pieces, function (x) c(x, list(dataset_type = 'obs', dims = dim_obs, out_pointer = pointer_var_obs)))###, progress_amount = obs_work_piece_progress))) + exp_work_pieces <- lapply(exp_work_pieces, function (x) c(x, list(dataset_type = 'exp', dims = unlist(dim_exp), out_pointer = pointer_var_exp)))###, progress_amount = exp_work_piece_progress))) + obs_work_pieces <- 
lapply(obs_work_pieces, function (x) c(x, list(dataset_type = 'obs', dims = unlist(dim_obs), out_pointer = pointer_var_obs)))###, progress_amount = obs_work_piece_progress))) work_pieces <- c(exp_work_pieces, obs_work_pieces) # Calculate the progress %s that will be displayed and assign them to the # appropriate work pieces @@ -1953,15 +1953,15 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, bytes_obs <- 0 obs_dim_sizes <- '0' } else { - bytes_obs <- prod(c(dim_obs, 8)) - obs_dim_sizes <- paste(na.omit(as.vector(dim_obs[c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon')])), collapse = ' x ') + bytes_obs <- prod(c(unlist(dim_obs), 8)) + obs_dim_sizes <- paste(na.omit(as.vector(unlist(dim_obs)[c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon')])), collapse = ' x ') } if (length(dim_exp) == 0) { bytes_exp <- 0 exp_dim_sizes <- '0' } else { - bytes_exp <- prod(c(dim_exp, 8)) - exp_dim_sizes <- paste(na.omit(as.vector(dim_exp[c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon')])), collapse = ' x ') + bytes_exp <- prod(c(unlist(dim_exp), 8)) + exp_dim_sizes <- paste(na.omit(as.vector(unlist(dim_exp)[c('dataset', 'member', 'sdate', 'ftime', 'lat', 'lon')])), collapse = ' x ') } .message(paste("Total size of requested data: ", bytes_obs + bytes_exp, "bytes.")) .message(paste("- Experimental data: (", exp_dim_sizes, ") x 8 bytes =", bytes_exp, "bytes."), indent = 2) @@ -2121,7 +2121,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, }) - if (class(errors) == 'try-error') { + if (inherits(errors, 'try-error')) { invisible(list(load_parameters = load_parameters)) } else { # Before ending, the data is arranged in the common format, with the following @@ -2146,7 +2146,7 @@ Load <- function(var, exp = NULL, obs = NULL, sdates, nmember = NULL, old_dims <- dim_exp dim_exp <- dim_exp[dim_reorder] mod_data <- - aperm(array(bigmemory::as.matrix(var_exp), dim = old_dims), dim_reorder) + 
aperm(array(bigmemory::as.matrix(var_exp), dim = unlist(old_dims)), dim_reorder) attr(mod_data, 'dimensions') <- names(dim_exp) names(dim(mod_data)) <- names(dim_exp) number_ftime <- dim_exp[["ftime"]] diff --git a/R/MeanDims.R b/R/MeanDims.R index 9e5dd49fe47ed21008ed0c95da4a2f3163d98acd..56a304dbefb98d359c0458ea44ee1ee450a016b4 100644 --- a/R/MeanDims.R +++ b/R/MeanDims.R @@ -10,9 +10,10 @@ #' not (FALSE). #'@param drop A logical value indicating whether to keep the averaged #' dimension (FALSE) or drop it (TRUE). The default value is TRUE. -#'@return An array with the same dimension as parameter 'data' except the -#' 'dims' dimensions. If 'drop' is TRUE, 'dims' will be removed; if 'drop' is -#' FALSE, 'dims' will be preserved and the length will be 1. +#'@return A numeric or an array with the same dimension as parameter 'data' +#' except the 'dims' dimensions. If 'drop' is TRUE, 'dims' will be removed; if +#' 'drop' is FALSE, 'dims' will be preserved and the length will be 1. If all +#' the dimensions are averaged out, a numeric is returned. 
#' #'@examples #'a <- array(rnorm(24), dim = c(dat = 2, member = 3, time = 4)) @@ -69,7 +70,7 @@ MeanDims <- function(data, dims, na.rm = FALSE, drop = TRUE) { if (length(dims) == length(dim_data)) { if (drop) { - data <- as.array(mean(data, na.rm = na.rm)) + data <- mean(data, na.rm = na.rm) } else { data <- array(mean(data, na.rm = na.rm), dim = rep(1, length(dim_data))) @@ -79,8 +80,8 @@ MeanDims <- function(data, dims, na.rm = FALSE, drop = TRUE) { if (is.character(dims)) { dims <- which(names(dim_data) %in% dims) } - pos <- (1:length(dim_data))[-dims] - data <- apply(data, pos, mean, na.rm = na.rm) + data <- aperm(data, c(dims, (1:length(dim_data))[-dims])) + data <- colMeans(data, dims = length(dims), na.rm = na.rm) # If data is vector if (is.null(dim(data))) { diff --git a/R/Persistence.R b/R/Persistence.R index 5a53857f797c47db5e99bb912c2be44d52e8b001..9895f479a460e22dafc731185bed30fa50f22cdf 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -122,7 +122,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", "string (YYYY-MM-DD) in class 'Date'.")) } - } else if (class(dates) == 'Date') { #(YYYY-MM-DD) + } else if (inherits(dates, 'Date')) { #(YYYY-MM-DD) } else { stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", @@ -148,7 +148,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop(paste0("Parameter 'start' must start at least 40 time steps after ", "the first 'dates'.")) } - } else if (class(start) == 'Date') { + } else if (inherits(start, 'Date')) { if (length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) | any(start > as.Date(ISOdate(2021, 1, 1)))) { stop(paste0("Parameter 'start' must be an integer or a string in class ", @@ -176,7 +176,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, stop(paste0("Parameter 'end' must end at most 1 time steps after ", 
"the last 'dates'.")) + } - } else if (class(end) == 'Date') { + } else if (inherits(end, 'Date')) { if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) | any(end > as.Date(ISOdate(2020, 12, 31)))) { stop(paste0("Parameter 'end' must be an integer or a string in class ", diff --git a/R/PlotACC.R b/R/PlotACC.R index 3bffa68f279234a7d6881eb1848830df6f7ad3d9..a674ff69d91e5de3dac876d97db8e112c26eb6e8 100644 --- a/R/PlotACC.R +++ b/R/PlotACC.R @@ -65,8 +65,8 @@ #'ano_exp <- Ano(sampleData$mod, clim$clim_exp) #'ano_obs <- Ano(sampleData$obs, clim$clim_obs) -#'acc <- ACC(ano_exp, ano_obs) -#'acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap') +#'acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) +#'acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap') #'# Combine acc results for PlotACC #'res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), #' dim = c(dim(acc$acc), 4)) diff --git a/R/PlotEquiMap.R b/R/PlotEquiMap.R index 3de30e984afe36fdd29b1ece13069d5f67abd1d1..ee10ec3aa4f2c42947f1b286b03a4f1e4d683bbf 100644 --- a/R/PlotEquiMap.R +++ b/R/PlotEquiMap.R @@ -134,11 +134,24 @@ #'@param lab_dist_y A numeric of the distance of the latitude labels to the #' box borders. The default value is NULL and is automatically adjusted by #' the function. -#'@param degree_sym A logical indicating whether to include degree symbol (30° N) or not (30N; default). +#'@param degree_sym A logical indicating whether to include degree symbol (30° N) +#' or not (30N; default). #'@param intylat Interval between latitude ticks on y-axis, in degrees. #' Defaults to 20. #'@param intxlon Interval between latitude ticks on x-axis, in degrees. #' Defaults to 20. +#'@param xlonshft A numeric of the degrees to shift the longitude ticks. The +#' default value is 0. +#'@param ylatshft A numeric of the degrees to shift the latitude ticks. The +#' default value is 0. +#'@param xlabels A vector of character string of the customized x-axis labels.
+#' The values should correspond to each tick, which is decided by the longitude +#' and parameter 'intxlon'. The default value is NULL and the labels will be +#' automatically generated. +#'@param ylabels A vector of character string of the customized y-axis labels. +#' The values should correspond to each tick, which is decided by the latitude +#' and parameter 'intylat'. The default value is NULL and the labels will be +#' automatically generated. #'@param axes_tick_scale Scale factor for the tick lines along the longitude #' and latitude axes. #'@param axes_label_scale Scale factor for the labels along the longitude @@ -249,7 +262,8 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, arr_scale_shaft = 1, arr_scale_shaft_angle = 1, axelab = TRUE, labW = FALSE, lab_dist_x = NULL, lab_dist_y = NULL, degree_sym = FALSE, - intylat = 20, intxlon = 20, + intylat = 20, intxlon = 20, + xlonshft = 0, ylatshft = 0, xlabels = NULL, ylabels = NULL, axes_tick_scale = 1, axes_label_scale = 1, drawleg = TRUE, subsampleg = NULL, bar_extra_labels = NULL, draw_bar_ticks = TRUE, @@ -635,6 +649,22 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, } else { intxlon <- round(intxlon) } + if (!is.numeric(xlonshft) | length(xlonshft) != 1) { + stop("Parameter 'xlonshft' must be a number.") + } + if (!is.numeric(ylatshft) | length(ylatshft) != 1) { + stop("Parameter 'ylatshft' must be a number.") + } + if (!is.null(xlabels)) { + if (!is.character(xlabels) | !is.vector(xlabels)) { + stop("Parameter 'xlabels' must be a vector of character string.") + } + } + if (!is.null(ylabels)) { + if (!is.character(ylabels) | !is.vector(ylabels)) { + stop("Parameter 'ylabels' must be a vector of character string.") + } + } # Check legend parameters if (!is.logical(drawleg)) { @@ -728,11 +758,6 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, latmax <- ceiling(max(lat) / 10) * 10 lonmin <- floor(min(lon) / 10) * 10 lonmax <- ceiling(max(lon) / 10) *
10 - if (min(lon) < 0) { - continents <- 'world' - } else { - continents <- 'world2' - } # # Plotting the map @@ -760,40 +785,61 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, cex_axes_ticks <- -0.5 * axes_tick_scale spaceticklab <- 0 if (axelab) { - ypos <- seq(latmin, latmax, intylat) - xpos <- seq(lonmin, lonmax, intxlon) - letters <- array('', length(ypos)) - if (degree_sym == FALSE) { - letters[ypos < 0] <- 'S' - letters[ypos > 0] <- 'N' + # Y axis label + if (!is.null(ylabels)) { + ypos <- seq(latmin, latmax, intylat) + ylatshft + if (length(ypos) != length(ylabels)) { + stop(paste0("Parameter 'ylabels' must have the same length as the latitude ", + "vector spaced by 'intylat' (length = ", length(ypos), ").")) + } + ylabs <- ylabels } else { - letters[ypos < 0] <- paste(intToUtf8(176), 'S') - letters[ypos > 0] <- paste(intToUtf8(176), 'N') - } - ylabs <- paste(as.character(abs(ypos)), letters, sep = '') - letters <- array('', length(xpos)) - if (labW) { - xpos2 <- xpos - xpos2[xpos2 > 180] <- 360 - xpos2[xpos2 > 180] + ypos <- seq(latmin, latmax, intylat) + ylatshft + letters <- array('', length(ypos)) + if (degree_sym == FALSE) { + letters[ypos < 0] <- 'S' + letters[ypos > 0] <- 'N' + } else { + letters[ypos < 0] <- paste(intToUtf8(176), 'S') + letters[ypos > 0] <- paste(intToUtf8(176), 'N') + } + ylabs <- paste(as.character(abs(ypos)), letters, sep = '') } - if (degree_sym == FALSE) { - letters[xpos < 0] <- 'W' - letters[xpos > 0] <- 'E' + + # X axis label + if (!is.null(xlabels)) { + xpos <- seq(lonmin, lonmax, intxlon) + xlonshft + if (length(xpos) != length(xlabels)) { + stop(paste0("Parameter 'xlabels' must have the same length as the longitude ", + "vector spaced by 'intxlon' (length = ", length(xpos), ").")) + } + xlabs <- xlabels } else { - letters[xpos < 0] <- paste(intToUtf8(176), 'W') - letters[xpos > 0] <- paste(intToUtf8(176), 'E') - } - if (labW) { - letters[xpos == 0] <- ' ' - letters[xpos == 180] <- ' ' + xpos <- 
seq(lonmin, lonmax, intxlon) + xlonshft + letters <- array('', length(xpos)) + if (labW) { + xpos2 <- xpos + xpos2[xpos2 > 180] <- 360 - xpos2[xpos2 > 180] + } if (degree_sym == FALSE) { - letters[xpos > 180] <- 'W' + letters[xpos < 0] <- 'W' + letters[xpos > 0] <- 'E' } else { - letters[xpos > 180] <- paste(intToUtf8(176), 'W') - } - xlabs <- paste(as.character(abs(xpos2)), letters, sep = '') - } else { - xlabs <- paste(as.character(abs(xpos)), letters, sep = '') + letters[xpos < 0] <- paste(intToUtf8(176), 'W') + letters[xpos > 0] <- paste(intToUtf8(176), 'E') + } + if (labW) { + letters[xpos == 0] <- ' ' + letters[xpos == 180] <- ' ' + if (degree_sym == FALSE) { + letters[xpos > 180] <- 'W' + } else { + letters[xpos > 180] <- paste(intToUtf8(176), 'W') + } + xlabs <- paste(as.character(abs(xpos2)), letters, sep = '') + } else { + xlabs <- paste(as.character(abs(xpos)), letters, sep = '') + } } spaceticklab <- max(-cex_axes_ticks, 0) margins[1] <- margins[1] + 1.2 * cex_axes_labels + spaceticklab @@ -847,9 +893,19 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, col_inf_image <- ifelse(is.null(col_inf), colNA, col_inf) col_sup_image <- ifelse(is.null(col_sup), colNA, col_sup) if (square) { - image(lonb$x, latb$x, var[lonb$ix, latb$ix], col = c(col_inf_image, cols, col_sup_image), - breaks = c(-.Machine$double.xmax, brks, .Machine$double.xmax), - axes = FALSE, xlab = "", ylab = "", add = TRUE) + # If lat and lon are both regular-spaced, "useRaster = TRUE" can avoid + # artifact white lines on the figure. 
If not, useRaster has to be FALSE (default) + tryCatch({ + image(lonb$x, latb$x, var[lonb$ix, latb$ix], + col = c(col_inf_image, cols, col_sup_image), + breaks = c(-.Machine$double.xmax, brks, .Machine$double.xmax), + axes = FALSE, xlab = "", ylab = "", add = TRUE, useRaster = TRUE) + }, error = function(x) { + image(lonb$x, latb$x, var[lonb$ix, latb$ix], + col = c(col_inf_image, cols, col_sup_image), + breaks = c(-.Machine$double.xmax, brks, .Machine$double.xmax), + axes = FALSE, xlab = "", ylab = "", add = TRUE) + }) } else { .filled.contour(lonb$x, latb$x, var[lonb$ix, latb$ix], levels = c(.Machine$double.xmin, brks, .Machine$double.xmax), @@ -886,24 +942,15 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, # Plotting continents # ~~~~~~~~~~~~~~~~~~~~~ # - # maps::map has the global map range [0, 360] or [-180, 180]. - # Set xlim so lon = 0 won't show a straight line when lon = [0, 359]. - # NOTE: It works except Antartic area. Don't know why. ylim is also set - # but it doesn't work. - if (continents == 'world') { # [-180, 180] - xlim_conti <- c(-179.99, 179.99) - } else { # [0, 360] - xlim_conti <- c(0.01, 359.99) - } + wrap_vec <- c(lon[1], lon[1] + 360) old_lwd <- par('lwd') par(lwd = coast_width) # If [0, 360], use GEOmap; if [-180, 180], use maps::map # UPDATE: Use maps::map for both cases. The difference between GEOmap and # maps is trivial. The only thing we can see for now is that # GEOmap has better lakes. 
- coast <- maps::map(continents, interior = country.borders, wrap = TRUE, - xlim = xlim_conti, ylim = c(-89.99, 89.99), - fill = filled.continents, add = TRUE, plot = FALSE) + coast <- maps::map(interior = country.borders, wrap = wrap_vec, + fill = filled.continents, add = TRUE, plot = FALSE) if (filled.continents) { polygon(coast, col = continent_color, border = coast_color, lwd = coast_width) @@ -911,7 +958,7 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, lines(coast, col = coast_color, lwd = coast_width) } if (!is.null(lake_color)) { - maps::map('lakes', add = TRUE, fill = filled.continents, col = lake_color) + maps::map('lakes', add = TRUE, wrap = wrap_vec, fill = filled.continents, col = lake_color) } par(lwd = old_lwd) @@ -920,8 +967,8 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, old_lwd <- par('lwd') par(lwd = coast_width) - outline <- maps::map(continents, fill = T, plot = FALSE) # must be fill = T - xbox <- xlim_conti + c(-2, 2) + outline <- maps::map(wrap = wrap_vec, fill = T, plot = FALSE) # must be fill = T + xbox <- wrap_vec + c(-2, 2) ybox <- c(-92, 92) outline$x <- c(outline$x, NA, c(xbox, rev(xbox), xbox[1])) outline$y <- c(outline$y, NA, rep(ybox, each = 2), ybox[1]) @@ -931,9 +978,9 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, } # Plot shapefile + #NOTE: the longitude range cannot cut shapefile range, or not all the shapefile will be plotted. 
if (!is.null(shapefile)) { - maps::map(shape, interior = country.borders, #wrap = TRUE, - xlim = xlim_conti, ylim = c(-89.99, 89.99), + maps::map(shape, interior = country.borders, #wrap = wrap_vec, fill = filled.continents, add = TRUE, plot = TRUE, lwd = shapefile_lwd, col = shapefile_color) } diff --git a/R/PlotLayout.R b/R/PlotLayout.R index 50a87f26a5c354ce0e1ed09782c615207ab9a366..742478e08db3ad08563ae00b6619379b9448bb9b 100644 --- a/R/PlotLayout.R +++ b/R/PlotLayout.R @@ -56,6 +56,19 @@ #' b) providing a list of named sub-lists in 'special_args', where the names #' of each sub-list match the names of the parameters to be adjusted, and #' each value in a sub-list contains the value of the corresponding parameter. +#' For example, if the plots are two maps with different arguments, the +#' structure would be like:\cr +#' var:\cr +#' List of 2\cr +#' $ : num [1:360, 1:181] 1 3.82 5.02 6.63 8.72 ...\cr +#' $ : num [1:360, 1:181] 2.27 2.82 4.82 7.7 10.32 ...\cr +#' special_args:\cr +#' List of 2\cr +#' $ :List of 2\cr +#' ..$ arg1: ...\cr +#' ..$ arg2: ...\cr +#' $ :List of 1\cr +#' ..$ arg1: ...\cr #'@param nrow Numeric value to force the number of rows in the automatically #' generated layout. If higher than the required, this will yield blank cells #' in the layout (which can then be populated). 
If lower than the required @@ -225,7 +238,7 @@ PlotLayout <- function(fun, plot_dims, var, ..., special_args = NULL, is_single_na <- function(x) ifelse(length(x) > 1, FALSE, is.na(x)) # Check var - if (is.array(var) || (is_single_na(var))) { + if (!is.list(var) & (is.array(var) || (is_single_na(var)))) { var <- list(var) } else if (is.list(var)) { if (!all(sapply(var, is.array) | sapply(var, is_single_na))) { diff --git a/R/RPS.R b/R/RPS.R new file mode 100644 index 0000000000000000000000000000000000000000..8ded53e6d1546373f065bca04f07d12bebfbc73a --- /dev/null +++ b/R/RPS.R @@ -0,0 +1,248 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +#'and 1. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. 
The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPS (the +#' potential RPS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights A named two-dimensional numerical array of the weights for each +#' member and time. The dimension names should include 'memb_dim' and +#' 'time_dim'. The default value is NULL. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of RPS with the same dimensions as "exp" except the +#''time_dim' and 'memb_dim' dimensions. 
+#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + weights = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + ## prob_thresholds + if 
(!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop('Parameter "weights" must be a two-dimensional numeric array.') + if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs), + output_dims = time_dim, + fun = .RPS, + prob_thresholds = prob_thresholds, + 
indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, ncores = ncores)$output1 + # browser() + # Return only the mean RPS + rps <- MeanDims(rps, time_dim, na.rm = FALSE) + + return(rps) +} + + +.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { + # exp: [sdate, memb] + # obs: [sdate, (memb)] + exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, weights = weights) + # exp_probs: [bin, sdate] + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL) + # obs_probs: [bin, sdate] + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum) + # rps: [sdate] + + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + R_new <- Inf + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- apply(adjustment, 2, sum) + rps <- rps + adjustment + } + + return(rps) +} + +.get_probs <- function(data, indices_for_quantiles, prob_thresholds, weights = NULL) { + # if exp: [sdate, memb] + # if obs: [sdate, (memb)] + + # Add dim [memb = 1] to obs if it doesn't have memb_dim + if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) + + # Absolute thresholds + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, 
"linear")$y + } + + # Probabilities + probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] + for (i_time in 1:dim(data)[1]) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = c(bin = length(quantiles))) + for (i_quant in 1:length(quantiles)) { + # for thresholds falling under the distribution + if (quantiles[i_quant] < min(sorted_data)) { + integrated_probs[i_quant] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant]) { + integrated_probs[i_quant] <- 1 + } else { + integrated_probs[i_quant] <- approx(sorted_data, cumulative_weights, quantiles[i_quant], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs, 1) - append(0, integrated_probs) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + } + } + } + } + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) 
+} diff --git a/R/RPSS.R b/R/RPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..3b24777ddde51eaab88286ca6479a07724e7eeb3 --- /dev/null +++ b/R/RPSS.R @@ -0,0 +1,241 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill. +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. +#'The statistical significance is obtained based on a Random Walk test at the +#'95% confidence level (DelSole and Tippett, 2016). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as 'exp' except +#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference +#' forecast. The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. 
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights A named two-dimensional numerical array of the weights for each +#' member and time. The dimension names should include 'memb_dim' and +#' 'time_dim'. The default value is NULL. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array of the RPSS with the same dimensions as "exp" except the +#' 'time_dim' and 'memb_dim' dimensions. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. 
+#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', + prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, + weights = NULL, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + 
name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + name_ref <- name_ref[-which(name_ref == memb_dim)] + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- 1:dim(obs)[time_dim] + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim) < 1) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop('Parameter "weights" must be a two-dimensional numeric array.') + if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 
'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- time_dim + } else { + target_dims_obs <- c(time_dim, memb_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs, + ref = c(time_dim, memb_dim)) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = c(time_dim, memb_dim), + obs = target_dims_obs) + } + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, + ncores = ncores) + + return(output) +} + +.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, Fair = FALSE, weights = NULL) { + # exp: [sdate, memb] + # obs: [sdate, (memb)] + # ref: [sdate, memb] or NULL + + # RPS of the forecast + rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights) + + # RPS of the reference forecast + if (is.null(ref)) { ## using climatology as reference forecast + obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL) + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- 
apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + # rps_ref: [sdate] + +# if (Fair) { # FairRPS +# ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] +# R <- dim(exp)[2] #memb +# R_new <- Inf +# adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) +# adjustment <- apply(adjustment, 2, sum) +# rps_ref <- rps_ref + adjustment +# } + + } else { # use "ref" as reference forecast + rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, weights = weights) + } + + # RPSS + rpss <- 1 - mean(rps_exp) / mean(rps_ref) + + # Significance + sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif + + return(list(rpss = rpss, sign = sign)) +} + diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 494be6520dba4d6aedff0449b23e59a321e00732..adeadc1ec94b62920c885640938f966c91e75ddc 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -1,8 +1,8 @@ #'Random walk test for skill differences #' #'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 5% -#'confidence level, as in DelSole and Tippett (2015). +#'common reference) based on Random Walks, with significance estimate at the 95% +#'confidence level, as in DelSole and Tippett (2016). #' #'@param skill_A A numerical array of the time series of the skill with the #' forecaster A's. 
diff --git a/R/Regression.R b/R/Regression.R index 244ddc729e94f8046b9c0ff1f0c47c89b08df62b..1cd12e6e206bfda316bca0ac5d98f7da1f703228 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -123,7 +123,7 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, stop("Parameter 'reg_dim' is not found in 'datay' or 'datax' dimension.") } ## formula - if (class(formula) != 'formula') { + if (!inherits(formula, 'formula')) { stop("Parameter 'formula' must the an object of class 'formula'.") } ## pval diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R new file mode 100644 index 0000000000000000000000000000000000000000..7428d1b08be1ab462cbd37750e82835a2c91f797 --- /dev/null +++ b/R/ResidualCorr.R @@ -0,0 +1,264 @@ +#'Compute the residual correlation and its significance +#' +#'The residual correlation assesses whether a forecast captures any of the +#'observed variability that is not already captured by a reference forecast +#'(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). +#'The procedure is as follows: the residuals of the forecasts and observations +#'are computed by linearly regressing out the reference forecast's ensemble mean +#'from the forecasts' ensemble mean and observations, respectively. Then, the +#'residual correlation is computed as the correlation between both residuals. +#'Positive values of the residual correlation indicate that the forecast captures +#'more observed variability than the reference forecast, while negative values +#'mean that the reference forecast captures more. The significance of the +#'residual correlation is computed with a two-sided t-test +#'(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an +#'effective degrees of freedom to account for the time series' autocorrelation +#'(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. 
+#'@param obs A named numerical array of the observations with at least time +#' dimension. The dimensions must be the same as "exp" except 'memb_dim'. +#'@param ref A named numerical array of the reference forecast data with at +#' least time dimension. The dimensions must be the same as "exp" except +#' 'memb_dim'. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "obs" except "time_dim" (for a particular N.eff to be used for each case) +#' . The default value is NA. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean of the forecast and reference forecast. If it +#' is NULL, the ensemble mean should be provided directly to the function. The +#' default value is NULL. +#'@param method A character string indicating the correlation coefficient to be +#' computed ("pearson", "kendall", or "spearman"). The default value is +#' "pearson". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. If it is a numeric, "sign" will be returned. If NULL, the +#' p-value will be returned instead. The default value is NULL. +#'@param handle.na A character string indicating how to handle missing values. +#' If "return.na", NAs will be returned for the cases that contain at least one +#' NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +#' steps with no missing values in all "exp", "ref", and "obs" will be used. If +#' "na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +#' NA. The default value is "return.na". +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. 
The default value is NULL. +#' +#'@return A list with: +#'\item{$res.corr}{ +#' A numerical array of the residual correlation with the same dimensions as +#' the input arrays except "time_dim" (and "memb_dim" if provided). +#'} +#'\item{$sign}{ +#' A logical array indicating whether the residual correlation is statistically +#' significant or not with the same dimensions as the input arrays except "time_dim" +#' (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is +#' NULL. +#'} +#' +#'@examples +#' exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#' obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#' ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +#' res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member') +#' +#'@import multiApply +#'@export +ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', + memb_dim = NULL, method = 'pearson', alpha = NULL, + handle.na = 'return.na', ncores = NULL) { + + # Check inputs + ## exp, ref, and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (!is.array(ref) | !is.numeric(ref)) + stop('Parameter "ref" must be a numeric array.') + + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop(paste0('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".')) + } + } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + 
stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".')) + } + + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs)) | + !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension.") + } + } + ## exp, ref, and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | + any(dim(exp)[name_exp] != dim(obs)[name_obs]) | any(dim(exp)[name_exp] != dim(ref)[name_ref])) { + stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", + "all dimensions expect 'memb_dim'.")) + } + ## method + if (!method %in% c("pearson", "kendall", "spearman")) { + stop('Parameter "method" must be "pearson", "kendall", or "spearman".') + } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | + length(alpha) > 1)) { + stop('Parameter "alpha" must be NULL or a number between 0 and 1.') + } + } + ## handle.na + if (!handle.na %in% c('return.na', 'only.complete.triplets', 'na.fail')) { + stop('Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".') + } + ## ncores + if (!is.null(ncores)) { + if (any(!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 
1)) { + stop('Parameter "ncores" must be either NULL or a positive integer.') + } + } + + ############################### + + # NA check: na.fail + if (handle.na == "na.fail" & (anyNA(exp) | anyNA(obs) | anyNA(ref))) + stop('The data contain NAs.') + + # Calculate ensemble mean + dim_exp <- dim(exp) + dim_obs <- dim(obs) + dim_ref <- dim(ref) + + if (!is.null(memb_dim)) { + exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) + ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) + exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) + if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) + } + + # output_dims + if (is.null(alpha)) { + output_dims <- list(res.corr = NULL, p.val = NULL) + } else { + output_dims <- list(res.corr = NULL, sign = NULL) + } + + + # Residual correlation + if (is.array(N.eff)) { + output <- Apply(data = list(exp = exp, obs = obs, ref = ref, + N.eff = N.eff), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim, N.eff = NULL), + output_dims = output_dims, + fun = .ResidualCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } else { + output <- Apply(data = list(exp = exp, obs = obs, ref = ref), + target_dims = list(exp = time_dim, obs = time_dim, + ref = time_dim), + output_dims = output_dims, N.eff = N.eff, + fun = .ResidualCorr, method = method, + alpha = alpha, handle.na = handle.na, ncores = ncores) + } + + return(output) +} + +.ResidualCorr <- function(exp, obs, ref, N.eff, method, alpha, handle.na) { + # exp and ref and obs: [time] + .residual.corr <- function(exp, obs, ref, method, N.eff, alpha) { + + # Residuals of 'exp' and 'obs' (regressing 'ref' out in both 'exp' and 'obs') + exp_res <- lm(formula = y ~ x, data = list(y = exp, x = ref), na.action = NULL)$residuals + obs_res <- lm(formula = 
y ~ x, data = list(y = obs, x = ref), na.action = NULL)$residuals + + # Residual correlation (and significance) + output <- NULL + output$res.corr <- cor(x = exp_res, y = obs_res, method = method) + + # Effective degrees of freedom + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs_res, na.action = na.pass) + } + t <- abs(output$res.corr) * sqrt(N.eff - 2) / sqrt(1 - output$res.corr^2) + + if (is.null(alpha)) { # p-value + output$p.val <- pt(q = t, df = N.eff - 2, lower.tail = FALSE) + } else { + t_alpha2_n2 <- qt(p = alpha / 2, df = N.eff - 2, lower.tail = FALSE) + if (!anyNA(c(t, t_alpha2_n2)) & t >= t_alpha2_n2) { + output$sign <- TRUE + } else { + output$sign <- FALSE + } + } + + return(output) + } + + + #================================================== + + if (anyNA(exp) | anyNA(obs) | anyNA(ref)) { ## There are NAs + if (handle.na == 'only.complete.triplets') { + nna <- is.na(exp) | is.na(obs) | is.na(ref) # A vector of T/F + if (all(nna)) stop("There is no complete set of forecasts and observations.") + # Remove the incomplete set + exp <- exp[!nna] + obs <- obs[!nna] + ref <- ref[!nna] + + output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, + N.eff = N.eff, alpha = alpha) + + } else if (handle.na == 'return.na') { + # Data contain NA, return NAs directly without passing to .diff.corr + if (is.null(alpha)) { + output <- list(res.corr = NA, p.val = NA) + } else { + output <- list(res.corr = NA, sign = NA) + } + } + + } else { ## There is no NA + output <- .residual.corr(exp = exp, obs = obs, ref = ref, method = method, + N.eff = N.eff, alpha = alpha) + } + + return(output) +} diff --git a/R/SPOD.R b/R/SPOD.R index 51717b31ee05cedb757347e3bcf7b02ba644c75b..2599477900b13d3b8d89ae7a282d7fd4158502e6 100644 --- a/R/SPOD.R +++ b/R/SPOD.R @@ -97,10 +97,10 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop("Parameter 'data' must be a numeric array.") } # data_lats and data_lons part1 - if (!(class(data_lats) 
== 'numeric' | class(data_lats) == 'integer')) { + if (!(inherits(data_lats, 'numeric') | inherits(data_lats, 'integer'))) { stop("Parameter 'data_lats' must be a numeric vector.") } - if (!(class(data_lons) == 'numeric' | class(data_lons) == 'integer')) { + if (!(inherits(data_lons, 'numeric') | inherits(data_lons, 'integer'))) { stop("Parameter 'data_lons' must be a numeric vector.") } # type diff --git a/R/TPI.R b/R/TPI.R index 80d958ea2fb27d20acbceb26f0f4a00e29ba0f3d..d3f0550613a831f9428d8adaf43863b81c12bd93 100644 --- a/R/TPI.R +++ b/R/TPI.R @@ -96,10 +96,10 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'data' must be a numeric array.") } # data_lats and data_lons part1 - if (!(class(data_lats) == 'numeric' | class(data_lats) == 'integer')) { + if (!(inherits(data_lats, 'numeric') | inherits(data_lats, 'integer'))) { stop("Parameter 'data_lats' must be a numeric vector.") } - if (!(class(data_lons) == 'numeric' | class(data_lons) == 'integer')) { + if (!(inherits(data_lons, 'numeric') | inherits(data_lons, 'integer'))) { stop("Parameter 'data_lons' must be a numeric vector.") } # type @@ -181,7 +181,7 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } # indices_for_clim if (!is.null(indices_for_clim)) { - if (!class(indices_for_clim) %in% c('numeric', 'integer') + if (!(inherits(indices_for_clim, 'numeric') | inherits(indices_for_clim, 'integer')) & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", "or NULL to compute the anomalies based on the whole period, ", diff --git a/R/Utils.R b/R/Utils.R index 76ef63a63d1510cadf6974f44185a74823e69f7e..c13691499e7db59f4247411b0ad79498f9c0a65b 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -255,7 +255,7 @@ NA } result <- FALSE - if (!any(is.na(c(nlons, nlats)))) { + if (!anyNA(c(nlons, nlats))) { if ((nlons == length(lon)) && (nlats == length(lat))) { 
result <- TRUE @@ -349,11 +349,11 @@ if (!is.null(work_piece[['progress_amount']])) { cat("\n") } - cat(paste0("! Warning: the dataset with index ", + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' doesn't start at longitude 0 and will be re-interpolated in order to align its longitudes with the standard CDO grids definable with the names 'tgrid' or 'rx', which are by definition starting at the longitude 0.\n")) if (!is.null(mask)) { - cat(paste0("! Warning: a mask was provided for the dataset with index ", + .warning(paste0("A mask was provided for the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "'. This dataset has been re-interpolated to align its longitudes to start at 0. You must re-interpolate the corresponding mask to align its longitudes to start at 0 as well, if you haven't done so yet. Running cdo remapcon,", common_grid_name, " original_mask_file.nc new_mask_file.nc will fix it.\n")) } @@ -363,7 +363,7 @@ cat("\n") } if (!explore_dims) { - cat(paste0("! Warning: the dataset with index ", tail(work_piece[['indices']], 1), + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is originally on ", "a grid coarser than the common grid and it has been ", "extrapolated. Check the results carefully. It is ", @@ -583,7 +583,7 @@ } if (length(expected_dims) > 0) { dim_matches <- match(expected_dims, var_dimnames) - if (any(is.na(dim_matches))) { + if (anyNA(dim_matches)) { if (!is.null(old_members_dimname)) { expected_dims[which(expected_dims == 'lev')] <- old_members_dimname } @@ -836,7 +836,7 @@ if (!all(dim_matches == sort(dim_matches))) { if (!found_disordered_dims && rev(work_piece[['indices']])[2] == 1 && rev(work_piece[['indices']])[3] == 1) { found_disordered_dims <- TRUE - cat(paste0("! 
Warning: the dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) + .warning(paste0("The dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) } tmp <- aperm(tmp, dim_matches) } @@ -879,13 +879,13 @@ } if (output == 'areave' || output == 'lon') { - weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons)) + weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons), name = 'lon') weights[which(is.na(x))] <- NA if (output == 'areave') { weights <- weights / mean(weights, na.rm = TRUE) mean(x * weights, na.rm = TRUE) } else { - weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats)) + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats), name = 'lat') MeanDims(x * weights, 2, na.rm = TRUE) } } else if (output == 'lat') { diff --git a/man/ACC.Rd b/man/ACC.Rd index d1389fdb596ee9a2162723d8c1ce1410f3542b19..7df6abb21bbcade409685f8b4798edbcc472bdd6 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -2,12 +2,14 @@ % Please edit documentation in R/ACC.R \name{ACC} \alias{ACC} -\title{Compute the anomaly correlation coefficient between the forecast and corresponding observation} +\title{Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation} \usage{ ACC( exp, obs, dat_dim = "dataset", + lat_dim = "lat", + lon_dim = "lon", space_dim = c("lat", "lon"), avg_dim = "sdate", memb_dim = "member", @@ -23,37 +25,47 @@ ACC( } \arguments{ 
\item{exp}{A numeric array of experimental anomalies with named dimensions. -It must have at least 'dat_dim' and 'space_dim'.} +The dimension must have at least 'lat_dim' and 'lon_dim'.} \item{obs}{A numeric array of observational anomalies with named dimensions. -It must have the same dimensions as 'exp' except the length of 'dat_dim' +The dimension should be the same as 'exp' except the length of 'dat_dim' and 'memb_dim'.} \item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) -dimension. The default value is 'dataset'.} +dimension. The default value is 'dataset'. If there is no dataset +dimension, set NULL.} + +\item{lat_dim}{A character string indicating the name of the latitude +dimension of 'exp' and 'obs' along which ACC is computed. The default value +is 'lat'.} + +\item{lon_dim}{A character string indicating the name of the longitude +dimension of 'exp' and 'obs' along which ACC is computed. The default value +is 'lon'.} \item{space_dim}{A character string vector of 2 indicating the name of the latitude and longitude dimensions (in order) along which ACC is computed. -The default value is c('lat', 'lon').} +The default value is c('lat', 'lon'). This argument has been deprecated. +Use 'lat_dim' and 'lon_dim' instead.} \item{avg_dim}{A character string indicating the name of the dimension to be -averaged. It must be one of 'time_dim'. The mean ACC is calculated along -averaged. If no need to calculate mean ACC, set as NULL. The default value -is 'sdate'.} +averaged, which is usually the time dimension. If no need to calculate mean +ACC, set as NULL. The default value is 'sdate'.} \item{memb_dim}{A character string indicating the name of the member dimension. If the data are not ensemble ones, set as NULL. The default value is 'member'.} -\item{lat}{A vector of the latitudes of the exp/obs grids. Only required when -the domain of interested is specified. 
The default value is NULL.} +\item{lat}{A vector of the latitudes of the exp/obs grids. It is used for +area weighting and when the domain of interested 'lonlatbox' is specified.} \item{lon}{A vector of the longitudes of the exp/obs grids. Only required when -the domain of interested is specified. The default value is NULL.} +the domain of interested 'lonlatbox' is specified. The default value is +NULL.} \item{lonlatbox}{A numeric vector of 4 indicating the corners of the domain of -interested: c(lonmin, lonmax, latmin, latmax). Only required when the domain -of interested is specified. The default value is NULL.} +interested: c(lonmin, lonmax, latmin, latmax). The default value is NULL +and the whole data will be used.} \item{conf}{A logical value indicating whether to retrieve the confidence intervals or not. The default value is TRUE.} @@ -81,8 +93,9 @@ computation. The default value is NULL.} A list containing the numeric arrays:\cr \item{acc}{ The ACC with the dimensions c(nexp, nobs, the rest of the dimension except - space_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in - exp), and nobs is the number of observation (i.e., dat_dim in obs). + lat_dim, lon_dim and memb_dim). nexp is the number of experiment (i.e., dat_dim in + exp), and nobs is the number of observation (i.e., dat_dim in obs). If + dat_dim is NULL, nexp and nobs are omitted. } \item{conf.lower (if conftype = "parametric") or acc_conf.lower (if conftype = "bootstrap")}{ @@ -100,8 +113,9 @@ A list containing the numeric arrays:\cr } \item{macc}{ The mean anomaly correlation coefficient with dimensions - c(nexp, nobs, the rest of the dimension except space_dim, memb_dim, and - avg_dim). Only present if 'avg_dim' is not NULL. + c(nexp, nobs, the rest of the dimension except lat_dim, lon_dim, memb_dim, and + avg_dim). Only present if 'avg_dim' is not NULL. If dat_dim is NULL, nexp + and nobs are omitted. 
} \item{macc_conf.lower}{ The lower confidence interval of MACC with the same dimensions as MACC. @@ -113,13 +127,15 @@ A list containing the numeric arrays:\cr } } \description{ -Calculate the anomaly correlation coefficient for the ensemble mean of each -model and the corresponding references over a spatial domain. It can return a -forecast time series if the data contain forest time dimension, and also the -start date mean if the data contain start date dimension. -The domain of interest can be specified by providing the list -of longitudes/latitudes (lon/lat) of the data together with the corners -of the domain: lonlatbox = c(lonmin, lonmax, latmin, latmax). +Calculate the spatial anomaly correlation coefficient (ACC) for the ensemble +mean of each model and the corresponding references over a spatial domain. It +can return a forecast time series if the data contain forecast time dimension, +and also the ACC mean over one dimension, e.g., start date dimension. +The domain of interest can be specified by providing the list of longitudes/ +latitudes of the data together with the corners of the domain: lonlatbox = +c(lonmin, lonmax, latmin, latmax). The data will be adjusted to have a spatial +mean of zero, then area weighting is applied. The formula is referenced from +Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7). 
} \examples{ \dontshow{ @@ -137,8 +153,8 @@ sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -acc <- ACC(ano_exp, ano_obs) -acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap') +acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) +acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap', lat = sampleData$lat) # Combine acc results for PlotACC res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), dim = c(dim(acc$acc), 4)) @@ -151,5 +167,6 @@ PlotACC(res_bootstrap, startDates) } \references{ Joliffe and Stephenson (2012). Forecast Verification: A - Practitioner's Guide in Atmospheric Science. Wiley-Blackwell. + Practitioner's Guide in Atmospheric Science. Wiley-Blackwell.; + Wilks (2011; section 7.6.4; https://doi.org/10.1016/B978-0-12-385022-5.00008-7). } diff --git a/man/CDORemap.Rd b/man/CDORemap.Rd index b49f94493442665c162a9c10ae595738dbc2baa1..61f61011114884b2efb5ba07abc2971f10216f68 100644 --- a/man/CDORemap.Rd +++ b/man/CDORemap.Rd @@ -49,18 +49,35 @@ for the price of writing more intermediate files (whis usually is unconvenient) by setting the parameter \code{avoid_writes = TRUE}.} \item{crop}{Whether to crop the data after interpolation with -'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole -world as CDO does by default (FALSE). If \code{crop = TRUE} then the -longitude and latitude borders which to crop at are taken as the limits of -the cells at the borders ('lons' and 'lats' are perceived as cell centers), -i.e. the resulting array will contain data that covers the same area as -the input array. This is equivalent to specifying \code{crop = 'preserve'}, -i.e. preserving area. If \code{crop = 'tight'} then the borders which to -crop at are taken as the minimum and maximum cell centers in 'lons' and -'lats', i.e. 
the area covered by the resulting array may be smaller if -interpolating from a coarse grid to a fine grid. The parameter 'crop' also -accepts a numeric vector of custom borders which to crop at: -c(western border, eastern border, southern border, northern border).} +'cdo sellonlatbox' (TRUE) or to extend interpolated data to the whole +world as CDO does by default (FALSE). The default value is TRUE.\cr +\itemize{ + \item{ + If \code{crop = TRUE}, the longitude and latitude borders to be cropped + at are taken as the limits of the cells at the borders (not the values + of 'lons' and 'lats', which are perceived as cell centers), i.e., the + resulting array will contain data that covers the same area as the input + array. This is equivalent to specifying \code{crop = 'preserve'}, i.e., + preserving area. Notice that the longitude range of returning array will + follow the original data 'lons' instead of the target grid 'grid'. + } + \item{ + If \code{crop = FALSE}, the returning array is not cropped, i.e., a + global domain, and the longitude range will be the same as the target + grid 'grid'. + } + \item{ + If \code{crop = 'tight'}, the borders to be cropped at are taken as the + minimum and maximum cell centers in 'lons' and 'lats', i.e., the area + covered by the resulting array may be smaller if interpolating from a + coarse grid to a fine grid. + } + \item{ + The parameter 'crop' also accepts a numeric vector of customized borders + to be cropped at:\cr + c(western border, eastern border, southern border, northern border). 
+ } +}} \item{force_remap}{Whether to force remapping, even if the input data array is already on the target grid.} diff --git a/man/Clim.Rd b/man/Clim.Rd index 50ec0d9c90d7e2dc1b0486de820e657ee84aa325..a5a6f19608a1aef12c82bced943418d74bc9eee3 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -18,11 +18,11 @@ Clim( ) } \arguments{ -\item{exp}{A named numeric array of experimental data, with at least two -dimensions 'time_dim' and 'dat_dim'.} +\item{exp}{A named numeric array of experimental data with at least dimension +'time_dim'.} -\item{obs}{A named numeric array of observational data, same dimensions as -parameter 'exp' except along 'dat_dim'.} +\item{obs}{A named numeric array of observational data that has the same +dimension as 'exp' except 'dat_dim'.} \item{time_dim}{A character string indicating the name of dimension along which the climatologies are computed. The default value is 'sdate'.} @@ -30,16 +30,20 @@ which the climatologies are computed. The default value is 'sdate'.} \item{dat_dim}{A character vector indicating the name of the dataset and member dimensions. If data at one startdate (i.e., 'time_dim') are not complete along 'dat_dim', this startdate along 'dat_dim' will be discarded. -The default value is "c('dataset', 'member')".} +If there is no dataset dimension, it can be NULL, however, it will be more +efficient to simply use mean() to do the calculation. The default value is +"c('dataset', 'member')".} \item{method}{A character string indicating the method to be used. The -options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.} +options include 'clim' (per-pair method), 'kharin' (Kharin method), and +'NDV' (Fuckar method). The default value is 'clim'.} \item{ftime_dim}{A character string indicating the name of forecast time dimension. Only used when method = 'NDV'. The default value is 'ftime'.} \item{memb}{A logical value indicating whether to remain 'memb_dim' dimension -(TRUE) or do ensemble mean over 'memb_dim' (FALSE). 
The default value is TRUE.} +(TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is +TRUE.} \item{memb_dim}{A character string indicating the name of the member dimension. Only used when parameter 'memb' is FALSE. It must be one element @@ -61,7 +65,7 @@ A list of 2: dimension 'memb_dim' is also removed. } \item{$clim_obs}{ - A numeric array with the same dimensions as parameter 'exp' + A numeric array with the same dimensions as parameter 'obs' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE, dimension 'memb_dim' is also removed. } @@ -70,14 +74,17 @@ A list of 2: This function computes per-pair climatologies for the experimental and observational data using one of the following methods: \enumerate{ - \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)} - \item{Kharin method (Karin et al, GRL, 2012)} - \item{Fuckar method (Fuckar et al, GRL, 2014)} + \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 https://doi.org/10.1007/s00382-012-1413-1)} + \item{Kharin method (Kharin et al, GRL, 2012 https://doi.org/10.1029/2012GL052647)} + \item{Fuckar method (Fuckar et al, GRL, 2014 https://doi.org/10.1002/2014GL060815)} } Per-pair climatology means that only the startdates covered by the whole experiments/observational dataset will be used. In other words, the startdates which are not all available along 'dat_dim' dimension of both the 'exp' and 'obs' are excluded when computing the climatologies. +Kharin method is the linear trend bias correction method, and Fuckar method +is the initial condition bias correction method. The two methods both do the +per-pair correction beforehand. 
} \examples{ # Load sample data as in Load() example: diff --git a/man/Cluster.Rd b/man/Cluster.Rd index 7ea25de5bfe29b8f49dc2c117a220d01dc6c8236..1fa6a1ff5e5e1819d76306fb1b95a32e2beadd71 100644 --- a/man/Cluster.Rd +++ b/man/Cluster.Rd @@ -47,6 +47,7 @@ Other indices available in NBClust are "kl", "ch", "hartigan", "ccc", "ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", "hubert", "sdindex", and "sdbw". One can also use all of them with the option 'alllong' or almost all indices + except gap, gamma, gplus and tau with 'all', when the optimal number of clusters K is detremined by the majority rule (the maximum of histogram of the results of all indices with finite solutions). Use of some indices on a big and/or unstructured dataset can be computationally intense and/or @@ -63,7 +64,7 @@ A list containing: are same as 'data' without 'space_dim'. } \item{$centers}{ - A nemeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees + A numeric array of cluster centres or centroids (e.g. [1:K, 1:spatial degrees of freedom]). The rest dimensions are same as 'data' except 'time_dim' and 'space_dim'. } diff --git a/man/DiffCorr.Rd b/man/DiffCorr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d8ff65cbeda9efc4a6a345610971527774851ad0 --- /dev/null +++ b/man/DiffCorr.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DiffCorr.R +\name{DiffCorr} +\alias{DiffCorr} +\title{Compute the correlation difference and its significance} +\usage{ +DiffCorr( + exp, + obs, + ref, + N.eff = NA, + time_dim = "sdate", + memb_dim = NULL, + method = "pearson", + alpha = NULL, + handle.na = "return.na", + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast data with at least time +dimension.} + +\item{obs}{A named numerical array with the observations with at least time +dimension. 
The dimensions must be the same as "exp" except 'memb_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as "exp" except +'memb_dim'.} + +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (and it will be computed with the s2dv:::.Eno), a +numeric (which is used for all cases), or an array with the same dimensions +as "obs" except "time_dim" (for a particular N.eff to be used for each case) +. The default value is NA.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member +dimension to compute the ensemble mean of the forecast and reference +forecast. If it is NULL (default), the ensemble mean should be provided +directly to the function.} + +\item{method}{A character string indicating the correlation coefficient to be +computed ("pearson" or "spearman"). The default value is "pearson".} + +\item{alpha}{A numeric of the significance level to be used in the statistical +significance test. If it is a numeric, "sign" will be returned. If NULL, the +p-value will be returned instead. The default value is NULL.} + +\item{handle.na}{A character string indicating how to handle missing values. +If "return.na", NAs will be returned for the cases that contain at least one +NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +steps with no missing values in all "exp", "ref", and "obs" will be used. If +"na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +NA. The default value is "return.na".} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. 
The default value is NULL.} +} +\value{ +A list with: +\item{$diff.corr}{ + A numerical array of the correlation differences with the same dimensions as + the input arrays except "time_dim" (and "memb_dim" if provided). +} +\item{$sign}{ + A logical array of the statistical significance of the correlation + differences with the same dimensions as the input arrays except "time_dim" + (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +} +\item{$p.val}{ + A numeric array of the p-values with the same dimensions as the input arrays + except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is + NULL. +} +} +\description{ +Compute the correlation difference between two deterministic forecasts. +Positive values of the correlation difference indicate that the forecast is +more skillful than the reference forecast, while negative values mean that +the reference forecast is more skillful. The statistical significance of the +correlation differences is computed with a one-sided test for equality of +dependent correlation coefficients (Steiger, 1980; Siegert et al., 2017) using +effective degrees of freedom to account for the autocorrelation of the time +series (von Storch and Zwiers, 1999). 
+} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- DiffCorr(exp, obs, ref, memb_dim = 'member') + +} +\references{ +Steiger, 1980; https://content.apa.org/doi/10.1037/0033-2909.87.2.245 +Siegert et al., 2017; https://doi.org/10.1175/MWR-D-16-0037.1 +von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336 +} diff --git a/man/MeanDims.Rd b/man/MeanDims.Rd index 721866b9fbc58225741f89a5866445d600ad1dfb..94369da0bdd255126766d347ba940a5c1d02a963 100644 --- a/man/MeanDims.Rd +++ b/man/MeanDims.Rd @@ -19,9 +19,10 @@ not (FALSE).} dimension (FALSE) or drop it (TRUE). The default value is TRUE.} } \value{ -An array with the same dimension as parameter 'data' except the - 'dims' dimensions. If 'drop' is TRUE, 'dims' will be removed; if 'drop' is - FALSE, 'dims' will be preserved and the length will be 1. +A numeric or an array with the same dimension as parameter 'data' + except the 'dims' dimensions. If 'drop' is TRUE, 'dims' will be removed; if +'drop' is FALSE, 'dims' will be preserved and the length will be 1. If all + the dimensions are averaged out, a numeric is returned. 
} \description{ This function returns the mean of an array along a set of dimensions and diff --git a/man/PlotACC.Rd b/man/PlotACC.Rd index cd2b35729db4cdb4d2cabb7005ea7340b6f783a3..2764de3487103e4408f0da7d036e6abf5452beab 100644 --- a/man/PlotACC.Rd +++ b/man/PlotACC.Rd @@ -110,8 +110,8 @@ sampleData$obs <- Season(sampleData$obs, monini = 11, moninf = 12, monsup = 2) clim <- Clim(sampleData$mod, sampleData$obs) ano_exp <- Ano(sampleData$mod, clim$clim_exp) ano_obs <- Ano(sampleData$obs, clim$clim_obs) -acc <- ACC(ano_exp, ano_obs) -acc_bootstrap <- ACC(ano_exp, ano_obs, conftype = 'bootstrap') +acc <- ACC(ano_exp, ano_obs, lat = sampleData$lat) +acc_bootstrap <- ACC(ano_exp, ano_obs, lat = sampleData$lat, conftype = 'bootstrap') # Combine acc results for PlotACC res <- array(c(acc$conf.lower, acc$acc, acc$conf.upper, acc$p.val), dim = c(dim(acc$acc), 4)) diff --git a/man/PlotEquiMap.Rd b/man/PlotEquiMap.Rd index 31fe4d82eeac0bcffe71d6485965632da750be4f..47a0d981a3ae6d5dd022b2b59585dcd834271fd5 100644 --- a/man/PlotEquiMap.Rd +++ b/man/PlotEquiMap.Rd @@ -54,6 +54,10 @@ PlotEquiMap( degree_sym = FALSE, intylat = 20, intxlon = 20, + xlonshft = 0, + ylatshft = 0, + xlabels = NULL, + ylabels = NULL, axes_tick_scale = 1, axes_label_scale = 1, drawleg = TRUE, @@ -247,7 +251,8 @@ the function.} box borders. The default value is NULL and is automatically adjusted by the function.} -\item{degree_sym}{A logical indicating whether to include degree symbol (30° N) or not (30N; default).} +\item{degree_sym}{A logical indicating whether to include degree symbol (30° N) +or not (30N; default).} \item{intylat}{Interval between latitude ticks on y-axis, in degrees. Defaults to 20.} @@ -255,6 +260,22 @@ Defaults to 20.} \item{intxlon}{Interval between latitude ticks on x-axis, in degrees. Defaults to 20.} +\item{xlonshft}{A numeric of the degrees to shift the longitude ticks. The +default value is 0.} + +\item{ylatshft}{A numeric of the degrees to shift the latitude ticks. 
The +default value is 0.} + +\item{xlabels}{A vector of character string of the customized x-axis labels. +The values should correspond to each tick, which is decided by the longitude +and parameter 'intxlon'. The default value is NULL and the labels will be +automatically generated.} + +\item{ylabels}{A vector of character string of the customized y-axis labels. +The values should correspond to each tick, which is decided by the latitude +and parameter 'intylat'. The default value is NULL and the labels will be +automatically generated.} + \item{axes_tick_scale}{Scale factor for the tick lines along the longitude and latitude axes.} diff --git a/man/PlotLayout.Rd b/man/PlotLayout.Rd index 10bb39e94e733ed78e12d10de324f69ddc06530a..f8e7bae376763de767cd967a4f3baee1d608ceef 100644 --- a/man/PlotLayout.Rd +++ b/man/PlotLayout.Rd @@ -94,7 +94,20 @@ a) splitting your array into a list of sub-arrays (each with the data for one plot) and providing it as parameter 'var', b) providing a list of named sub-lists in 'special_args', where the names of each sub-list match the names of the parameters to be adjusted, and -each value in a sub-list contains the value of the corresponding parameter.} +each value in a sub-list contains the value of the corresponding parameter. +For example, if the plots are two maps with different arguments, the +structure would be like:\cr +var:\cr + List of 2\cr + $ : num [1:360, 1:181] 1 3.82 5.02 6.63 8.72 ...\cr + $ : num [1:360, 1:181] 2.27 2.82 4.82 7.7 10.32 ...\cr +special_args:\cr + List of 2\cr + $ :List of 2\cr + ..$ arg1: ...\cr + ..$ arg2: ...\cr + $ :List of 1\cr + ..$ arg1: ...\cr} \item{nrow}{Numeric value to force the number of rows in the automatically generated layout. 
If higher than the required, this will yield blank cells diff --git a/man/RPS.Rd b/man/RPS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..ee5c24142cd840615cd2759ae0180b805028d773 --- /dev/null +++ b/man/RPS.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RPS.R +\name{RPS} +\alias{RPS} +\title{Compute the Ranked Probability Score} +\usage{ +RPS( + exp, + obs, + time_dim = "sdate", + memb_dim = "member", + prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, + Fair = FALSE, + weights = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast. The default value is 'member'.} + +\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to +1) between the categories. The default value is c(1/3, 2/3), which +corresponds to tercile equiprobable categories.} + +\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' +for computing the thresholds between the probabilistic categories. If NULL, +the whole period is used. The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairRPSS (the +potential RPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{weights}{A named two-dimensional numerical array of the weights for each +member and time. The dimension names should include 'memb_dim' and +'time_dim'. The default value is NULL. 
The ensemble should have at least 70 +members or span at least 10 time steps and have more than 45 members if +consistency between the weighted and unweighted methodologies is desired.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A numerical array of RPS with the same dimensions as "exp" except the +'time_dim' and 'memb_dim' dimensions. +} +\description{ +The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +squared differences between the cumulative forecast probabilities (computed +from the ensemble members) and the observations (defined as 0% if the category +did not happen and 100% if it happened). It can be used to evaluate the skill +of multi-categorical probabilistic forecasts. The RPS ranges between 0 +(perfect forecast) and n-1 (worst possible forecast), where n is the number of +categories. In the case of a forecast divided into two categories (the lowest +number of categories that a probabilistic forecast can have), the RPS +corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 +and 1. 
+} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +res <- RPS(exp = exp, obs = obs) + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +} diff --git a/man/RPSS.Rd b/man/RPSS.Rd new file mode 100644 index 0000000000000000000000000000000000000000..5b8bd7ec07dfc58f5a588fd2113009b441eb1445 --- /dev/null +++ b/man/RPSS.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RPSS.R +\name{RPSS} +\alias{RPSS} +\title{Compute the Ranked Probability Skill Score} +\usage{ +RPSS( + exp, + obs, + ref = NULL, + time_dim = "sdate", + memb_dim = "member", + prob_thresholds = c(1/3, 2/3), + indices_for_clim = NULL, + Fair = FALSE, + weights = NULL, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observation with at least time +dimension. The dimensions must be the same as 'exp' except 'memb_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as 'exp' except +'memb_dim'. If it is NULL, the climatological forecast is used as reference +forecast. The default value is NULL.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the probabilities of the forecast and the reference forecast. The +default value is 'member'.} + +\item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to +1) between the categories. 
The default value is c(1/3, 2/3), which +corresponds to tercile equiprobable categories.} + +\item{indices_for_clim}{A vector of the indices to be taken along 'time_dim' +for computing the thresholds between the probabilistic categories. If NULL, +the whole period is used. The default value is NULL.} + +\item{Fair}{A logical indicating whether to compute the FairRPSS (the +potential RPSS that the forecast would have with an infinite ensemble size). +The default value is FALSE.} + +\item{weights}{A named two-dimensional numerical array of the weights for each +member and time. The dimension names should include 'memb_dim' and +'time_dim'. The default value is NULL. The ensemble should have at least 70 +members or span at least 10 time steps and have more than 45 members if +consistency between the weighted and unweighted methodologies is desired.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +\item{$rpss}{ + A numerical array of the RPSS with the same dimensions as "exp" except the + 'time_dim' and 'memb_dim' dimensions. +} +\item{$sign}{ + A logical array of the statistical significance of the RPSS with the same + dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. +} +} +\description{ +The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +assess whether a forecast presents an improvement or worsening with respect to +a reference forecast. The RPSS ranges between minus infinite and 1. If the +RPSS is positive, it indicates that the forecast has higher skill than the +reference forecast, while a negative value means that it has a lower skill. +Examples of reference forecasts are the climatological forecast (same +probabilities for all categories for all time steps), persistence, a previous +model version, and another model. 
It is computed as RPSS = 1 - RPS_exp / RPS_ref. +The statistical significance is obtained based on a Random Walk test at the +95% confidence level (DelSole and Tippett, 2016). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast + +} +\references{ +Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +} diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index 11106487e6ab0e82b7299c0f378ebbb7247d3a55..bd1460a74fc8c9cbe69f810ec6ef6c23497dcbd6 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -37,8 +37,8 @@ A list of 2: } \description{ Forecast comparison of the skill obtained with 2 forecasts (with respect to a -common reference) based on Random Walks, with significance estimate at the 5% -confidence level, as in DelSole and Tippett (2015). +common reference) based on Random Walks, with significance estimate at the 95% +confidence level, as in DelSole and Tippett (2016). 
} \examples{ fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) diff --git a/man/ResidualCorr.Rd b/man/ResidualCorr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..fe7dd1012f5f81a4feaf574891b2468b79e88572 --- /dev/null +++ b/man/ResidualCorr.Rd @@ -0,0 +1,102 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ResidualCorr.R +\name{ResidualCorr} +\alias{ResidualCorr} +\title{Compute the residual correlation and its significance} +\usage{ +ResidualCorr( + exp, + obs, + ref, + N.eff = NA, + time_dim = "sdate", + memb_dim = NULL, + method = "pearson", + alpha = NULL, + handle.na = "return.na", + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numerical array of the forecast with at least time +dimension.} + +\item{obs}{A named numerical array of the observations with at least time +dimension. The dimensions must be the same as "exp" except 'memb_dim'.} + +\item{ref}{A named numerical array of the reference forecast data with at +least time dimension. The dimensions must be the same as "exp" except +'memb_dim'.} + +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (and it will be computed with the s2dv:::.Eno), a +numeric (which is used for all cases), or an array with the same dimensions +as "obs" except "time_dim" (for a particular N.eff to be used for each case) +. The default value is NA.} + +\item{time_dim}{A character string indicating the name of the time dimension. +The default value is 'sdate'.} + +\item{memb_dim}{A character string indicating the name of the member dimension +to compute the ensemble mean of the forecast and reference forecast. If it +is NULL, the ensemble mean should be provided directly to the function. The +default value is NULL.} + +\item{method}{A character string indicating the correlation coefficient to be +computed ("pearson", "kendall", or "spearman"). 
The default value is +"pearson".} + +\item{alpha}{A numeric of the significance level to be used in the statistical +significance test. If it is a numeric, "sign" will be returned. If NULL, the +p-value will be returned instead. The default value is NULL.} + +\item{handle.na}{A character string indicating how to handle missing values. +If "return.na", NAs will be returned for the cases that contain at least one +NA in "exp", "ref", or "obs". If "only.complete.triplets", only the time +steps with no missing values in all "exp", "ref", and "obs" will be used. If +"na.fail", an error will arise if any of "exp", "ref", or "obs" contains any +NA. The default value is "return.na".} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list with: +\item{$res.corr}{ + A numerical array of the residual correlation with the same dimensions as + the input arrays except "time_dim" (and "memb_dim" if provided). +} +\item{$sign}{ + A logical array indicating whether the residual correlation is statistically + significant or not with the same dimensions as the input arrays except "time_dim" + (and "memb_dim" if provided). Returned only if "alpha" is a numeric. +} +\item{$p.val}{ + A numeric array of the p-values with the same dimensions as the input arrays + except "time_dim" (and "memb_dim" if provided). Returned only if "alpha" is + NULL. +} +} +\description{ +The residual correlation assesses whether a forecast captures any of the +observed variability that is not already captured by a reference forecast +(Smith et al., 2019; https://doi.org/10.1038/s41612-019-0071-y.). +The procedure is as follows: the residuals of the forecasts and observations +are computed by linearly regressing out the reference forecast's ensemble mean +from the forecasts' ensemble mean and observations, respectively. Then, the +residual correlation is computed as the correlation between both residuals. 
+Positive values of the residual correlation indicate that the forecast captures +more observed variability than the reference forecast, while negative values +mean that the reference forecast captures more. The significance of the +residual correlation is computed with a two-sided t-test +(Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7) using an +effective degrees of freedom to account for the time series' autocorrelation +(von Storch and Zwiers, 1999; https://doi.org/10.1017/CBO9780511612336). +} +\examples{ +exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 5, sdate = 50)) +res <- ResidualCorr(exp = exp, obs = obs, ref = ref, memb_dim = 'member') + +} diff --git a/man/s2dv-package.Rd b/man/s2dv-package.Rd index 557692115e5ac3f458a52161ca4323658bfb8a25..3c98a95154225719ceb248570f232b02e9434545 100644 --- a/man/s2dv-package.Rd +++ b/man/s2dv-package.Rd @@ -6,15 +6,7 @@ \alias{s2dv-package} \title{s2dv: A Set of Common Tools for Seasonal to Decadal Verification} \description{ -The advanced version of package 's2dverification'. It is - intended for 'seasonal to decadal' (s2d) climate forecast verification, but - it can also be used in other kinds of forecasts or general climate analysis. - This package is specially designed for the comparison between the experimental - and observational datasets. The functionality of the included functions covers - from data retrieval, data post-processing, skill scores against observation, - to visualization. Compared to 's2dverification', 's2dv' is more compatible - with the package 'startR', able to use multiple cores for computation and - handle multi-dimensional arrays with a higher flexibility. +The advanced version of package 's2dverification'. 
It is intended for 'seasonal to decadal' (s2d) climate forecast verification, but it can also be used in other kinds of forecasts or general climate analysis. This package is specially designed for the comparison between the experimental and observational datasets. The functionality of the included functions covers from data retrieval, data post-processing, skill scores against observation, to visualization. Compared to 's2dverification', 's2dv' is more compatible with the package 'startR', able to use multiple cores for computation and handle multi-dimensional arrays with a higher flexibility. } \references{ \url{https://earth.bsc.es/gitlab/es/s2dv/} @@ -39,6 +31,7 @@ Authors: Other contributors: \itemize{ \item Roberto Bilbao \email{roberto.bilbao@bsc.es} [contributor] + \item Josep Cos \email{josep.cos@bsc.es} [contributor] \item Carlos Delgado \email{carlos.delgado@bsc.es} [contributor] \item Llorenç Lledó \email{llorenc.lledo@bsc.es} [contributor] \item Andrea Manrique \email{andrea.manrique@bsc.es} [contributor] diff --git a/man/sampleDepthData.Rd b/man/sampleDepthData.Rd index 77e4a7a290855556aa7b36f8a6c46af2e2791ca7..47e2f1b742e3231496502c05f6741ff1c54b82d2 100644 --- a/man/sampleDepthData.Rd +++ b/man/sampleDepthData.Rd @@ -5,7 +5,8 @@ \alias{sampleDepthData} \title{Sample of Experimental Data for Forecast Verification In Function Of Latitudes And Depths} -\format{The data set provides with a variable named 'sampleDepthData'.\cr\cr +\format{ +The data set provides with a variable named 'sampleDepthData'.\cr\cr sampleDepthData$exp is an array that contains the experimental data and the dimension meanings and values are:\cr @@ -18,7 +19,8 @@ but in this sample is not defined (NULL).\cr\cr sampleDepthData$depths is an array with the 7 longitudes covered by the data.\cr\cr -sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr} +sampleDepthData$lat is an array with the 21 latitudes covered by the data.\cr\cr +} \usage{ 
data(sampleDepthData) } diff --git a/man/sampleMap.Rd b/man/sampleMap.Rd index eaf8aa5a686f589db7e223ccc651af29266203e6..e4ec5a5b2be18b7f4ac4bfef511e6a8333205a49 100644 --- a/man/sampleMap.Rd +++ b/man/sampleMap.Rd @@ -4,7 +4,8 @@ \name{sampleMap} \alias{sampleMap} \title{Sample Of Observational And Experimental Data For Forecast Verification In Function Of Longitudes And Latitudes} -\format{The data set provides with a variable named 'sampleMap'.\cr\cr +\format{ +The data set provides with a variable named 'sampleMap'.\cr\cr sampleMap$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times, # of latitudes, # of longitudes)\cr @@ -16,7 +17,8 @@ sampleMap$obs is an array that contains the observational data and the dimension sampleMap$lat is an array with the 2 latitudes covered by the data (see examples on Load() for details on why such low resolution).\cr\cr - sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution).} + sampleMap$lon is an array with the 3 longitudes covered by the data (see examples on Load() for details on why such low resolution). 
+} \usage{ data(sampleMap) } diff --git a/man/sampleTimeSeries.Rd b/man/sampleTimeSeries.Rd index 05a8e7980116c649d6b156a4746f16b2c45fea4e..7f058e20443f04b3fae5e9658f07f17e0e58c5ce 100644 --- a/man/sampleTimeSeries.Rd +++ b/man/sampleTimeSeries.Rd @@ -4,7 +4,8 @@ \name{sampleTimeSeries} \alias{sampleTimeSeries} \title{Sample Of Observational And Experimental Data For Forecast Verification As Area Averages} -\format{The data set provides with a variable named 'sampleTimeSeries'.\cr\cr +\format{ +The data set provides with a variable named 'sampleTimeSeries'.\cr\cr sampleTimeSeries$mod is an array that contains the experimental data and the dimension meanings and values are:\cr c(# of experimental datasets, # of members, # of starting dates, # of lead-times)\cr @@ -16,7 +17,8 @@ sampleTimeSeries$obs is an array that contains the observational data and the di sampleTimeSeries$lat is an array with the 2 latitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).\cr\cr -sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution).} +sampleTimeSeries$lon is an array with the 3 longitudes covered by the data that was area averaged to calculate the time series (see examples on Load() for details on why such low resolution). 
+} \usage{ data(sampleTimeSeries) } diff --git a/s2dv-manual.pdf b/s2dv-manual.pdf index b6faa89fbc92c8735e85d4fa7bc4d0f68d47030b..2b6da42257033ded7f67d7307378f372ae3a10fe 100644 Binary files a/s2dv-manual.pdf and b/s2dv-manual.pdf differ diff --git a/tests/testthat/test-ACC.R b/tests/testthat/test-ACC.R index 7631401d0ae4864e0aa418daa1e9b116542719cc..5c3672552930be48e114949fdb2301a8ab429bce 100644 --- a/tests/testthat/test-ACC.R +++ b/tests/testthat/test-ACC.R @@ -11,6 +11,8 @@ context("s2dv::ACC tests") set.seed(2) obs1 <- array(rnorm(30), dim = c(dataset = 1, member = 1, sdate = 5, ftime = 1, lat = 2, lon = 3)) + lat1 <- c(30, 35) + lon1 <- c(0, 5, 10) # dat2 set.seed(1) exp2 <- array(rnorm(60), dim = c(dataset = 2, sdate = 5, @@ -21,10 +23,12 @@ context("s2dv::ACC tests") set.seed(2) na <- floor(runif(2, min = 1, max = 30)) obs2[na] <- NA + lat2 <- c(30, 35) + lon2 <- c(0, 5, 10) ############################################## test_that("1. Input checks", { - + # exp and obs (1) expect_error( ACC(c(), c()), "Parameter 'exp' and 'obs' cannot be NULL." @@ -36,7 +40,7 @@ test_that("1. Input checks", { expect_error( ACC(c(1:10), c(2:4)), paste0("Parameter 'exp' and 'obs' must have at least dimensions ", - "dat_dim and space_dim.") + "lat_dim and lon_dim.") ) expect_error( ACC(array(1:10, dim = c(2, 5)), array(1:4, dim = c(2, 2))), @@ -46,6 +50,7 @@ test_that("1. Input checks", { ACC(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), "Parameter 'exp' and 'obs' must have same dimension name" ) + # dat_dim expect_error( ACC(exp1, obs1, dat_dim = 1), "Parameter 'dat_dim' must be a character string." @@ -54,14 +59,30 @@ test_that("1. Input checks", { ACC(exp1, obs1, dat_dim = 'a'), "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." ) + # space_dim (deprecated) + expect_warning( + ACC(exp1, obs1, space_dim = c('lat', 'lon'), lat = c(1, 2)), + "Parameter 'space_dim' is deprecated. Use 'lat_dim' and 'lon_dim' instead." 
+ ) + # lat_dim + expect_error( + ACC(exp1, obs1, lat_dim = 1), + "Parameter 'lat_dim' must be a character string." + ) + expect_error( + ACC(exp1, obs1, lat_dim = 'latitude'), + "Parameter 'lat_dim' is not found in 'exp' or 'obs' dimension." + ) + # lon_dim expect_error( - ACC(exp1, obs1, space_dim = c('lon')), - "Parameter 'space_dim' must be a character vector of 2." + ACC(exp1, obs1, lon_dim = 2), + "Parameter 'lon_dim' must be a character string." ) expect_error( - ACC(exp1, obs1, space_dim = c('lon', 'lev')), - "Parameter 'space_dim' is not found in 'exp' or 'obs' dimension." + ACC(exp1, obs1, lon_dim = 'lonn'), + "Parameter 'lon_dim' is not found in 'exp' or 'obs' dimension." ) + # avg_dim expect_error( ACC(exp1, obs1, avg_dim = 1), "Parameter 'avg_dim' must be a character string." @@ -70,6 +91,7 @@ test_that("1. Input checks", { ACC(exp1, obs1, avg_dim = c('lev')), "Parameter 'avg_dim' is not found in 'exp' or 'obs' dimension." ) + # memb_dim expect_error( ACC(exp1, obs1, memb_dim = TRUE), "Parameter 'memb_dim' must be a character string." @@ -78,52 +100,57 @@ test_that("1. Input checks", { ACC(exp1, obs1, memb_dim = 'memb'), "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." ) + # lat expect_error( ACC(exp1, obs1, lat = c(1, 2, 3)), paste0("Parameter \'lat\' must be a numeric vector with the same ", "length as the latitude dimension of \'exp\' and \'obs\'.") ) + # lon expect_error( - ACC(exp1, obs1, lon = c(1, 3)), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 3)), paste0("Parameter \'lon\' must be a numeric vector with the same ", "length as the longitude dimension of \'exp\' and \'obs\'.") ) + # lonlatbox expect_error( - ACC(exp1, obs1, lonlatbox = c(-90, 90)), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), lonlatbox = c(-90, 90)), "Parameter 'lonlatbox' must be a numeric vector of 4." 
) + # conf expect_error( - ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3)), - paste0("Parameters 'lon', 'lat', and 'lonlatbox' must be used or be ", - "NULL at the same time.") - ) - expect_error( - ACC(exp1, obs1, conf = 1), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), conf = 1), "Parameter 'conf' must be one logical value." ) + # conftype expect_error( - ACC(exp1, obs1, conftype = 'a'), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), conftype = 'a'), "Parameter \'conftype\' must be either \'parametric\' or \'bootstrap\'." ) expect_error( - ACC(exp1, obs1, memb_dim = NULL, conftype = 'bootstrap'), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), memb_dim = NULL, conftype = 'bootstrap'), "Parameter 'memb_dim' cannot be NULL when parameter 'conftype' is 'bootstrap'." ) + # conf.lev expect_error( - ACC(exp1, obs1, conf.lev = -1), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), conf.lev = -1), "Parameter 'conf.lev' must be a numeric number between 0 and 1." ) + # pval expect_error( - ACC(exp1, obs1, pval = 'TRUE'), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), pval = 'TRUE'), "Parameter 'pval' must be one logical value." ) + # ncores expect_error( - ACC(exp1, obs1, ncores = 1.5), + ACC(exp1, obs1, lat = c(1, 2), lon = c(1, 2, 3), ncores = 1.5), "Parameter 'ncores' must be a positive integer." ) + # exp and obs (2) expect_error( ACC(exp = array(1:10, dim = c(dataset = 5, member = 1, lat = 2, lon = 1)), obs = array(1:4, dim = c(dataset = 2, member = 2, lat = 1, lon = 1)), + lat = c(1, 2), lon = c(1), avg_dim = NULL), "Parameter 'exp' and 'obs' must have same length of all the dimensions expect 'dat_dim' and 'memb_dim'." ) @@ -134,66 +161,84 @@ test_that("1. Input checks", { test_that("2. 
Output checks: dat1", { expect_equal( - dim(ACC(exp1, obs1)$acc), + dim(ACC(exp1, obs1, lat = lat1, lon = lon1)$acc), c(nexp = 1, nobs = 1, sdate = 5, ftime = 1) ) expect_equal( - names(ACC(exp1, obs1)), + names(ACC(exp1, obs1, lat = lat1)), c("acc", "conf.lower", "conf.upper", "p.val", "macc") ) expect_equal( - mean(ACC(exp1, obs1)$acc), - -0.2001352, + as.vector(ACC(exp1, obs1, lat = lat1)$acc), + c(-0.006384684, -0.664580451, -0.637292180, 0.320974249, -0.453246995), tolerance = 0.00001 ) expect_equal( - as.vector(ACC(exp1, obs1)$p.val), + as.vector(ACC(exp1, obs1, lat = lat1)$p.val), c(0.7292993, 0.7292993, 0.7292993, 0.7292993, 0.7292993), tolerance = 0.00001 ) expect_equal( - as.vector(ACC(exp1, obs1)$conf.lower), - c(-0.8595534, -0.9644555, -0.9408508, -0.6887500, -0.7619374), + as.vector(ACC(exp1, obs1, lat = lat1)$conf.lower), + c(-0.8137296, -0.9589397, -0.9549511, -0.6633950, -0.9246771), tolerance = 0.00001 ) expect_equal( - as.vector(ACC(exp1, obs1)$conf.upper), - c(0.7493799, 0.2515608, 0.4759707, 0.8890967, 0.8517117), + as.vector(ACC(exp1, obs1, lat = lat1)$conf.upper), + c(0.8093704, 0.3190711, 0.3609563, 0.8984881, 0.5668074), tolerance = 0.00001 ) expect_equal( - names(ACC(exp1, obs1, avg_dim = NULL)), + names(ACC(exp1, obs1, lat = lat1, avg_dim = NULL)), c("acc", "conf.lower", "conf.upper", "p.val") ) expect_equal( - dim(ACC(exp1, obs1, dat_dim = 'member', memb_dim = NULL)$acc), + dim(ACC(exp1, obs1, lat = lat1, dat_dim = 'member', memb_dim = NULL)$acc), c(nexp = 2, nobs = 1, sdate = 5, dataset = 1, ftime = 1) ) expect_equal( - names(ACC(exp1, obs1, conf = FALSE)), + names(ACC(exp1, obs1, lat = lat1, conf = FALSE)), c("acc", "p.val", "macc") ) expect_equal( - names(ACC(exp1, obs1, pval = FALSE)), + names(ACC(exp1, obs1, lat = lat1, pval = FALSE)), c("acc", "conf.lower", "conf.upper", "macc") ) expect_equal( - names(ACC(exp1, obs1, conf = FALSE, pval = FALSE)), + names(ACC(exp1, obs1, lat = lat1, conf = FALSE, pval = FALSE)), c("acc", "macc") ) 
expect_equal( - as.vector(ACC(exp1, obs1, conf = FALSE, avg_dim = NULL, conf.lev = 0.9)$p.val), + as.vector(ACC(exp1, obs1, lat = lat1, conf = FALSE, avg_dim = NULL, conf.lev = 0.9)$p.val), c(0.6083998, 0.6083998, 0.6083998, 0.6083998, 0.6083998), tolerance = 0.00001 ) expect_equal( - mean(ACC(exp1, obs1, lat = c(10, 20), lon = c(20, 30, 40), lonlatbox = c(20, 30, 10, 20))$acc), - -0.1681097, + as.vector(ACC(exp1, obs1, lat = c(10, 20), lon = c(20, 30, 40), lonlatbox = c(20, 30, 10, 20))$acc), + c(0.3555372, -0.5205672, -0.3403301, 0.2690648, -0.5146910), tolerance = 0.00001 ) + }) +############################################## +test_that("3. Output checks: dat2", { +expect_equal( + dim(ACC(exp2, obs2, lat = lat2, lon = lon2, memb_dim = NULL)$acc), + c(nexp = 2, nobs = 1, sdate = 5, ftime = 1) +) +expect_equal( + as.vector(ACC(exp2, obs2, lat = lat2, memb_dim = NULL)$acc)[3:7], + c(-0.3601880, -0.5624773, -0.4603762, -0.6997169, -0.1336961), + tolerance = 0.00001 +) +expect_equal( + mean(ACC(exp2, obs2, lat = lat2, memb_dim = NULL)$acc), + -0.1484762, + tolerance = 0.00001 +) +}) diff --git a/tests/testthat/test-Clim.R b/tests/testthat/test-Clim.R index 1d2ff3e6b013cdefd1cd3375a34193d2bbc0545d..7751afb405c23c4e42cc5587eefa7f670182d14e 100644 --- a/tests/testthat/test-Clim.R +++ b/tests/testthat/test-Clim.R @@ -7,7 +7,7 @@ context("s2dv::Clim tests") ftime = 3, lon = 2, lat = 4)) set.seed(2) obs1 <- array(rnorm(120), dim = c(dataset = 1, member = 1, - ftime = 3, lon = 2, lat = 4, sdate = 5)) + ftime = 3, lon = 2, lat = 4, sdate = 5)) # dat2 exp2 <- exp1 set.seed(1) @@ -26,6 +26,43 @@ context("s2dv::Clim tests") obs3 <- array(rnorm(40), dim = c(dat = 1, ensemble = 1, lon = 2, lat = 4, date = 5)) + # dat4 + set.seed(1) + exp4 <- array(rnorm(30), dim = c(dataset = 1, member = 3, sdate = 5)) + set.seed(2) + obs4 <- array(rnorm(10), dim = c(dataset = 1, member = 1, sdate = 5)) + + # dat5 + set.seed(1) + exp5 <- array(rnorm(30), dim = c(member = 3, sdate = 5)) + 
set.seed(2) + obs5 <- array(rnorm(10), dim = c(member = 1, sdate = 5)) + + # dat6 + set.seed(1) + exp6 <- array(rnorm(30), dim = c(dataset = 1, member = 3, sdate = 5)) + set.seed(2) + obs6 <- array(rnorm(10), dim = c(dataset = 1, sdate = 5)) + + # dat7 + set.seed(1) + exp7 <- array(rnorm(30), dim = c(dataset = 1, member = 3, sdate = 5)) + set.seed(2) + obs7 <- array(rnorm(10), dim = c(sdate = 5)) + + # dat8 + set.seed(1) + exp8 <- array(rnorm(30), dim = c(member = 3, sdate = 5)) + set.seed(2) + obs8 <- array(rnorm(10), dim = c(sdate = 5)) + + # dat9 + set.seed(1) + exp9 <- array(rnorm(30), dim = c(sdate = 5)) + set.seed(2) + obs9 <- array(rnorm(10), dim = c(sdate = 5)) + + ############################################## test_that("1. Input checks", { @@ -47,10 +84,6 @@ test_that("1. Input checks", { "Parameter 'exp' and 'obs' must have dimension names." ) expect_error( - Clim(array(1:10, dim = c(a = 2, c = 5)), array(1:4, dim = c(a = 2, b = 2))), - "Parameter 'exp' and 'obs' must have same dimension name" - ) - expect_error( Clim(exp1, obs1, method = TRUE), "Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'." ) @@ -68,7 +101,7 @@ test_that("1. Input checks", { ) expect_error( Clim(exp1, obs1, dat_dim = c('member', 'dat')), - "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + "Parameter 'dat_dim' is not found in 'exp' dimensions." ) expect_error( Clim(exp1, obs1, method = 'NDV', ftime_dim = 4), @@ -88,7 +121,7 @@ test_that("1. Input checks", { ) expect_error( Clim(exp1, obs1, memb = FALSE, memb_dim = 'memb'), - "Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension." + "Parameter 'memb_dim' is not found in 'exp' dimension." ) expect_error( Clim(exp1, obs1, na.rm = na.omit), @@ -101,8 +134,7 @@ test_that("1. 
Input checks", { expect_error( Clim(array(1:10, dim = c(dataset = 2, member = 5, sdate = 4, ftime = 3)), array(1:4, dim = c(dataset = 2, member = 2, sdate = 5, ftime = 3))), - paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension expect 'dat_dim'.") + "Parameter 'exp' and 'obs' must have the same dimensions expect 'dat_dim'." ) }) @@ -110,58 +142,79 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { + # dimension expect_equal( dim(Clim(exp1, obs1)$clim_exp), c(dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) ) expect_equal( - dim(Clim(exp1, obs1, memb = FALSE)$clim_exp), - c(dataset = 1, ftime = 3, lon = 2, lat = 4) + dim(Clim(exp1, obs1)$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) ) expect_equal( - dim(Clim(exp1, obs1, time_dim = 'lon')$clim_exp), - c(dataset = 1, member = 3, sdate = 5, ftime = 3, lat = 4) + dim(Clim(exp1, obs1, memb = FALSE)$clim_exp), + c(dataset = 1, ftime = 3, lon = 2, lat = 4) ) expect_equal( - dim(Clim(exp1, obs1, method = 'kharin')$clim_exp), - c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + dim(Clim(exp1, obs1, memb = FALSE)$clim_obs), + c(dataset = 1, ftime = 3, lon = 2, lat = 4) ) expect_equal( - dim(Clim(exp1, obs1, method = 'NDV')$clim_exp), - c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + dim(Clim(exp1, obs1, time_dim = 'lon')$clim_exp), + c(dataset = 1, member = 3, sdate = 5, ftime = 3, lat = 4) ) + # clim_exp expect_equal( - dim(Clim(exp1, obs1)$clim_obs), - c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + (Clim(exp1, obs1, memb = TRUE)$clim_exp)[1:5], + c(0.10592542, 0.10971142, 0.08689170, 0.14149945, 0.02359945), + tolerance = 0.001 ) expect_equal( - dim(Clim(exp1, obs1, method = 'kharin')$clim_obs), - c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + (Clim(exp1, obs1, memb = FALSE)$clim_exp)[1:5], + c(0.10084284, 0.06407350, 0.09028584, 0.17526332, 
0.18004387), + tolerance = 0.001 ) expect_equal( - dim(Clim(exp1, obs1, method = 'NDV')$clim_obs), - c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + max(Clim(exp1, obs1)$clim_exp, na.rm = T), + 1.026186, + tolerance = 0.001 ) + # clim_obs expect_equal( (Clim(exp1, obs1)$clim_obs)[1:5], c(0.14831161, -0.60462627, 0.06609153, -0.23378059, 0.50553522), tolerance = 0.001 ) expect_equal( - (Clim(exp1, obs1, memb = FALSE)$clim_exp)[1:5], - c(0.10084284, 0.06407350, 0.09028584, 0.17526332, 0.18004387), + (Clim(exp1, obs1, memb = F)$clim_obs)[1:5], + c(0.14831161, -0.60462627, 0.06609153, -0.23378059, 0.50553522), tolerance = 0.001 ) + + # method = 'kharin' expect_equal( - max(Clim(exp1, obs1)$clim_exp, na.rm = T), - 1.026186, - tolerance = 0.001 + dim(Clim(exp1, obs1, method = 'kharin')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'kharin')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) ) expect_equal( max(Clim(exp1, obs1, method = 'kharin')$clim_exp, na.rm = T), 2.282634, tolerance = 0.001 ) + + # method = 'NDV' + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_exp), + c(sdate = 5, dataset = 1, member = 3, ftime = 3, lon = 2, lat = 4) + ) + expect_equal( + dim(Clim(exp1, obs1, method = 'NDV')$clim_obs), + c(dataset = 1, member = 1, ftime = 3, lon = 2, lat = 4) + ) expect_equal( min(Clim(exp1, obs1, method = 'NDV')$clim_exp, na.rm = T), -4.025745, @@ -212,9 +265,520 @@ test_that("4. Output checks: dat3", { tolerance = 0.00001 ) +}) + + +############################################## +test_that("5. 
Output checks: dat4", { +res1 <- Clim(exp4, obs4, memb = T) +res2 <- Clim(exp4, obs4, memb = F) +expect_equal( +dim(res1$clim_exp), +c(dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1, member = 1) +) +expect_equal( +dim(res2$clim_exp), +c(dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1) +) +expect_equal( +as.vector(res1$clim_exp), +c(0.1059254, 0.1097114, 0.0868917), +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1008428, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +# method = 'kharin' +res1 <- Clim(exp4, obs4, memb = T, method = 'kharin') +res2 <- Clim(exp4, obs4, memb = F, method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1, member = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1) +) + +# method = 'NDV' +exp4 <- array(exp4, dim = c(dim(exp4), ftime = 1)) +obs4 <- array(obs4, dim = c(dim(obs4), ftime = 1)) +res1 <- Clim(exp4, obs4, memb = T, method = 'NDV') +res2 <- Clim(exp4, obs4, memb = F, method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1, member = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1, ftime = 1) +) + +}) + + +############################################## +test_that("6. 
Output checks: dat5", { +res1 <- Clim(exp5, obs5, dat_dim = 'member', memb = T) +res2 <- Clim(exp5, obs5, dat_dim = 'member', memb = F) +expect_equal( +dim(res1$clim_exp), +c(member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(member = 1) +) +expect_equal( +dim(res2$clim_exp), +NULL +) +expect_equal( +dim(res2$clim_obs), +NULL +) +expect_equal( +as.vector(res1$clim_exp), +c(0.1059254, 0.1097114, 0.0868917), +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1008428, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) + +# method = 'kharin' +res1 <- Clim(exp5, obs5, memb = T, dat_dim = 'member', method = 'kharin') +res2 <- Clim(exp5, obs5, memb = F, dat_dim = 'member', method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(member = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5) +) +expect_equal( +dim(res2$clim_obs), +NULL +) + +# method = 'NDV' +exp5 <- array(exp5, dim = c(dim(exp5), ftime = 1)) +obs5 <- array(obs5, dim = c(dim(obs5), ftime = 1)) +res1 <- Clim(exp5, obs5, memb = T, dat_dim = 'member', method = 'NDV') +res2 <- Clim(exp5, obs5, memb = F, dat_dim = 'member', method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, member = 3, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(member = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(ftime = 1) +) + +}) + +############################################## +test_that("7. 
Output checks: dat6", { +res1 <- Clim(exp6, obs6, memb = T) +res2 <- Clim(exp6, obs6, memb = F) +expect_equal( +dim(res1$clim_exp), +c(dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1) +) +expect_equal( +dim(res2$clim_exp), +c(dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1) +) +expect_equal( +as.vector(res1$clim_exp), +c(0.1059254, 0.1097114, 0.0868917), +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1008428, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) + +# method = 'kharin' +res1 <- Clim(exp6, obs6, memb = T, method = 'kharin') +res2 <- Clim(exp6, obs6, memb = F, method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1) +) +# method = 'NDV' +exp6 <- array(exp6, dim = c(dim(exp6), ftime = 1)) +obs6 <- array(obs6, dim = c(dim(obs6), ftime = 1)) +res1 <- Clim(exp6, obs6, memb = T, method = 'NDV') +res2 <- Clim(exp6, obs6, memb = F, method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(dataset = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(dataset = 1, ftime = 1) +) +}) + +############################################## +test_that("8. 
Output checks: dat7", { +res1 <- Clim(exp7, obs7, memb = T) +res2 <- Clim(exp7, obs7, memb = F) +expect_equal( +dim(res1$clim_exp), +c(dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +c(dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +NULL +) +expect_equal( +as.vector(res1$clim_exp), +c(0.1059254, 0.1097114, 0.0868917), +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1008428, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) + +# method = 'kharin' +res1 <- Clim(exp7, obs7, memb = T, method = 'kharin') +res2 <- Clim(exp7, obs7, memb = F, method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3) +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1) +) +expect_equal( +dim(res2$clim_obs), +NULL +) + +# method = 'NDV' +exp7 <- array(exp7, dim = c(dim(exp7), ftime = 1)) +obs7 <- array(obs7, dim = c(dim(obs7), ftime = 1)) +res1 <- Clim(exp7, obs7, memb = T, method = 'NDV') +res2 <- Clim(exp7, obs7, memb = F, method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, dataset = 1, member = 3, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, dataset = 1, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(ftime = 1) +) }) + +############################################## +test_that("9. 
Output checks: dat8", { +res1 <- Clim(exp8, obs8, dat_dim = 'member', memb = T) +res2 <- Clim(exp8, obs8, dat_dim = 'member', memb = F) +expect_equal( +dim(res1$clim_exp), +c(member = 3) +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +NULL +) +expect_equal( +dim(res2$clim_obs), +NULL +) +expect_equal( +as.vector(res1$clim_exp), +c(0.1059254, 0.1097114, 0.0868917), +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1008428, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) + +# method = 'kharin' +res1 <- Clim(exp8, obs8, dat_dim = 'member', memb = T, method = 'kharin') +res2 <- Clim(exp8, obs8, dat_dim = 'member', memb = F, method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, member = 3) +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5) +) +expect_equal( +dim(res2$clim_obs), +NULL +) + +# method = 'NDV' +exp8 <- array(exp8, dim = c(dim(exp8), ftime = 1)) +obs8 <- array(obs8, dim = c(dim(obs8), ftime = 1)) +res1 <- Clim(exp8, obs8, dat_dim = 'member', memb = T, method = 'NDV') +res2 <- Clim(exp8, obs8, dat_dim = 'member', memb = F, method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, member = 3, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(ftime = 1) +) + +}) + +############################################## +test_that("10. 
Output checks: dat9", { +res1 <- Clim(exp9, obs9, dat_dim = NULL, memb = T) +res2 <- Clim(exp9, obs9, dat_dim = NULL, memb_dim = NULL, memb = F) +expect_equal( +dim(res1$clim_exp), +NULL +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +NULL +) +expect_equal( +dim(res2$clim_obs), +NULL +) +expect_equal( +as.vector(res1$clim_exp), +0.1292699, +tolerance = 0.0001 +) +expect_equal( +as.vector(res1$clim_obs), +-0.06696949, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_exp), +0.1292699, +tolerance = 0.0001 +) +expect_equal( +as.vector(res2$clim_obs), +-0.06696949, +tolerance = 0.0001 +) + +# method = 'kharin' +res1 <- Clim(exp9, obs9, dat_dim = NULL, memb = T, method = 'kharin') +res2 <- Clim(exp9, obs9, dat_dim = NULL, memb_dim = NULL, memb = F, method = 'kharin') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5) +) +expect_equal( +dim(res1$clim_obs), +NULL +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5) +) +expect_equal( +dim(res2$clim_obs), +NULL +) + +# method = 'NDV' +exp9 <- array(exp9, dim = c(dim(exp9), ftime = 1)) +obs9 <- array(obs9, dim = c(dim(obs9), ftime = 1)) +res1 <- Clim(exp9, obs9, dat_dim = NULL, memb = T, method = 'NDV') +res2 <- Clim(exp9, obs9, dat_dim = NULL, memb_dim = NULL, memb = F, method = 'NDV') +expect_equal( +dim(res1$clim_exp), +c(sdate = 5, ftime = 1) +) +expect_equal( +dim(res1$clim_obs), +c(ftime = 1) +) +expect_equal( +dim(res2$clim_exp), +c(sdate = 5, ftime = 1) +) +expect_equal( +dim(res2$clim_obs), +c(ftime = 1) +) + + +}) + +############################################## diff --git a/tests/testthat/test-Cluster.R b/tests/testthat/test-Cluster.R index 4e0a7b171d25ec559afbef8b97dafb45e87cff6e..13297d80850198518b3baba264f4dcfe900989a8 100644 --- a/tests/testthat/test-Cluster.R +++ b/tests/testthat/test-Cluster.R @@ -10,7 +10,7 @@ context("s2dv::Cluster tests") # dat2 set.seed(2) dat2 <- array(rnorm(300), - dim = c(sdate = 50, lat = 2, lon = 3)) + dim = c(dat = 1, sdate = 50, lat = 
2, lon = 3)) weights2 <- array(c(0.9, 1.1), dim = c(lat = 2, lon = 3)) ############################################## @@ -78,24 +78,76 @@ test_that("1. Input checks", { ############################################## test_that("2. Output checks: dat1", { -# The output is random. Only check dimensions. expect_equal( - length(Cluster(dat1, weights1, space_dim = 'space')$cluster), - 50 + names(Cluster(dat1, weights1, space_dim = 'space')), + c("cluster", "centers", "totss", "withinss", "tot.withinss", + "betweenss", "size", "iter", "ifault") ) expect_equal( - length(Cluster(dat1)$cluster), - 100 + dim(Cluster(dat1, weights1, space_dim = 'space')$cluster), + c(sdate = 50) + ) + expect_equal( + dim(Cluster(dat1)$cluster), + c(sdate = 50, space = 2) ) expect_equal( dim(Cluster(dat1, weights1, space_dim = 'space')$centers), - c(8, 2) + c(K = 7, space = 2) ) expect_equal( dim(Cluster(dat1, weights1, nclusters = 3, space_dim = 'space')$centers), - c(3, 2) + c(K = 3, space = 2) + ) + expect_equal( + dim(Cluster(dat1, weights1, space_dim = 'space')$withinss), + c(K = 7) + ) + expect_equal( + dim(Cluster(dat1, weights1, space_dim = 'space')$size), + c(K = 7) + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$cluster)[1:5], + c(7, 5, 7, 5, 4) + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$centers[1:5, 2]), + c(-0.08553708, -1.55834000, 1.60550527, 1.79873789, -0.74919031), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$totss), + 83.08558, + tolerance = 0.0001 + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$withinss)[1:3], + c(1.8509071, 0.6242121, 0.7800697), + tolerance = 0.0001 + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$tot.withinss), + 11.71979, + tolerance = 0.0001 + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$betweenss), + 71.36579, + tolerance = 0.0001 + ) + expect_equal( + 
as.vector(Cluster(dat1, weights1, space_dim = 'space')$size), + c(4, 5, 4, 6, 10, 8, 13) + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$iter), + 2 + ) + expect_equal( + as.vector(Cluster(dat1, weights1, space_dim = 'space')$ifault), + 0 ) - }) @@ -103,24 +155,37 @@ test_that("2. Output checks: dat1", { test_that("3. Output checks: dat2", { expect_equal( - length(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$cluster), - 50 + names(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))), + c("cluster", "centers", "totss", "withinss", "tot.withinss", + "betweenss", "size", "iter", "ifault") ) expect_equal( - length(Cluster(dat2)$cluster), - 300 + dim(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$cluster), + c(sdate = 50, dat = 1) ) expect_equal( - length(Cluster(dat2, space_dim = c('lon', 'lat'))$cluster), - 50 + dim(Cluster(dat2)$cluster), + c(sdate = 50, dat = 1, lat = 2, lon = 3) ) expect_equal( dim(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$centers), - c(7, 6) + c(K = 6, lat = 2, lon = 3, dat = 1) + ) + expect_equal( + dim(Cluster(dat2, weights2, nclusters = 3, space_dim = c('lat', 'lon'))$centers), + c(K = 3, lat = 2, lon = 3, dat = 1) + ) + expect_equal( + dim(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$withinss), + c(K = 6, dat = 1) + ) + expect_equal( + dim(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$size), + c(K = 6, dat = 1) ) expect_equal( - dim(Cluster(dat2, weights2, nclusters = 5, space_dim = c('lat', 'lon'))$centers), - c(5, 6) + as.vector(Cluster(dat2, weights2, space_dim = c('lat', 'lon'))$cluster)[1:5], + c(5, 1, 2, 1, 5) ) }) diff --git a/tests/testthat/test-DiffCorr.R b/tests/testthat/test-DiffCorr.R new file mode 100644 index 0000000000000000000000000000000000000000..e0834e1d05196c34d6acdbe84b2359fbfee7b5ba --- /dev/null +++ b/tests/testthat/test-DiffCorr.R @@ -0,0 +1,213 @@ +context("s2dv::DiffCorr tests") + +############################################## + +# dat1 +set.seed(1) 
+exp1 <- array(rnorm(180), dim = c(lat = 3, lon = 2, memb = 3, sdate = 10)) +set.seed(2) +ref1 <- array(rnorm(120), dim = c(lat = 3, lon = 2, memb = 2, sdate = 10)) +set.seed(3) +obs1 <- array(rnorm(60), dim = c(lat = 3, lon = 2, sdate = 10)) +Neff1 <- array(rep(9:10, 3), dim = c(lat = 3, lon = 2)) + +# dat2 +set.seed(1) +exp2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +ref2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(3) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + DiffCorr(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + DiffCorr(exp1, c('a')), + 'Parameter "obs" must be a numeric array.' + ) + expect_error( + DiffCorr(exp1, obs1, c()), + 'Parameter "ref" must be a numeric array.' + ) + # N.eff + expect_error( + DiffCorr(exp1, obs1, ref1, N.eff = array(1, dim = c(lat = 2, lon = 2))), + "If parameter \"N.eff\" is provided with an array, it must have the same dimensions as \"obs\" except \"time_dim\"." + ) + expect_error( + DiffCorr(exp1, obs1, ref1, N.eff = 1:3), + 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".' + ) + # time_dim + expect_error( + DiffCorr(exp1, obs1, ref1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + DiffCorr(exp1, obs1, ref1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension." + ) + # memb_dim + expect_error( + DiffCorr(exp1, obs1, ref1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + DiffCorr(exp1, obs1, ref1, memb_dim = 'member'), + "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension." 
+ ) + # exp, ref, and obs (2) + expect_error( + DiffCorr(exp1, array(1:10, dim = c(sdate = 10, memb = 1)), ref1, memb_dim = 'memb'), + "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions except 'memb_dim'." + ) + # method + expect_error( + DiffCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'), + 'Parameter "method" must be "pearson" or "spearman".' + ) + expect_warning( + DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = 'spearman'), + paste0("The test used in this function is built on Pearson method. ", + "To verify if Spearman method is reliable, you can run the ", + "Monte-Carlo simulations that are done in Siegert et al., 2017") + ) + # alpha + expect_error( + DiffCorr(exp2, obs2, ref2, alpha = 1), + 'Parameter "alpha" must be NULL or a number between 0 and 1.' + ) + # handle.na + expect_error( + DiffCorr(exp2, obs2, ref2, handle.na = TRUE), + 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".' + ) + # ncores + expect_error( + DiffCorr(exp2, obs2, ref2, ncores = 1.5), + 'Parameter "ncores" must be either NULL or a positive integer.' + ) + +}) + +############################################## +test_that("2. 
Output checks: dat1", { +expect_equal( +names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')), +c("diff.corr", "p.val") +) +expect_equal( +dim(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$diff), +c(lat = 3, lon = 2) +) +expect_equal( +dim(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$p.val), +c(lat = 3, lon = 2) +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$diff), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb')$p), +c(0.26166060, 0.15899774, 0.39264452, 0.27959883, 0.34736305, 0.07479832), +tolerance = 0.0001 +) +expect_equal( +names(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)), +c("diff.corr", "sign") +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$diff.corr), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign), +rep(FALSE, 6) +) +expect_equal( +suppressWarnings(as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$diff.corr)), +c(0.07272727, 0.54545455, 0.10909091, -0.01212121, -0.03636364, 1.01818182), +tolerance = 0.0001 +) +expect_equal( +suppressWarnings(as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p)), +c(0.4358970, 0.1341575, 0.3448977, 0.5114262, 0.5264872, 0.0437861), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$diff.corr), +c(0.27347087, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$p), +c(0.27841537, 0.15899774, 0.40096749, 0.27959883, 0.35889690, 0.07479832), +tolerance = 0.0001 +) + + +#--------------------------- +exp1[1] <- NA +expect_equal( +as.vector(DiffCorr(exp1, 
obs1, ref1, memb_dim = 'memb')$diff.corr), +c(NA, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_equal( +as.vector(DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$diff.corr), +c(0.17418065, 0.50556882, 0.08855968, 0.24199701, 0.22935182, 0.88336336), +tolerance = 0.0001 +) +expect_error( +DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'na.fail'), +"The data contain NAs." +) + +exp1[1,1,1,1:3] <- NA +ref1[1,1,1,4:8] <- NA +obs1[1,1,9:10] <- NA +expect_error( +DiffCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets'), +"There is no complete set of forecasts and observations." +) + + +}) + +############################################## + +test_that("3. Output checks: dat2", { +expect_equal( +names(DiffCorr(exp2, obs2, ref2)), +c("diff.corr", "p.val") +) +expect_equal( +dim(DiffCorr(exp2, obs2, ref2)$diff.corr), +NULL +) +expect_equal( +dim(DiffCorr(exp2, obs2, ref2)$p), +NULL +) +expect_equal( +DiffCorr(exp2, obs2, ref2)$p, +0.6577392, +tolerance = 0.0001 +) +expect_equal( +DiffCorr(exp2, obs2, ref2)$diff, +-0.2434725, +tolerance = 0.0001 +) + +}) diff --git a/tests/testthat/test-Load.R b/tests/testthat/test-Load.R new file mode 100644 index 0000000000000000000000000000000000000000..826613920b57c4306db09918997185a524f85414 --- /dev/null +++ b/tests/testthat/test-Load.R @@ -0,0 +1,182 @@ +context("s2dv::Load tests") + +############################################## + +test_that("1-1.", { + +path <- "/esarchive/exp/meteofrance/system6c3s/$STORE_FREQ$_mean/$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc" +dateseq <- c("19930501", "19940501", "19950501") +suppressWarnings( +res1 <- Load(var = 'sfcWind', + exp = list(list(name = 'meteofrance/system6c3s', path = path)), + obs = 'erainterim', + sdates = dateseq, leadtimemin = 2, leadtimemax = 3, + lonmin = -9, lonmax = 1.5, latmin = 75, latmax = 79.5, + storefreq = "daily", sampleperiod = 1, nmember = 3, + output 
= "lonlat", method = "bilinear", + grid = "r360x180", nprocs = 1) +) +#suppressWarnings( +#res2 <- Load(var = 'sfcWind', +# exp = list(list(name = 'meteofrance/system6c3s', path = path)), +# obs = 'erainterim', +# sdates = dateseq, leadtimemin = 2, leadtimemax = 3, +# lonmin = -9, lonmax = 1.5, latmin = 75, latmax = 79.5, +# storefreq = "daily", sampleperiod = 1, nmember = 3, +# output = "lonlat", method = "bilinear", +# grid = "r360x180") +#) +#expect_equal( +#res1$mod, res2$mod +#) +#expect_equal( +#res1$obs, res2$obs +#) +#expect_equal( +#names(res1), names(res2) +#) +expect_equal( +dim(res1$mod), +c(dataset = 1, member = 3, sdate = 3, ftime = 2, lat = 5, lon = 11) +) +expect_equal( +dim(res1$obs), +c(dataset = 1, member = 1, sdate = 3, ftime = 2, lat = 5, lon = 11) +) +expect_equal( +as.vector(drop(res1$mod)[1, , 2, 3, 4]), +c(2.0504, 2.2746, 3.4189), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(res1$obs)[, 2, 3, 4]), +c(5.9923, 2.7927, 2.5408), +tolerance = 0.0001 +) +expect_equal( +mean(res1$mod, na.rm = T), +6.007487, +tolerance = 0.0001 +) +expect_equal( +mean(res1$obs, na.rm = T), +5.043017, +tolerance = 0.0001 +) + +}) + + +test_that("1-2.", { + +path <- "/esarchive/exp/meteofrance/system6c3s/$STORE_FREQ$_mean/$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc" +dateseq <- c("19930501", "19940501", "19950501") +suppressWarnings( +res1 <- Load(var = 'sfcWind', + exp = list(list(name = 'meteofrance/system6c3s', path = path)), + obs = 'erainterim', + sdates = dateseq, leadtimemin = 2, leadtimemax = 3, + lonmin = -9, lonmax = 1.5, latmin = 75, latmax = 79.5, + storefreq = "daily", sampleperiod = 1, nmember = 3, + output = "areave", + nprocs = 1) +) +#suppressWarnings( +#res2 <- Load(var = 'sfcWind', +# exp = list(list(name = 'meteofrance/system6c3s', path = path)), +# obs = 'erainterim', +# sdates = dateseq, leadtimemin = 2, leadtimemax = 3, +# lonmin = -9, lonmax = 1.5, latmin = 75, latmax = 79.5, +# storefreq = "daily", sampleperiod = 1, nmember = 3, +# 
output = "areave") +#) +#expect_equal( +#res1$mod, res2$mod +#) +#expect_equal( +#res1$obs, res2$obs +#) +#expect_equal( +#names(res1), names(res2) +#) +expect_equal( +dim(res1$mod), +c(dataset = 1, member = 3, sdate = 3, ftime = 2) +) +expect_equal( +dim(res1$obs), +c(dataset = 1, member = 1, sdate = 3, ftime = 2) +) +expect_equal( +as.vector(drop(res1$mod)[1, , 2]), +c(5.037364, 2.395024, 5.418090), +tolerance = 0.0001 +) +expect_equal( +as.vector(drop(res1$obs)[, 2]), +c(8.728424, 2.931754, 3.918583), +tolerance = 0.0001 +) +expect_equal( +mean(res1$mod, na.rm = T), +6.162087, +tolerance = 0.0001 +) +expect_equal( +mean(res1$obs, na.rm = T), +5.178091, +tolerance = 0.0001 +) + +}) + + +test_that("1-3.", { + + ecearth <- list(name = 'm04o', + path = file.path('/esarchive/exp/ecearth', + '/$EXP_NAME$/monthly_mean', + '/$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc')) + + ecmwf <- list(name = 'system5c3s', + path = file.path('/esarchive/exp/ecmwf', + '/$EXP_NAME$/monthly_mean', + '/$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc')) + + era5 <- list(name = 'era5', + path = file.path('/esarchive/recon/ecmwf', + '$OBS_NAME$/monthly_mean', + '$VAR_NAME$_f1h/$VAR_NAME$_$YEAR$$MONTH$.nc')) + + startDates <- format(seq(as.POSIXct('1999-01-01'), as.POSIXct('1999-12-01'), by = 'months'), '%Y%m%d') + time_length <- 2 + +suppressWarnings( + data <- Load(var = 'psl', + exp = list(ecearth, ecmwf), + obs = list(era5), + sdates = startDates, + storefreq = "monthly", + leadtimemax = time_length, + output = 'lonlat', + lonmin = -2, lonmax = 2, + latmin = -5, latmax = 5, + nprocs = 1) +) + +expect_equal( +dim(data$mod), +c(dataset = 2, member = 10, sdate = 12, ftime = 2, lat = 14, lon = 5) +) +expect_equal( +dim(data$obs), +c(dataset = 1, member = 1, sdate = 12, ftime = 2, lat = 14, lon = 5) +) +expect_equal( +as.vector(data$mod[1, 1, , 2, 2, 3]), +c(rep(NA, 4), 101250, rep(NA, 5), 100940, NA), +tolerance = 0.0001 +) + + +}) diff --git a/tests/testthat/test-MeanDims.R 
b/tests/testthat/test-MeanDims.R index a431b995422d6cff9050d5f7a2248da0f15451bd..c043c784b8f302ae8c5ab53f5a00a206601ba4fc 100644 --- a/tests/testthat/test-MeanDims.R +++ b/tests/testthat/test-MeanDims.R @@ -78,6 +78,14 @@ test_that("2. Output checks: dat1", { dim(MeanDims(dat1, dims = c('sdate', 'ftime'))), c(dat = 1) ) + expect_equal( + dim(MeanDims(dat1, dims = c('sdate', 'ftime', 'dat'))), + NULL + ) + expect_equal( + dim(MeanDims(dat1, dims = 1:3)), + NULL + ) expect_equal( MeanDims(dat1, dims = c('sdate'))[1:2], c(3, 8) @@ -96,7 +104,7 @@ test_that("2. Output checks: dat1", { ) expect_equal( dim(MeanDims(dat1, dims = 1:3, drop = T)), - 1 + NULL ) expect_equal( as.vector(drop(MeanDims(dat1, dims = 1:3, drop = F))), @@ -153,6 +161,10 @@ test_that("5. Output checks: dat4", { dim(MeanDims(dat4, dims = 1, drop = F)), 1 ) + expect_equal( + dim(MeanDims(dat4, dims = 1, drop = T)), + NULL + ) }) ############################################## diff --git a/tests/testthat/test-RPS.R b/tests/testthat/test-RPS.R new file mode 100644 index 0000000000000000000000000000000000000000..04df1bb397b6906c10a16a5be1f92c1858299103 --- /dev/null +++ b/tests/testthat/test-RPS.R @@ -0,0 +1,157 @@ +context("s2dv::RPS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) + +# dat2_1 +set.seed(1) +exp2_1 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) + + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + RPS(c()), + 'Parameter "exp" must be a numeric array.' 
+ ) + expect_error( + RPS(exp1, c()), + 'Parameter "obs" must be a numeric array.' + ) + + # time_dim + expect_error( + RPS(exp1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + RPS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + RPS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RPS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + # exp, ref, and obs (2) + expect_error( + RPS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # prob_thresholds + expect_error( + RPS(exp1, obs1, prob_thresholds = 1), + "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." + ) + # indices_for_clim + expect_error( + RPS(exp1, obs1, indices_for_clim = array(1:6, dim = c(2, 3))), + "Parameter 'indices_for_clim' must be NULL or a numeric vector." + ) + expect_error( + RPS(exp1, obs1, indices_for_clim = 3:11), + "Parameter 'indices_for_clim' should be the indices of 'time_dim'." + ) + # Fair + expect_error( + RPS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." + ) + # weights + expect_error( + RPS(exp1, obs1, weights = c(0, 1)), + 'Parameter "weights" must be a two-dimensional numeric array.' + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, time = 10))), + "Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim." + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, sdate = 1))), + "Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'." + ) + # ncores + expect_error( + RPS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." 
+ ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +dim(RPS(exp1, obs1)), +c(lat = 2) +) +expect_equal( +as.vector(RPS(exp1, obs1)), +c(0.3555556, 0.4444444), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, Fair = T)), +c(0.2000000, 0.2666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, indices_for_clim = 3:5)), +c(0.5000000, 0.4666667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))), +c(1.600000, 1.888889), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPS(exp1, obs1, weights = weights1)), +c(0.3692964, 0.5346627), +tolerance = 0.0001 +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +dim(RPS(exp2, obs2)), +NULL +) +expect_equal( +RPS(exp2, obs2), +0.75, +tolerance = 0.0001 +) +expect_equal( +RPS(exp2, obs2, indices_for_clim = 2:5, prob_thresholds = seq(0.1, 0.9, 0.1)), +2.75, +tolerance = 0.0001 +) +expect_equal( +RPS(exp2, obs2), +RPS(exp2_1, obs2_1) +) +}) diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..c63add02246ef4980a8c388bed7b54a239e62d24 --- /dev/null +++ b/tests/testthat/test-RPSS.R @@ -0,0 +1,295 @@ +context("s2dv::RPSS tests") + +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) +set.seed(3) +ref1 <- array(rnorm(40), dim = c(member = 2, sdate = 10, lat = 2)) +set.seed(4) +weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) +set.seed(3) +ref2 <- array(rnorm(20), 
dim = c(member = 2, sdate = 10)) + + +############################################## +test_that("1. Input checks", { + # exp and obs (1) + expect_error( + RPSS(c()), + 'Parameter "exp" must be a numeric array.' + ) + expect_error( + RPSS(exp1, c()), + 'Parameter "obs" must be a numeric array.' + ) + + # time_dim + expect_error( + RPSS(exp1, obs1, time_dim = 1), + 'Parameter "time_dim" must be a character string.' + ) + expect_error( + RPSS(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + expect_error( + RPSS(exp2, obs2, array(rnorm(20), dim = c(member = 2, time = 10))), + "Parameter 'time_dim' is not found in 'ref' dimension." + ) + # memb_dim + expect_error( + RPSS(exp1, obs1, memb_dim = TRUE), + "Parameter 'memb_dim' must be a character string." + ) + expect_error( + RPSS(exp1, obs1, memb_dim = 'memb'), + "Parameter 'memb_dim' is not found in 'exp' dimension." + ) + expect_error( + RPSS(exp2, obs2, array(rnorm(20), dim = c(memb = 2, sdate = 10))), + "Parameter 'memb_dim' is not found in 'ref' dimension." + ) + # exp, ref, and obs (2) + expect_error( + RPSS(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + expect_error( + RPSS(exp1, obs1, ref2), + "Parameter 'exp' and 'obs' must have same length of all dimensions expect 'memb_dim'." + ) + # prob_thresholds + expect_error( + RPSS(exp1, obs1, ref1, prob_thresholds = 1), + "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." + ) + # indices_for_clim + expect_error( + RPSS(exp1, obs1, ref1, indices_for_clim = array(1:6, dim = c(2, 3))), + "Parameter 'indices_for_clim' must be NULL or a numeric vector." + ) + expect_error( + RPSS(exp1, obs1, indices_for_clim = 3:11), + "Parameter 'indices_for_clim' should be the indices of 'time_dim'." + ) + # Fair + expect_error( + RPSS(exp1, obs1, Fair = 1), + "Parameter 'Fair' must be either TRUE or FALSE." 
+ ) + # weights + expect_error( + RPS(exp1, obs1, weights = c(0, 1)), + 'Parameter "weights" must be a two-dimensional numeric array.' + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, time = 10))), + "Parameter 'weights' must have two dimensions with the names of memb_dim and time_dim." + ) + expect_error( + RPS(exp1, obs1, weights = array(1, dim = c(member = 3, sdate = 1))), + "Parameter 'weights' must have the same dimension lengths as memb_dim and time_dim in 'exp'." + ) + # ncores + expect_error( + RPSS(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be either NULL or a positive integer." + ) + +}) + +############################################## +test_that("2. Output checks: dat1", { + +expect_equal( +names(RPSS(exp1, obs1)), +c("rpss", "sign") +) +expect_equal( +names(RPSS(exp1, obs1, ref1)), +c("rpss", "sign") +) +expect_equal( +dim(RPSS(exp1, obs1)$rpss), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1)$sign), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1, ref1)$rpss), +c(lat = 2) +) +expect_equal( +dim(RPSS(exp1, obs1, ref1)$sign), +c(lat = 2) +) +# ref = NULL +expect_equal( +as.vector(RPSS(exp1, obs1)$rpss), +c(0.15789474, -0.05263158), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1)$sign), +c(FALSE, FALSE), +) +expect_equal( +as.vector(RPSS(exp1, obs1, Fair = T)$rpss), +c(0.5263158, 0.3684211), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, indices_for_clim = 3:5)$rpss), +c(-0.4062500, -0.1052632), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(0.03030303, -0.14478114), +tolerance = 0.0001 +) +# ref = ref +expect_equal( +as.vector(RPSS(exp1, obs1, ref1)$rpss), +c(0.5259259, 0.4771242), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, weights = weights1)$rpss), +c(0.6596424, 0.4063579), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1)$sign), +c(FALSE, FALSE) +) 
+expect_equal( +as.vector(RPSS(exp1, obs1, ref1, Fair = T)$rpss), +c(0.6000000, 0.6190476), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$rpss), +c(0.2857143, 0.4166667), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, indices_for_clim = 3:5)$sign), +c(FALSE, FALSE) +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(0.4754098, 0.3703704), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp1, obs1, ref1, prob_thresholds = seq(0.1, 0.9, 0.1))$sign), +c(T, F) +) + +}) + +############################################## +test_that("3. Output checks: dat2", { + +expect_equal( +names(RPSS(exp2, obs2)), +c("rpss", "sign") +) +expect_equal( +names(RPSS(exp2, obs2, ref2)), +c("rpss", "sign") +) +expect_equal( +dim(RPSS(exp2, obs2)$rpss), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2)$sign), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2, ref2)$rpss), +NULL +) +expect_equal( +dim(RPSS(exp2, obs2, ref2)$sign), +NULL +) +# ref = NULL +expect_equal( +as.vector(RPSS(exp2, obs2)$rpss), +c(-0.7763158), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2)$sign), +FALSE, +) +expect_equal( +as.vector(RPSS(exp2, obs2, Fair = T)$rpss), +c(-0.1842105), +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, indices_for_clim = 3:5)$rpss), +-0.8984375, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss), +c(-0.7272727), +tolerance = 0.0001 +) +# ref = ref +expect_equal( +as.vector(RPSS(exp2, obs2, ref2)$rpss), +0, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2)$sign), +FALSE +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, Fair = T)$rpss), +0, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$rpss), +0.03571429, +tolerance = 0.0001 +) +expect_equal( +as.vector(RPSS(exp2, obs2, ref2, indices_for_clim = 3:5)$sign), 
+FALSE
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$rpss),
+0.06557377,
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(RPSS(exp2, obs2, ref2, prob_thresholds = seq(0.1, 0.9, 0.1))$sign),
+FALSE
+)
+expect_equal(
+RPSS(exp2, obs2),
+RPSS(exp2, obs2_1)
+)
+
+})
diff --git a/tests/testthat/test-ResidualCorr.R b/tests/testthat/test-ResidualCorr.R
new file mode 100644
index 0000000000000000000000000000000000000000..c15276c93368322eec48e8b33657474fb8f4a1c4
--- /dev/null
+++ b/tests/testthat/test-ResidualCorr.R
@@ -0,0 +1,219 @@
+context("s2dv::ResidualCorr tests")
+
+##############################################
+
+# dat1
+set.seed(1)
+exp1 <- array(rnorm(180), dim = c(lat = 3, lon = 2, memb = 3, sdate = 10))
+set.seed(2)
+ref1 <- array(rnorm(120), dim = c(lat = 3, lon = 2, memb = 2, sdate = 10))
+set.seed(3)
+obs1 <- array(rnorm(60), dim = c(lat = 3, lon = 2, sdate = 10))
+Neff1 <- array(rep(9:10, 3), dim = c(lat = 3, lon = 2))
+
+# dat2
+set.seed(1)
+exp2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(2)
+ref2 <- array(rnorm(10), dim = c(sdate = 10))
+set.seed(3)
+obs2 <- array(rnorm(10), dim = c(sdate = 10))
+
+
+
+##############################################
+test_that("1. Input checks", {
+ # exp and obs (1)
+ expect_error(
+ ResidualCorr(c()),
+ 'Parameter "exp" must be a numeric array.'
+ )
+ expect_error(
+ ResidualCorr(exp1, obs1, c('a')),
+ 'Parameter "ref" must be a numeric array.'
+ )
+ expect_error(
+ ResidualCorr(exp1, c()),
+ 'Parameter "obs" must be a numeric array.'
+ )
+ # N.eff
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, N.eff = array(1, dim = c(lat = 2, lon = 2))),
+'If parameter "N.eff" is provided with an array, it must have the same dimensions as "obs" except "time_dim".'
+ )
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, N.eff = 1:3),
+ 'Parameter "N.eff" must be NA, a numeric, or an array with the same dimensions as "obs" except "time_dim".'
+ )
+ # time_dim
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, time_dim = 1),
+ 'Parameter "time_dim" must be a character string.'
+ )
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, time_dim = 'time'),
+ "Parameter 'time_dim' is not found in 'exp', 'obs', or 'ref' dimension."
+ )
+ # memb_dim
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, memb_dim = TRUE),
+ "Parameter 'memb_dim' must be a character string."
+ )
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, memb_dim = 'member'),
+ "Parameter 'memb_dim' is not found in 'exp' or 'ref' dimension."
+ )
+ # exp, ref, and obs (2)
+ expect_error(
+ ResidualCorr(exp1, obs1, array(1:10, dim = c(sdate = 10, memb = 1)), memb_dim = 'memb'),
+ "Parameter 'exp', 'obs', and 'ref' must have same length of all dimensions expect 'memb_dim'."
+ )
+ # method
+ expect_error(
+ ResidualCorr(exp1, obs1, ref1, method = 'asd', memb_dim = 'memb'),
+ 'Parameter "method" must be "pearson", "kendall", or "spearman".'
+ )
+ # alpha
+ expect_error(
+ ResidualCorr(exp2, obs2, ref2, alpha = 1),
+ 'Parameter "alpha" must be NULL or a number between 0 and 1.'
+ )
+ # handle.na
+ expect_error(
+ ResidualCorr(exp2, obs2, ref2, handle.na = TRUE),
+ 'Parameter "handle.na" must be "return.na", "only.complete.triplets" or "na.fail".'
+ )
+ # ncores
+ expect_error(
+ ResidualCorr(exp2, obs2, ref2, ncores = 1.5),
+ 'Parameter "ncores" must be either NULL or a positive integer.'
+ )
+
+})
+
+##############################################
+test_that("2. Output checks: dat1", {
+expect_equal(
+names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')),
+c("res.corr", "p.val")
+)
+expect_equal(
+dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
+c(lat = 3, lon = 2)
+)
+expect_equal(
+dim(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p.val),
+c(lat = 3, lon = 2)
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$p),
+c(0.49695468, 0.05446055, 0.25203961, 0.23522967, 0.16960864, 0.10618145),
+tolerance = 0.0001
+)
+expect_equal(
+names(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)),
+c("res.corr", "sign")
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$res.corr),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', alpha = 0.05)$sign),
+rep(FALSE, 6)
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$res),
+c(0.11111111, 0.42222222, -0.20000000, 0.02222222, 0.20000000, 0.42222222),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "kendall")$p),
+c(0.3799615, 0.1120891, 0.2897920, 0.4757064, 0.2897920, 0.1120891),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$res),
+c(0.13939394, 0.61212121, -0.27272727, 0.07878788, 0.28484848, 0.47878788),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', method = "spearman")$p),
+c(0.35046594, 0.02998607, 0.22291917, 0.41435870, 0.21251908, 0.08076146),
+tolerance = 0.0001
+)
+
+
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$res),
+c(0.002784318, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', N.eff = Neff1)$p),
+c(0.49716394, 0.05446055, 0.26690777, 0.23522967, 0.18671025, 0.10618145),
+tolerance = 0.0001
+)
+
+
+#---------------------------
+exp1[1] <- NA
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb')$res.corr),
+c(NA, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_equal(
+as.vector(ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets')$res.corr),
+c(-0.1082588, 0.537697647, -0.240071018, 0.258706464, 0.338160748, 0.432107476),
+tolerance = 0.0001
+)
+expect_error(
+ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'na.fail'),
+"The data contain NAs."
+)
+
+exp1[1,1,1,1:3] <- NA
+ref1[1,1,1,4:8] <- NA
+obs1[1,1,9:10] <- NA
+expect_error(
+ResidualCorr(exp1, obs1, ref1, memb_dim = 'memb', handle.na = 'only.complete.triplets'),
+"There is no complete set of forecasts and observations."
+)
+
+
+})
+
+##############################################
+
+test_that("3. Output checks: dat2", {
+expect_equal(
+names(ResidualCorr(exp2, obs2, ref2)),
+c("res.corr", "p.val")
+)
+expect_equal(
+dim(ResidualCorr(exp2, obs2, ref2)$res.corr),
+NULL
+)
+expect_equal(
+dim(ResidualCorr(exp2, obs2, ref2)$p),
+NULL
+)
+expect_equal(
+ResidualCorr(exp2, obs2, ref2)$p,
+0.2209847,
+tolerance = 0.0001
+)
+expect_equal(
+ResidualCorr(exp2, obs2, ref2)$res,
+-0.274962,
+tolerance = 0.0001
+)
+
+})