diff --git a/.Rbuildignore b/.Rbuildignore index 0a2185526f4f30e029d88db4c83f416220c149e6..e817c8bfbfcf40c9315972a5fd5cd8c7986ffeb3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,6 +10,7 @@ README\.md$ \..*\.RData$ vignettes .gitlab-ci.yml +.lintr # unit tests should be ignored when building the package for CRAN ^tests$ # CDO is not in windbuilder, so we can test the unit tests by winbuilder diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8ffc555de890974f6ebbe3a6f0948a0cc52c2886..a5dc3846bf2bb08212487171ea8ebc40bc81535a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,6 @@ stages: - build + build: stage: build script: @@ -7,4 +8,12 @@ build: - module load CDO/1.9.8-foss-2015a - R CMD build --resave-data . - R CMD check --as-cran --no-manual --run-donttest s2dv_*.tar.gz + +lint-check: + stage: build + script: + - module load R/4.1.2-foss-2015a-bare + - module load CDO/1.9.8-foss-2015a + - echo "Run lintr on the package..." + - Rscript -e 'lintr::lint_package(path = ".")' - R -e 'covr::package_coverage()' diff --git a/.gitlab/issue_templates/Default.md b/.gitlab/issue_templates/Default.md index 30ba62047b6689018c721a83ea68ef042346cf78..ef73e07f50b25174fd354d7cca2812119915509d 100644 --- a/.gitlab/issue_templates/Default.md +++ b/.gitlab/issue_templates/Default.md @@ -1,6 +1,6 @@ (This is a template to report problems or suggest a new development. Please fill in the relevant information and remove the rest.) -Hi @aho (and @erifarov), +Hi @abatalla and @vagudets, #### Summary (Bug: Summarize the bug and explain briefly the expected and the current behavior.) diff --git a/.lintr b/.lintr new file mode 100644 index 0000000000000000000000000000000000000000..8431cf7ab4a86c0183037501c445bd121a1062bf --- /dev/null +++ b/.lintr @@ -0,0 +1,43 @@ +linters: linters_with_tags( # lintr_3.1.1 + tags = c("package_development", "readability", "best_practices"), + line_length_linter = line_length_linter(100L), + T_and_F_symbol_linter = NULL, + quotes_linter = NULL, + commented_code_linter = NULL, + implicit_integer_linter = NULL, + vector_logic_linter = NULL, + extraction_operator_linter = NULL, + function_left_parentheses_linter = NULL, + semicolon_linter = NULL, + indentation_linter = NULL, + unnecessary_nested_if_linter = NULL, + if_not_else_linter = NULL, + object_length_linter = NULL, + infix_spaces_linter(exclude_operators = "~") + ) +exclusions: list( + "R/Load.R", + "R/ColorBar.R", + "R/AnimateMap.R", "R/Plot2VarsVsLTime.R", "R/PlotACC.R", "R/PlotAno.R", "R/PlotBoxWhisker.R", + "R/PlotClim.R", "R/PlotEquiMap.R", "R/PlotLayout.R", "R/PlotMatrix.R", + "R/PlotSection.R", "R/PlotStereoMap.R", "R/PlotVsLTime.R", + "R/ConfigApplyMatchingEntries.R", "R/ConfigEditDefinition.R", "R/ConfigEditEntry.R", + "R/ConfigFileOpen.R", "R/ConfigShowSimilarEntries.R", "R/ConfigShowTable.R", + "R/clim.palette.R", + "R/sampleDepthData.R", "R/sampleMap.R", "R/sampleTimeSeries.R", + "R/ToyModel.R", + "R/s2dv-package.R", + "tests/testthat/", + "tests/testthat.R", + "R/CDORemap.R" = list( + function_argument_linter = Inf, + nonportable_path_linter = Inf + ), + "R/NAO.R" = list( + function_argument_linter = Inf + ), + "R/Utils.R" = list( + function_argument_linter = Inf, + nonportable_path_linter = Inf + ) + ) diff --git a/.lintr_result b/.lintr_result new file mode 100644 index 0000000000000000000000000000000000000000..25342f93ea01b203b2755ca93fc2e00c3842ff15 Binary files /dev/null and b/.lintr_result differ diff --git a/DESCRIPTION b/DESCRIPTION index 
ec571f4ce6bd0ba036ae274b7e085237d6c19863..aa79da2dade329a233747e6729878cd0990af740 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,9 @@ Package: s2dv Title: A Set of Common Tools for Seasonal to Decadal Verification -Version: 2.0.0 +Version: 2.1.0 Authors@R: c( person("BSC-CNS", role = c("aut", "cph")), - person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut", "cre")), + person("An-Chi", "Ho", , "an.ho@bsc.es", role = c("aut")), person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = "aut"), person("Roberto", "Bilbao", , "roberto.bilbao@bsc.es", role = "ctb"), person("Josep", "Cos", , "josep.cos@bsc.es", role = "ctb"), @@ -11,7 +11,10 @@ Authors@R: c( person("Llorenç", "Lledó", , "llorenc.lledo@bsc.es", role = "ctb"), person("Andrea", "Manrique", , "andrea.manrique@bsc.es", role = "ctb"), person("Deborah", "Verfaillie", , "deborah.verfaillie@bsc.es", role = "ctb"), - person("Eva", "Rifà", , "eva.rifarovira@bsc.es", role = "ctb")) + person("Eva", "Rifà", , "eva.rifarovira@bsc.es", role = "ctb"), + person("Victòria", "Agudetse", , "victoria.agudetse@bsc.es", role = "ctb"), + person("Nadia", "Milders", , "nadia.milders@bsc.es", role = "ctb"), + person("Ariadna", "Batalla", , "ariadna.batalla@bsc.es", role = c("ctb", "cre"))) Description: The advanced version of package 's2dverification'. It is intended for 'seasonal to decadal' (s2d) climate forecast verification, but it can also be used in other kinds of forecasts or general climate analysis. @@ -50,5 +53,5 @@ BugReports: https://earth.bsc.es/gitlab/es/s2dv/-/issues LazyData: true SystemRequirements: cdo Encoding: UTF-8 -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 9214a1a54c1b7c4bc7e1e81203b51ac5e674cc26..587aec211d4a7bb13e0ef6e9e7dd9a4a99f6eadd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ export(Season) export(SignalNoiseRatio) export(Smoothing) export(Spectrum) +export(SprErr) export(Spread) export(StatSeasAtlHurr) export(TPI) diff --git a/NEWS.md b/NEWS.md index 4ea74df7b20bb85cb94f3e81cdea5a1379c3551c..2adf6fdfb13e79c0eb970fc05f0375814c192bc4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,26 @@ +# s2dv 2.1.0 (Release date: 2023-09-26) + +**Bugfixes** +- CDORemap() crop = T bug fix in R >= 4.2.0 +- CDORemap() crop = T bug fix when coordinates are sorted in decreasing order +- PlotEquiMap() and PlotLayout() create color bar correctly when data have infinite values +- Correct Corr() output dimensions when dat_dim and memb_dim are NULL +- NAO(): eliminate ftime_dim check when ftime dimension is not required by the function +- Histo2Hindcast(): Fill array with NA values for time steps before the initial date +- Add warning for default climatology when ref is null in RMSSS() and MSSS() + +**Development** +- NAO(): new parameter "exp_cor" to calculate forecast +- New parameter "abs_threshold" in GetProbs() +- New parameter "return_mean" in RPS() and CRPS() +- New parameter "print_sys_msg" in CDORemap() +- New function SprErr() +- New parameter "alpha" in Bias() +- New parameter "N.eff" in RandomWalkTest() + +**Other** +- Add citation file + # s2dv 2.0.0 (Release date: 2023-10-11) The compability break happens at the parameter changes. All the functionality remains the same but please pay attention to the parameter changes like name or default value if some error is raised. 
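A usage sketch for the new `alpha` argument of `Bias()` listed in the 2.1.0 changelog above (and exercised in the roxygen examples added later in this patch): when `alpha` is given and `absolute = FALSE`, a Welch's t-test is applied per grid point and the function returns a list with the bias and a logical significance array. This is a minimal illustration; the array sizes below are arbitrary and not taken from the package.

```r
# Minimal sketch of the new 'alpha' argument of Bias() (s2dv >= 2.1.0).
# Dimension names follow the roxygen example in this patch; sizes are arbitrary.
library(s2dv)

set.seed(1)
exp <- array(rnorm(3000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 20))
obs <- array(rnorm(300), dim = c(dat = 1, lat = 3, lon = 5, sdate = 20))

# alpha = NULL keeps the previous behaviour: a plain numeric bias array.
bias <- Bias(exp, obs, memb_dim = 'member', alpha = NULL)

# With alpha set (and absolute = FALSE), a list is returned, named as in .Bias():
# $bias - mean bias; $sign - TRUE where the bias is significant at the given level.
res <- Bias(exp, obs, memb_dim = 'member', alpha = 0.05)
str(res)
```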
diff --git a/R/ACC.R b/R/ACC.R index 131d15a0ae3d0d442aa6867193ff3900654a6c1f..ee9d3812ff82b526dc2499c1f710453b36b8e50e 100644 --- a/R/ACC.R +++ b/R/ACC.R @@ -1,4 +1,5 @@ -#'Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation +#'Compute the spatial anomaly correlation coefficient between the forecast and +#'corresponding observation #' #'Calculate the spatial anomaly correlation coefficient (ACC) for the ensemble #'mean of each model and the corresponding references over a spatial domain. It @@ -149,11 +150,11 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ", - "lat_dim and lon_dim.")) + stop("Parameter 'exp' and 'obs' must have at least dimensions ", + "lat_dim and lon_dim.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## dat_dim @@ -213,14 +214,14 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', stop("Parameter 'lat' cannot be NULL. It is required for area weighting.") } if (!is.numeric(lat) | length(lat) != dim(exp)[lat_dim]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") } ## lon if (!is.null(lon)) { if (!is.numeric(lon) | length(lon) != dim(exp)[lon_dim]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") } } ## lonlatbox @@ -270,7 +271,7 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) - if(!all(name_exp %in% name_obs) | !all(name_obs %in% name_exp)) { + if (!all(name_exp %in% name_obs) | !all(name_obs %in% name_exp)) { stop("Parameter 'exp' and 'obs' must have same dimension names.") } if (!is.null(dat_dim)) { @@ -282,8 +283,8 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', name_obs <- name_obs[-which(name_obs == memb_dim)] } if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all the dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.") } #----------------------------------------------------------------- @@ -329,7 +330,8 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', if (is.null(avg_dim)) { target_dims <- list(c(lat_dim, lon_dim, dat_dim), c(lat_dim, lon_dim, dat_dim)) } else { - target_dims <- list(c(lat_dim, lon_dim, avg_dim, dat_dim), c(lat_dim, lon_dim, avg_dim, dat_dim)) + target_dims <- list(c(lat_dim, lon_dim, avg_dim, dat_dim), + c(lat_dim, lon_dim, avg_dim, dat_dim)) } res <- Apply(list(exp, obs), target_dims = 
target_dims, @@ -367,7 +369,7 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', return(res) } -.ACC <- function(exp, obs, dat_dim = NULL, avg_dim = 'sdate', lat, alpha = 0.05, +.ACC <- function(exp, obs, lat, dat_dim = NULL, avg_dim = 'sdate', alpha = 0.05, pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # .ACC() should use all the spatial points to calculate ACC. It returns [nexp, nobs]. # If dat_dim = NULL, it returns a number. @@ -413,7 +415,7 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', ## spatial centralization for each [avg_dim, dat] dim_exp <- dim(exp) dim_obs <- dim(obs) - wt <- cos(lat * pi/180) + wt <- cos(lat * pi / 180) wt <- rep(wt, times = prod(dim_exp[2:length(dim_exp)])) if (is.null(avg_dim) & is.null(dat_dim)) { #[lat, lon] @@ -457,26 +459,26 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', # ACC top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) - acc[iexp, iobs] <- top/bottom #a number + acc[iexp, iobs] <- top / bottom #a number # handle bottom = 0 if (is.infinite(acc[iexp, iobs])) acc[iexp, iobs] <- NA # pval, sign, and conf - if (pval | conf | sign) { - if (conftype == "parametric") { - # calculate effective sample size - eno <- .Eno(as.vector(obs_sub), na.action = na.pass) - - if (pval | sign) { - t <- qt(1 - alpha, eno - 2) # a number - p.value <- sqrt(t^2 / (t^2 + eno - 2)) - if (pval) p.val[iexp, iobs] <- p.value - if (sign) signif[iexp, iobs] <- !is.na(p.value) & p.value <= alpha - } - if (conf) { - conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(1 - alpha / 2) / sqrt(eno - 3)) - conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + qnorm(alpha / 2) / sqrt(eno - 3)) - } + if ((pval | conf | sign) && conftype == "parametric") { + # calculate effective sample size + eno <- .Eno(as.vector(obs_sub), na.action = na.pass) + + if (pval | sign) { + t <- qt(1 - alpha, eno - 2) # a number + p.value <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval) p.val[iexp, iobs] <- p.value + if (sign) signif[iexp, iobs] <- !is.na(p.value) & p.value <= alpha + } + if (conf) { + conf.upper[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + + qnorm(1 - alpha / 2) / sqrt(eno - 3)) + conf.lower[iexp, iobs] <- tanh(atanh(acc[iexp, iobs]) + + qnorm(alpha / 2) / sqrt(eno - 3)) } } @@ -486,46 +488,43 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', # MACC top <- sum(exp_sub * obs_sub, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub^2, na.rm = TRUE) * sum(obs_sub^2, na.rm = TRUE)) - macc[iexp, iobs] <- top/bottom #a number + macc[iexp, iobs] <- top / bottom #a number # handle bottom = 0 if (is.infinite(macc[iexp, iobs])) macc[iexp, iobs] <- NA # ACC - for (i in 1:dim(acc)[3]) { + for (i in seq_len(dim(acc)[3])) { exp_sub_i <- exp_sub[, , i] obs_sub_i <- obs_sub[, , i] top <- sum(exp_sub_i * obs_sub_i, na.rm = TRUE) #a number bottom <- sqrt(sum(exp_sub_i^2, na.rm = TRUE) * sum(obs_sub_i^2, na.rm = TRUE)) - acc[iexp, iobs, i] <- top/bottom #a number + acc[iexp, iobs, i] <- top / bottom #a number # handle bottom = 0 if (is.infinite(acc[iexp, iobs, i])) acc[iexp, iobs, i] <- NA } # pval, sign, and conf - if (pval | sign | conf) { - if (conftype == "parametric") { - # calculate effective sample size along lat_dim and lon_dim - # combine lat_dim and lon_dim into one dim first - obs_tmp <- array(obs_sub, - dim = c(space = prod(dim(obs_sub)[1:2]), - dim(obs_sub)[3])) - eno <- 
apply(obs_tmp, 2, .Eno, na.action = na.pass) # a vector of avg_dim - if (pval | sign) { - t <- qt(1 - alpha, eno - 2) # a vector of avg_dim - p.value <- sqrt(t^2 / (t^2 + eno - 2)) - if (pval) p.val[iexp, iobs, ] <- p.value - if (sign) signif[iexp, iobs, ] <- !is.na(p.value) & p.value <= alpha - } - if (conf) { - conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + + if ((pval | sign | conf) && conftype == "parametric") { + # calculate effective sample size along lat_dim and lon_dim + # combine lat_dim and lon_dim into one dim first + obs_tmp <- array(obs_sub, + dim = c(space = prod(dim(obs_sub)[1:2]), + dim(obs_sub)[3])) + eno <- apply(obs_tmp, 2, .Eno, na.action = na.pass) # a vector of avg_dim + if (pval | sign) { + t <- qt(1 - alpha, eno - 2) # a vector of avg_dim + p.value <- sqrt(t^2 / (t^2 + eno - 2)) + if (pval) p.val[iexp, iobs, ] <- p.value + if (sign) signif[iexp, iobs, ] <- !is.na(p.value) & p.value <= alpha + } + if (conf) { + conf.upper[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm(1 - alpha / 2) / sqrt(eno - 3)) - conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + + conf.lower[iexp, iobs, ] <- tanh(atanh(acc[iexp, iobs, ]) + qnorm(alpha / 2) / sqrt(eno - 3)) - } } } - } # if avg_dim is not NULL } @@ -569,8 +568,8 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', } -.ACC_bootstrap <- function(exp, obs, dat_dim = NULL, - avg_dim = 'sdate', memb_dim = NULL, lat, alpha = 0.05, +.ACC_bootstrap <- function(exp, obs, lat, dat_dim = NULL, + avg_dim = 'sdate', memb_dim = NULL, alpha = 0.05, pval = TRUE, sign = FALSE, conf = TRUE, conftype = "parametric") { # if (is.null(avg_dim)) # exp: [memb_exp, (dat_exp), lat, lon] @@ -605,10 +604,12 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', for (jdraw in 1:ndraw) { #choose a randomly member index for each point of the matrix - indexp <- array(sample(nmembexp, size = prod(dim(exp)[-c(length(dim(exp)) - 1, length(dim(exp)))]), + indexp <- array(sample(nmembexp, + size = prod(dim(exp)[-c(length(dim(exp)) - 1, length(dim(exp)))]), replace = TRUE), dim = dim(exp)) - indobs <- array(sample(nmembobs, size = prod(dim(obs)[-c(length(dim(obs)) - 1, length(dim(obs)))]), + indobs <- array(sample(nmembobs, + size = prod(dim(obs)[-c(length(dim(obs)) - 1, length(dim(obs)))]), replace = TRUE), dim = dim(obs)) @@ -620,10 +621,10 @@ ACC <- function(exp, obs, dat_dim = NULL, lat_dim = 'lat', lon_dim = 'lon', # if (is.null(avg_dim)) { drawexp <- array( - apply(varindexp, c(2:length(dim(exp))), function(x) x[,1][x[,2]] ), + apply(varindexp, 2:length(dim(exp)), function(x) x[, 1][x[, 2]]), dim = dim(exp)) drawobs <- array( - apply(varindobs, c(2:length(dim(obs))), function(x) x[,1][x[,2]] ), + apply(varindobs, 2:length(dim(obs)), function(x) x[, 1][x[, 2]]), dim = dim(obs)) # ensemble mean before .ACC diff --git a/R/AMV.R b/R/AMV.R index 916762813f71cacbd6aa4f1541dcf2207544e666..60b07dad451956eab4701ab3565cca3e6a14b666 100644 --- a/R/AMV.R +++ b/R/AMV.R @@ -123,13 +123,13 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'lon_dim' is not found in 'data' dimension.") } # data_lats and data_lons part2 - if (dim(data)[lat_dim] != length(data_lats)){ - stop(paste0("The latitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lats'.")) + if (dim(data)[lat_dim] != length(data_lats)) { + stop("The latitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lats'.") } - if 
(dim(data)[lon_dim] != length(data_lons)){ - stop(paste0("The longitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lons'.")) + if (dim(data)[lon_dim] != length(data_lons)) { + stop("The longitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lons'.") } # ncores if (!is.null(ncores)) { @@ -140,28 +140,26 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } # mask if (!is.null(mask)) { - if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & + if (is.array(mask) & identical(names(dim(mask)), c(lat_dim, lon_dim)) & identical(as.integer(dim(mask)), c(length(data_lats), length(data_lons)))) { ## To mask those grid point that are missing in the observations mask <- s2dv::Reorder(data = mask, order = c(lat_dim, lon_dim)) - fun_mask <- function(data, mask){ + fun_mask <- function(data, mask) { data[mask == 0] <- NA return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { - stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", - "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", - "points that have to be masked.")) + stop("Parameter 'mask' must be NULL (no mask) or a numerical array ", + "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", + "points that have to be masked.") } } # monini - if (type == 'dcpp') { - if (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | - monini > 12) { - stop("Parameter 'monini' must be an integer from 1 to 12.") - } + if (type == 'dcpp' && + (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | monini > 12)) { + stop("Parameter 'monini' must be an integer from 1 to 12.") } # fmonth_dim if (type == 'dcpp') { @@ -183,11 +181,11 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } # indices_for_clim if (!is.null(indices_for_clim)) { - if (!class(indices_for_clim) %in% c('numeric', 'integer') - & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { - stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", - "or NULL to compute the anomalies based on the whole period, ", - "or FALSE if data are already anomalies")) + if (!(is(indices_for_clim, "numeric") || is(indices_for_clim, "integer")) & + !(is.logical(indices_for_clim) & !any(indices_for_clim))) { + stop("The parameter 'indices_for_clim' must be a numeric vector ", + "or NULL to compute the anomalies based on the whole period, ", + "or FALSE if data are already anomalies") } } # year_dim @@ -209,7 +207,7 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } } # na.rm - if (!na.rm %in% c(TRUE,FALSE)) { + if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } @@ -230,12 +228,12 @@ AMV <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo region = regions$reg2, londim = lon_dim, latdim = lat_dim) - data <- ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), weights = NULL, + data <- ClimProjDiags::CombineIndices(indices = list(mean_1, mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) - if (type == 'dcpp'){ + if (type == 'dcpp') { target_dims <- c(sdate_dim, fmonth_dim) - } else if (type %in% c('hist','obs')){ + } else if (type %in% c('hist', 'obs')) { target_dims <- c(year_dim, month_dim) } diff --git a/R/AbsBiasSS.R b/R/AbsBiasSS.R index 
e55d3d8f6a106a481f528c139d6e9b09c952e5d6..1b81797f0beed9131887e22e528bfa2d427a134c 100644 --- a/R/AbsBiasSS.R +++ b/R/AbsBiasSS.R @@ -82,14 +82,14 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, if (!is.array(obs) | !is.numeric(obs)) { stop("Parameter 'obs' must be a numeric array.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { if (!is.array(ref) | !is.numeric(ref)) stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } } @@ -142,8 +142,8 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -153,17 +153,17 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") } } ## na.rm @@ -177,13 +177,12 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, ## sig_method.type #NOTE: These are the types of RandomWalkTest() if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { - stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") } - if (sig_method.type == 'two.sided.approx') { - if (alpha != 0.05) { - .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") - } + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. 
Returning the significance at the 0.05 significance level.") } ## ncores @@ -283,12 +282,14 @@ AbsBiasSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = NULL, } ## Bias of the exp - bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + bias_exp <- .Bias(exp = exp_data, obs = obs_data, na.rm = na.rm, + absolute = TRUE, time_mean = FALSE) ## Bias of the ref if (is.null(ref)) { ## Climatological forecast ref_data <- rep(mean(obs_data, na.rm = na.rm), length(obs_data)) } - bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, absolute = TRUE, time_mean = FALSE) + bias_ref <- .Bias(exp = ref_data, obs = obs_data, na.rm = na.rm, + absolute = TRUE, time_mean = FALSE) ## Skill score and significance biasSS[i, j] <- 1 - mean(bias_exp) / mean(bias_ref) sign[i, j] <- .RandomWalkTest(skill_A = bias_exp, skill_B = bias_ref, diff --git a/R/Ano.R b/R/Ano.R index c4c70a330bf00074632ec18f1b6ea243e621ced8..ccb41e0a98e41fe51c9873618cdf3dcd9231daaf 100644 --- a/R/Ano.R +++ b/R/Ano.R @@ -42,7 +42,7 @@ dim(data) <- c(length(data)) names(dim(data)) <- 'tmp_name' } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## clim @@ -56,7 +56,7 @@ dim(clim) <- c(length(clim)) names(dim(clim)) <- 'tmp_name' } - if (any(is.null(names(dim(clim))))| any(nchar(names(dim(clim))) == 0)) { + if (any(is.null(names(dim(clim)))) | any(nchar(names(dim(clim))) == 0)) { stop("Parameter 'clim' must have dimension names.") } ## data and clim @@ -87,14 +87,14 @@ } if (!parallel_compute) { target_dims_ind <- match(names(dim(clim)), names(dim(data))) - if (any(target_dims_ind != sort(target_dims_ind))) { + if (is.unsorted(target_dims_ind)) { clim <- Reorder(clim, match(sort(target_dims_ind), target_dims_ind)) } if (length(dim(data)) == length(dim(clim))) { res <- data - clim } else { target_dims_ind <- match(names(dim(clim)), names(dim(data))) - margin_dims_ind <- c(1:length(dim(data)))[-target_dims_ind] + margin_dims_ind <- seq_along(dim(data))[-target_dims_ind] res <- apply(data, margin_dims_ind, .Ano, clim) res <- array(res, dim = dim(data)[c(target_dims_ind, margin_dims_ind)]) } diff --git a/R/Ano_CrossValid.R b/R/Ano_CrossValid.R index 13f7e977c37a1796879047f2be73324967d65f7a..e083fc90701b97a0db106c034bffd93e67b01b24 100644 --- a/R/Ano_CrossValid.R +++ b/R/Ano_CrossValid.R @@ -65,11 +65,11 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must have at least dimensions ", - "time_dim and dat_dim.")) + stop("Parameter 'exp' and 'obs' must have at least dimensions ", + "time_dim and dat_dim.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -114,14 +114,14 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', " Set it as NULL if there is no dataset dimension.") } # If dat_dim is not in obs, add it in - if (any(!dat_dim %in% names(dim(obs)))) { + if (!all(dat_dim %in% names(dim(obs)))) { 
reset_obs_dim <- TRUE ori_obs_dim <- dim(obs) dim(obs) <- c(dim(obs), rep(1, length(dat_dim[which(!dat_dim %in% names(dim(obs)))]))) names(dim(obs)) <- c(names(ori_obs_dim), dat_dim[which(!dat_dim %in% names(dim(obs)))]) } # If dat_dim is not in obs, add it in - if (any(!dat_dim %in% names(dim(exp)))) { + if (!all(dat_dim %in% names(dim(exp)))) { reset_exp_dim <- TRUE ori_exp_dim <- dim(exp) dim(exp) <- c(dim(exp), rep(1, length(dat_dim[which(!dat_dim %in% names(dim(exp)))]))) @@ -129,10 +129,8 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', } } # memb_dim and dat_dim - if (!memb) { - if (!memb_dim %in% dat_dim) { - stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.") - } + if (!memb && !memb_dim %in% dat_dim) { + stop("Parameter 'memb_dim' must be one element in parameter 'dat_dim'.") } ## ncores @@ -146,14 +144,14 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(dat_dim)) { - for (i in 1:length(dat_dim)) { + for (i in seq_along(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim[i])] name_obs <- name_obs[-which(name_obs == dat_dim[i])] } } if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have the same length of ", - "all dimensions except 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have the same length of ", + "all dimensions except 'dat_dim'.") } ############################### @@ -161,15 +159,14 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', name_exp <- names(dim(exp)) name_obs <- names(dim(obs)) order_obs <- match(name_exp, name_obs) - if (any(order_obs != sort(order_obs))) { - obs <- Reorder(obs, order_obs) - } + obs <- Reorder(obs, order_obs) #----------------------------------- - # Per-paired method: If any sdate along dat_dim is NA, turn all sdate points along dat_dim into NA. + # Per-paired method: If any sdate along dat_dim is NA, turn all sdate points + # along dat_dim into NA. 
if (!is.null(dat_dim)) { pos <- rep(0, length(dat_dim)) # dat_dim: [dataset, member] - for (i in 1:length(dat_dim)) { + for (i in seq_along(dat_dim)) { pos[i] <- which(names(dim(obs)) == dat_dim[i]) } outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) + @@ -226,8 +223,6 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', if (is.null(dat_dim)) { ini_dims_exp <- dim(exp) ini_dims_obs <- dim(obs) - ini_dims_exp_for_clim <- dim(exp) - ini_dims_obs_for_clim <- dim(exp) exp <- InsertDim(exp, posdim = 2, lendim = 1, name = 'dataset') exp_for_clim <- InsertDim(exp_for_clim, posdim = 2, lendim = 1, name = 'dataset') obs <- InsertDim(obs, posdim = 2, lendim = 1, name = 'dataset') @@ -239,12 +234,13 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', ano_exp_list <- vector('list', length = dim(exp)[1]) #length: [sdate] ano_obs_list <- vector('list', length = dim(obs)[1]) - for (tt in 1:dim(exp)[1]) { #[sdate] + for (tt in seq_len(dim(exp)[1])) { #[sdate] # calculate clim - exp_sub <- ClimProjDiags::Subset(exp_for_clim, 1, c(1:dim(exp)[1])[-tt]) - obs_sub <- ClimProjDiags::Subset(obs_for_clim, 1, c(1:dim(obs)[1])[-tt]) - clim_exp <- apply(exp_sub, c(1:length(dim(exp)))[-1], mean, na.rm = TRUE) # average out time_dim -> [dat, memb] - clim_obs <- apply(obs_sub, c(1:length(dim(obs)))[-1], mean, na.rm = TRUE) + exp_sub <- ClimProjDiags::Subset(exp_for_clim, 1, seq_len(dim(exp)[1])[-tt]) + obs_sub <- ClimProjDiags::Subset(obs_for_clim, 1, seq_len(dim(obs)[1])[-tt]) + # Average out time_dim -> [dat, memb] + clim_exp <- apply(exp_sub, seq_along(dim(exp))[-1], mean, na.rm = TRUE) + clim_obs <- apply(obs_sub, seq_along(dim(obs))[-1], mean, na.rm = TRUE) # ensemble mean if (!memb) { @@ -253,7 +249,7 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', clim_obs <- mean(clim_obs, na.rm = TRUE) } else { pos <- which(names(dim(clim_exp)) == memb_dim) - pos <- c(1:length(dim(clim_exp)))[-pos] + pos <- seq_along(dim(clim_exp))[-pos] dim_name <- names(dim(clim_exp)) dim_exp_ori <- dim(clim_exp) dim_obs_ori <- dim(clim_obs) @@ -272,13 +268,8 @@ Ano_CrossValid <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', clim_obs_tmp <- array(clim_obs, dim = c(dim_obs_ori[pos], dim_obs_ori[-pos])) # Reorder it back to dim(clim_exp) tmp <- match(dim_exp_ori, dim(clim_exp_tmp)) - if (any(tmp != sort(tmp))) { - clim_exp <- Reorder(clim_exp_tmp, tmp) - clim_obs <- Reorder(clim_obs_tmp, tmp) - } else { - clim_exp <- clim_exp_tmp - clim_obs <- clim_obs_tmp - } + clim_exp <- Reorder(clim_exp_tmp, tmp) + clim_obs <- Reorder(clim_obs_tmp, tmp) } } # calculate ano diff --git a/R/Bias.R b/R/Bias.R index 0319a0f08e23e388046ce895e7a06aa82a0d9a41..d4887189220daaad310cf7794870ffb108c6435d 100644 --- a/R/Bias.R +++ b/R/Bias.R @@ -27,6 +27,8 @@ #' bias. The default value is FALSE. #'@param time_mean A logical value indicating whether to compute the temporal #' mean of the bias. The default value is TRUE. +#'@param alpha A numeric or NULL (default) to indicate the significance level +#' using Welch's t-test. Only available when absolute is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -34,7 +36,10 @@ #'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of #''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). 
nexp is the number #'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +#'alpha is specified, and absolute is FALSE, the result is a list with two +#'elements: the bias as described above and the significance as a logical array +#'with the same dimensions. #' #'@references #'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 @@ -43,12 +48,15 @@ #'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) #'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) #'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#'bias2 <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.01) +#'abs_bias <- Bias(exp = exp, obs = obs, memb_dim = 'member', absolute = TRUE, alpha = NULL) #' #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE, ncores = NULL) { +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, + na.rm = FALSE, absolute = FALSE, time_mean = TRUE, + alpha = 0.05, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -56,8 +64,8 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, stop("Parameter 'exp' must be a numeric array.") if (!is.array(obs) | !is.numeric(obs)) stop("Parameter 'obs' must be a numeric array.") - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -105,8 +113,8 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } ## na.rm if (!is.logical(na.rm) | length(na.rm) > 1) { @@ -120,6 +128,17 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, if (!is.logical(time_mean) | length(time_mean) > 1) { stop("Parameter 'time_mean' must be one logical value.") } + ## alpha + if (!is.null(alpha)) { + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be NULL or a numeric value between 0 and 1.") + } + if (absolute) { + alpha <- NULL + .warning("Parameter 'absolute' is TRUE, so 'alpha' has been set to ", + "NULL and significance will not be returned.") + } + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | @@ -144,16 +163,19 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = na.rm, absolute = absolute, time_mean = time_mean, - ncores = ncores)$output1 - + alpha = alpha, + ncores = ncores) + + if (is.null(alpha)) { + bias <- bias$output1 + } return(bias) } .Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE, - absolute = FALSE, time_mean = TRUE) { + absolute = FALSE, time_mean = TRUE,
alpha = NULL) { # exp and obs: [sdate, (dat)] - if (is.null(dat_dim)) { bias <- exp - obs @@ -164,15 +186,33 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, if (isTRUE(time_mean)) { bias <- mean(bias, na.rm = na.rm) } - + + if (!is.null(alpha)) { + if (!absolute) { + if (all(is.na(bias))) { + sign <- NA + } else { + pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value + sign <- pval <= alpha + } + } + } } else { nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - + pval <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) for (i in 1:nexp) { for (j in 1:nobs) { bias[, i, j] <- exp[, i] - obs[, j] + if (!is.null(alpha)) { + if (!absolute) { + pval[i, j] <- t.test(x = obs[, j], y = exp[, i], + alternative = "two.sided")$p.value + sign[i, j] <- pval[i, j] <= alpha + } + } } } @@ -182,8 +222,14 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, if (isTRUE(time_mean)) { bias <- MeanDims(bias, time_dim, na.rm = na.rm) + if (!is.null(sign)) { + sign[which(is.na(bias))] <- NA + } } + } + if (!is.null(alpha) && !absolute) { + return(list(bias = bias, sign = sign)) + } else { + return(bias) } - - return(bias) } diff --git a/R/BrierScore.R b/R/BrierScore.R index 22f497d18553240f67693d3891c912cc43be059a..82526c03fe97195d961ff91ccfccde5ad5571e3e 100644 --- a/R/BrierScore.R +++ b/R/BrierScore.R @@ -108,11 +108,11 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd dim(obs) <- c(length(obs)) names(dim(obs)) <- time_dim } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if (any(!obs %in% c(0, 1))) { + if (!all(obs %in% c(0, 1))) { stop("Parameter 'obs' must be binary events (0 or 1).") } ## thresholds @@ -146,10 +146,8 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd if (!memb_dim %in% names(dim(exp))) { stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } - if (memb_dim %in% names(dim(obs))) { - if (dim(obs)[memb_dim] != 1) { - stop("The length of parameter 'memb_dim' in 'obs' must be 1.") - } + if (memb_dim %in% names(dim(obs)) && dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") } } ## exp and obs (2) @@ -158,7 +156,7 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd stop("Parameter 'exp' must be within [0, 1] range.") } } else { - if (any(!exp %in% c(0, 1))) { + if (!all(exp %in% c(0, 1))) { stop("Parameter 'exp' must be 0 or 1 if it has memb_dim.") } } @@ -174,13 +172,13 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] } - if (any(!name_exp %in% name_obs) | any(!name_obs %in% name_exp)) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + if (!all(name_exp %in% name_obs) | !all(name_obs %in% name_exp)) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' 
and 'memb_dim'.") } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' and 'memb_dim'.") } ## ncores if (!is.null(ncores)) { @@ -281,7 +279,8 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd ressum <- ressum + nk[i] * (okbar[i] - obar)^2 for (j in 1:nk[i]) { term1 <- term1 + (exp[bins[[i]][[1]][j]] - fkbar[i])^2 - term2 <- term2 + (exp[bins[[i]][[1]][j]] - fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) + term2 <- term2 + (exp[bins[[i]][[1]][j]] - + fkbar[i]) * (obs[bins[[i]][[1]][j]] - okbar[i]) } } } @@ -306,18 +305,15 @@ BrierScore <- function(exp, obs, thresholds = seq(0.1, 0.9, 0.1), time_dim = 'sd rel_bias_corrected <- rel - term_a gres_bias_corrected <- gres - term_a + term_b if (rel_bias_corrected < 0 || gres_bias_corrected < 0) { - rel_bias_corrected2 <- max(rel_bias_corrected, rel_bias_corrected - gres_bias_corrected, 0) - gres_bias_corrected2 <- max(gres_bias_corrected, gres_bias_corrected - rel_bias_corrected, 0) + rel_bias_corrected2 <- max(rel_bias_corrected, + rel_bias_corrected - gres_bias_corrected, 0) + gres_bias_corrected2 <- max(gres_bias_corrected, + gres_bias_corrected - rel_bias_corrected, 0) rel_bias_corrected <- rel_bias_corrected2 gres_bias_corrected <- gres_bias_corrected2 } unc_bias_corrected <- unc + term_b bss_bias_corrected <- (gres_bias_corrected - rel_bias_corrected) / unc_bias_corrected - - #if (round(bs, 8) == round(bs_check_gres, 8) & round(bs_check_gres, 8) == round((rel_bias_corrected - gres_bias_corrected + unc_bias_corrected), 8)) { - # cat("No error found \ n") - # cat("BS = REL - GRES + UNC = REL_lessbias - GRES_lessbias + UNC_lessbias \ n") - #} # Add name for nk, fkbar, okbar names(dim(nk)) <- 'bin' diff --git a/R/CDORemap.R b/R/CDORemap.R index 4ea14fd5864883da1f5db9a597b62da10d158747..0933d113507fa04453a557e1552ba377d14f8346 100644 --- a/R/CDORemap.R +++ b/R/CDORemap.R @@ -76,6 +76,8 @@ #'@param write_dir Path to the directory where to create the intermediate #' files for CDO to work. By default, the R session temporary directory is #' used (\code{tempdir()}). +#'@param print_sys_msg A logical value indicating whether to print the messages from #' system CDO commands. The default is FALSE to keep the function output clean. #'@param ncores An integer indicating the number of theads used for #' interpolation (i.e., \code{-P} in cdo command.) The default value is NULL #' and \code{-P} is not used.
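A usage note on the new `print_sys_msg` flag documented just above: it stops suppressing CDO's stdout/stderr, which helps diagnose the "CDO remap failed" error raised later in this patch. Below is a minimal sketch; the grid name, method and array sizes are illustrative assumptions, not values taken from the patch.

```r
# Minimal sketch (assumed grid/method/sizes): remap a small field to a regular
# 1-degree grid and let CDO print its own messages instead of silencing them.
library(s2dv)

lons <- seq(0, 350, 10)
lats <- seq(-80, 80, 10)
dat <- array(rnorm(length(lons) * length(lats)),
             dim = c(lon = length(lons), lat = length(lats)))

res <- CDORemap(data_array = dat, lons = lons, lats = lats,
                grid = 'r360x180', method = 'bilinear',
                crop = FALSE, print_sys_msg = TRUE)  # new argument in this patch
str(res$lons)
```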
@@ -227,6 +229,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, avoid_writes = TRUE, crop = TRUE, force_remap = FALSE, write_dir = tempdir(), + print_sys_msg = FALSE, ncores = NULL) { #, mask = NULL) { .isRegularVector <- function(x, tol = 0.1) { if (length(x) < 2) { @@ -292,7 +295,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } } - data_array <- array(as.numeric(NA), array_dims) + data_array <- array(NA_real_, array_dims) } if (!(is.logical(data_array) || is.numeric(data_array)) || !is.array(data_array)) { stop("Parameter 'data_array' must be a numeric array.") @@ -410,14 +413,16 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, method <- 'con' } else if (method %in% c('dis', 'distance-weighted')) { method <- 'dis' - } else if (method %in% 'nn') { + } else if (method == 'nn') { method <- 'nn' - } else if (method %in% 'laf') { + } else if (method == 'laf') { method <- 'laf' - } else if (method %in% 'con2') { + } else if (method == 'con2') { method <- 'con2' } else { - stop("Unsupported CDO remap method. Only 'bilinear', 'bicubic', 'conservative', 'distance-weighted', 'nn', 'laf', and 'con2' are supported.") + stop("Unsupported CDO remap method. Only 'bilinear', ", + "'bicubic', 'conservative', 'distance-weighted', 'nn', ", + "'laf', and 'con2' are supported.") } # Check avoid_writes if (!is.logical(avoid_writes)) { @@ -429,7 +434,8 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (crop == 'tight') { crop_tight <- TRUE } else if (crop != 'preserve') { - stop("Parameter 'crop' can only take the values 'tight' or 'preserve' if specified as a character string.") + stop("Parameter 'crop' can only take the values 'tight' or 'preserve' ", + "if specified as a character string.") } crop <- TRUE } @@ -453,9 +459,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, tmp_lon <- lons } else { min_pos <- which(lons == min(lons), arr.ind = TRUE)[1, ] - tmp_lon <- Subset(lons, lat_dim, min_pos[which(names(dim(lons)) == lat_dim)], drop = 'selected') + tmp_lon <- Subset(lons, lat_dim, + min_pos[which(names(dim(lons)) == lat_dim)], + drop = 'selected') } - i <- 1:length(tmp_lon) + i <- seq_along(tmp_lon) degree <- min(3, length(i) - 1) lon_model <- lm(tmp_lon ~ poly(i, degree)) lon_extremes <- c(NA, NA) @@ -468,7 +476,9 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, # The signif is needed because cdo sellonlatbox crashes with too many digits lon_extremes[1] <- tmp_lon[1] - first_lon_cell_width / 2 } else { - lon_extremes[1] <- min(tmp_lon) + next_lon <- predict(lon_model, data.frame(i = length(tmp_lon) + 1)) + last_lon_cell_width <- (next_lon - tmp_lon[length(tmp_lon)]) + lon_extremes[1] <- tmp_lon[length(tmp_lon)] + last_lon_cell_width / 2 } if (which.max(tmp_lon) == length(tmp_lon)) { right_is_max <- TRUE @@ -476,18 +486,25 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, last_lon_cell_width <- (next_lon - tmp_lon[length(tmp_lon)]) lon_extremes[2] <- tmp_lon[length(tmp_lon)] + last_lon_cell_width / 2 } else { - lon_extremes[2] <- max(tmp_lon) + prev_lon <- predict(lon_model, data.frame(i = 0)) + first_lon_cell_width <- (tmp_lon[1] - prev_lon) + # The signif is needed because cdo sellonlatbox crashes with too many digits + lon_extremes[2] <- tmp_lon[1] - first_lon_cell_width / 2 } + tolerance <- 1e-10 + lon_extremes <- round(lon_extremes, 10) # Adjust the crop window if possible in order to keep lons from 0 to 360 # or from -180 to 180 when the extremes of the 
cropped window are contiguous. if (right_is_max) { if (lon_extremes[1] < -180) { - if (!((lon_extremes[2] < 180) && !((180 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + if (!((lon_extremes[2] < 180) && + !(abs((180 - lon_extremes[2]) - last_lon_cell_width / 2) <= tolerance))) { lon_extremes[1] <- -180 lon_extremes[2] <- 180 } } else if (lon_extremes[1] < 0) { - if (!((lon_extremes[2] < 360) && !((360 - lon_extremes[2]) <= last_lon_cell_width / 2))) { + if (!((lon_extremes[2] < 360) && + !(abs((360 - lon_extremes[2]) - last_lon_cell_width / 2) <= tolerance))) { lon_extremes[1] <- 0 lon_extremes[2] <- 360 } @@ -495,12 +512,14 @@ } if (left_is_min) { if (lon_extremes[2] > 360) { - if (!((lon_extremes[1] > 0) && !(lon_extremes[1] <= first_lon_cell_width / 2))) { + if (!((lon_extremes[1] > 0) && + !(abs(lon_extremes[1] - first_lon_cell_width / 2) <= tolerance))) { lon_extremes[1] <- 0 lon_extremes[2] <- 360 } } else if (lon_extremes[2] > 180) { - if (!((lon_extremes[1] > -180) && !((180 + lon_extremes[1]) <= first_lon_cell_width / 2))) { + if (!((lon_extremes[1] > -180) && + !(abs((180 + lon_extremes[1]) - first_lon_cell_width / 2) <= tolerance))) { lon_extremes[1] <- -180 lon_extremes[2] <- 180 } @@ -513,9 +532,10 @@ tmp_lat <- lats } else { min_pos <- which(lats == min(lats), arr.ind = TRUE)[1, ] - tmp_lat <- Subset(lats, lon_dim, min_pos[which(names(dim(lats)) == lon_dim)], drop = 'selected') + tmp_lat <- Subset(lats, lon_dim, min_pos[which(names(dim(lats)) == lon_dim)], + drop = 'selected') } - i <- 1:length(tmp_lat) + i <- seq_along(tmp_lat) degree <- min(3, length(i) - 1) lat_model <- lm(tmp_lat ~ poly(i, degree)) lat_extremes <- c(NA, NA) @@ -523,14 +543,17 @@ prev_lat <- predict(lat_model, data.frame(i = 0)) lat_extremes[1] <- tmp_lat[1] - (tmp_lat[1] - prev_lat) / 2 } else { - lat_extremes[1] <- min(tmp_lat) + next_lat <- predict(lat_model, data.frame(i = length(tmp_lat) + 1)) + lat_extremes[1] <- tmp_lat[length(tmp_lat)] + (next_lat - tmp_lat[length(tmp_lat)]) / 2 } if (which.max(tmp_lat) == length(tmp_lat)) { next_lat <- predict(lat_model, data.frame(i = length(tmp_lat) + 1)) lat_extremes[2] <- tmp_lat[length(tmp_lat)] + (next_lat - tmp_lat[length(tmp_lat)]) / 2 } else { - lat_extremes[2] <- max(tmp_lat) + prev_lat <- predict(lat_model, data.frame(i = 0)) + lat_extremes[2] <- tmp_lat[1] - (tmp_lat[1] - prev_lat) / 2 } + lat_extremes <- round(lat_extremes, 10) ## lat_extremes <- signif(lat_extremes, 5) # Adjust crop window if (lat_extremes[1] < -90) { @@ -548,7 +571,8 @@ } } else if (is.numeric(crop)) { if (length(crop) != 4) { - stop("Paramrter 'crop' must be a logical value or a numeric vector of length 4: c(western border, eastern border, southern border, northern border.") + stop("Parameter 'crop' must be a logical value or a numeric vector of length 4: ", + "c(western border, eastern border, southern border, northern border).") } else { lon_extremes <- crop[1:2] lat_extremes <- crop[3:4] } @@ -618,18 +642,16 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, # } # Check if interpolation can be skipped.
interpolation_needed <- TRUE - if (!force_remap) { - if (!(grid_type == 'custom')) { - if (length(lons) == grid_lons && length(lats) == grid_lats) { - if (grid_type == 'regular') { - if (.isRegularVector(lons) && .isRegularVector(lats)) { - interpolation_needed <- FALSE - } - } else if (grid_type == 'gaussian') { - # TODO: improve this check. Gaussian quadrature should be used. - if (.isRegularVector(lons) && !.isRegularVector(lats)) { - interpolation_needed <- FALSE - } + if (!force_remap && !(grid_type == 'custom')) { + if (length(lons) == grid_lons && length(lats) == grid_lats) { + if (grid_type == 'regular') { + if (.isRegularVector(lons) && .isRegularVector(lats)) { + interpolation_needed <- FALSE + } + } else if (grid_type == 'gaussian') { + # TODO: improve this check. Gaussian quadrature should be used. + if (.isRegularVector(lons) && !.isRegularVector(lats)) { + interpolation_needed <- FALSE } } } @@ -676,7 +698,8 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, unlimited_dim <- NULL dims_to_iterate <- NULL total_slices <- 1 - other_dims_per_chunk <- ifelse(is_irregular, 1, 2) # 4 (the maximum accepted by CDO) - 2 (lon, lat) = 2. + # Explanation for below: 4 (the maximum accepted by CDO) - 2 (lon, lat) = 2 + other_dims_per_chunk <- ifelse(is_irregular, 1, 2) if (length(other_dims) > 1 || (length(other_dims) > 0 && (is_irregular))) { # If lat/lon is the last dimension OR the largest other_dims is not the last one, # reorder the largest other dimension to the last as unlimited dim. @@ -687,7 +710,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, dims_mod[which(names(dim(data_array)) %in% c(lon_dim, lat_dim))] <- 0 dim_to_move <- which.max(dims_mod) - permutation <- (1:length(dim(data_array)))[-dim_to_move] + permutation <- seq_along(dim(data_array))[-dim_to_move] permutation <- c(permutation, dim_to_move) permutation_back <- sort(permutation, index.return = TRUE)$ix # dim_backup <- dim(data_array) @@ -700,8 +723,10 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, other_dims_per_chunk <- 1 } } - other_dims_ordered_by_size <- other_dims[sort(dim(data_array)[other_dims], index.return = TRUE)$ix] - dims_to_iterate <- sort(head(other_dims_ordered_by_size, length(other_dims) - other_dims_per_chunk)) + other_dims_ordered_by_size <- other_dims[sort(dim(data_array)[other_dims], + index.return = TRUE)$ix] + dims_to_iterate <- sort(head(other_dims_ordered_by_size, length(other_dims) - + other_dims_per_chunk)) if (length(dims_to_iterate) == 0) { dims_to_iterate <- NULL } else { @@ -724,13 +749,15 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, dim_backup <- dim(data_array) attributes(data_array) <- NULL dim(data_array) <- dim_backup - names(dim(data_array)) <- paste0('dim', 1:length(dim(data_array))) + names(dim(data_array)) <- paste0('dim', seq_along(dim(data_array))) names(dim(data_array))[c(lon_pos, lat_pos)] <- c(lon_dim, lat_dim) if (!is.null(unlimited_dim)) { # This will make ArrayToNc create this dim as unlimited. names(dim(data_array))[unlimited_dim] <- 'time' - # create time variable. The value is random since CDORemap() doesn't support time remapping now and we just want to avoid cdo warning - time_attr <- array(c(1:dim(data_array)[unlimited_dim]), dim = c(dim(data_array)[unlimited_dim])) + # create time variable. 
The value is random since CDORemap() doesn't support + #time remapping now and we just want to avoid cdo warning + time_attr <- array(seq_len(dim(data_array)[unlimited_dim]), + dim = c(dim(data_array)[unlimited_dim])) } if (length(dim(lons)) == 1) { names(dim(lons)) <- lon_dim @@ -777,7 +804,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, pos_lat_dim_in_lons <- which(names(dim(lons)) == lat_dim) if ((pos_lon > pos_lat && pos_lon_dim_in_lons < pos_lat_dim_in_lons) || (pos_lon < pos_lat && pos_lon_dim_in_lons > pos_lat_dim_in_lons)) { - new_pos <- 1:length(dim(subset)) + new_pos <- seq_along(dim(subset)) new_pos[pos_lon] <- pos_lat new_pos[pos_lat] <- pos_lon subset <- .aperm2(subset, new_pos) @@ -793,9 +820,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, # dims_before_crop <- dim(subset) # Make sure subset goes along with metadata if (is.null(unlimited_dim)) { - easyNCDF::ArrayToNc(setNames(list(subset, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + easyNCDF::ArrayToNc(setNames(list(subset, lons, lats), + c('var', lon_var_name, lat_var_name)), tmp_file) } else { - easyNCDF::ArrayToNc(setNames(list(subset, lons, lats, time_attr), c('var', lon_var_name, lat_var_name, 'time')), tmp_file) + easyNCDF::ArrayToNc(setNames(list(subset, lons, lats, time_attr), + c('var', lon_var_name, lat_var_name, 'time')), tmp_file) } } else { if (is_irregular) { @@ -805,7 +834,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, pos_lat_dim_in_lons <- which(names(dim(lons)) == lat_dim) if ((pos_lon > pos_lat && pos_lon_dim_in_lons < pos_lat_dim_in_lons) || (pos_lon < pos_lat && pos_lon_dim_in_lons > pos_lat_dim_in_lons)) { - new_pos <- 1:length(dim(data_array)) + new_pos <- seq_along(dim(data_array)) new_pos[pos_lon] <- pos_lat new_pos[pos_lat] <- pos_lon data_array <- .aperm2(data_array, new_pos) @@ -813,9 +842,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } # dims_before_crop <- dim(data_array) if (is.null(unlimited_dim)) { - easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats), c('var', lon_var_name, lat_var_name)), tmp_file) + easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats), + c('var', lon_var_name, lat_var_name)), tmp_file) } else { - easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats, time_attr), c('var', lon_var_name, lat_var_name, 'time')), tmp_file) + easyNCDF::ArrayToNc(setNames(list(data_array, lons, lats, time_attr), + c('var', lon_var_name, lat_var_name, 'time')), tmp_file) } } sellonlatbox <- '' @@ -828,17 +859,19 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (is.null(ncores)) { err <- try({ system(paste0("cdo -s ", sellonlatbox, "remap", method, ",", grid, " ", - tmp_file, " ", tmp_file2), ignore.stdout = T, ignore.stderr = T) + tmp_file, " ", tmp_file2), + ignore.stdout = !print_sys_msg, ignore.stderr = !print_sys_msg) }) } else { err <- try({ - system(paste0("cdo -P ", ncores," -s ", sellonlatbox, "remap", method, ",", - grid, " ", tmp_file, " ", tmp_file2), ignore.stdout = T, ignore.stderr = T) + system(paste0("cdo -P ", ncores, " -s ", sellonlatbox, "remap", method, ",", + grid, " ", tmp_file, " ", tmp_file2), + ignore.stdout = !print_sys_msg, ignore.stderr = !print_sys_msg) }) } file.remove(tmp_file) if (is(err, 'try-error') || err > 0) { - stop("CDO remap failed. Possible problem: parameter 'grid'.") + stop("CDO remap failed. 
Set 'print_sys_msg' to TRUE to see CDO system message..") } ncdf_remapped <- nc_open(tmp_file2) if (!lons_lats_taken) { @@ -915,13 +948,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, lat_pos <- which(names(new_dims) == lat_dim) # Fix issue 259, expected order from CDO output is lon lat # If is irregular, lat and lon position need to be checked: - if (is_irregular) { - if (lon_pos > lat_pos) { - new_pos <- 1:length(new_dims) - new_pos[lon_pos] <- lat_pos - new_pos[lat_pos] <- lon_pos - new_dims <- new_dims[new_pos] - } + if (is_irregular && lon_pos > lat_pos) { + new_pos <- seq_along(new_dims) + new_pos[lon_pos] <- lat_pos + new_pos[lat_pos] <- lon_pos + new_dims <- new_dims[new_pos] } result_array <- array(dim = new_dims) store_indices <- as.list(rep(TRUE, length(dim(result_array)))) @@ -929,23 +960,26 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, } if (return_array) { store_indices[dims_to_iterate] <- as.list(slice_indices) - # If is irregular, the order of dimenesions in result_array and file may be different and need to be checked before reading the temporal file: + # If is irregular, the order of dimenesions in result_array and file may be + # different and need to be checked before reading the temporal file: if (is_irregular) { test_dims <- dim(ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE)) test_dims <- test_dims[which(test_dims > 1)] - pos_test_dims <- match(dim(result_array), test_dims) + pos_test_dims <- match(names(dim(result_array)), + names(test_dims)) if (is.unsorted(pos_test_dims, na.rm = TRUE)) { # pos_new_dims is used later in the code. Don't overwrite - pos_new_dims <- 1:length(dim(result_array)) + pos_new_dims <- seq_along(dim(result_array)) pos_new_dims[which(!is.na(pos_test_dims))] <- - match(test_dims, dim(result_array)) + match(names(test_dims), names(dim(result_array))) backup_result_array_dims <- dim(result_array) dim(result_array) <- dim(result_array)[pos_new_dims] } } result_array <- do.call('[<-', c(list(x = result_array), store_indices, - list(value = ncvar_get(ncdf_remapped, 'var', collapse_degen = FALSE)))) + list(value = ncvar_get(ncdf_remapped, 'var', + collapse_degen = FALSE)))) } } else { new_dims <- dim(data_array) @@ -954,7 +988,7 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, lon_pos <- which(names(new_dims) == lon_dim) lat_pos <- which(names(new_dims) == lat_dim) if (lon_pos > lat_pos) { - new_pos <- 1:length(new_dims) + new_pos <- seq_along(new_dims) new_pos[lon_pos] <- lat_pos new_pos[lat_pos] <- lon_pos new_dims <- new_dims[new_pos] @@ -969,8 +1003,9 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, # If is irregular, the order of dimension may need to be recovered after reading all the file: if (is_irregular & (!is.null(dims_to_iterate))) { if (exists('pos_new_dims')) { - pos_new_dims <- 1:length(dim(result_array)) - dims_to_change <- match(backup_result_array_dims, dim(result_array)) + pos_new_dims <- seq_along(dim(result_array)) + dims_to_change <- match(names(backup_result_array_dims), + names(dim(result_array))) pos_new_dims[which(dims_to_change != 1)] <- dims_to_change[which(dims_to_change != 1)] result_array <- .aperm2(result_array, pos_new_dims) @@ -987,14 +1022,17 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (length(dim(found_lats)) > 1 && length(dim(found_lons)) > 1) { result_is_irregular <- TRUE } - attribute_backup[['dim']][which(names(dim(result_array)) == lon_dim)] <- dim(result_array)[lon_dim] - 
attribute_backup[['dim']][which(names(dim(result_array)) == lat_dim)] <- dim(result_array)[lat_dim] + attribute_backup[['dim']][which(names(dim(result_array)) == lon_dim)] <- + dim(result_array)[lon_dim] + attribute_backup[['dim']][which(names(dim(result_array)) == lat_dim)] <- + dim(result_array)[lat_dim] names(attribute_backup[['dim']])[which(names(dim(result_array)) == lon_dim)] <- new_lon_name names(attribute_backup[['dim']])[which(names(dim(result_array)) == lat_dim)] <- new_lat_name - if (!is.null(attribute_backup[['variables']]) && (length(attribute_backup[['variables']]) > 0)) { - for (var in 1:length(attribute_backup[['variables']])) { + if (!is.null(attribute_backup[['variables']]) && + (length(attribute_backup[['variables']]) > 0)) { + for (var in seq_along(attribute_backup[['variables']])) { if (length(attribute_backup[['variables']][[var]][['dim']]) > 0) { - for (dim in 1:length(attribute_backup[['variables']][[var]][['dim']])) { + for (dim in seq_along(attribute_backup[['variables']][[var]][['dim']])) { dim_name <- NULL if ('name' %in% names(attribute_backup[['variables']][[var]][['dim']][[dim]])) { dim_name <- attribute_backup[['variables']][[var]][['dim']][[dim]][['name']] @@ -1009,9 +1047,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, dim_name <- names(attribute_backup[['variables']][[var]][['dim']])[dim] if (dim_name %in% c(lon_dim, lat_dim)) { if (dim_name == lon_dim) { - names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + tmp <- which(names(attribute_backup[['variables']][[var]][['dim']]) == lon_dim) + names(attribute_backup[['variables']][[var]][['dim']])[tmp] <- new_lon_name } else { - names(attribute_backup[['variables']][[var]][['dim']])[which(names(attribute_backup[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + tmp <- which(names(attribute_backup[['variables']][[var]][['dim']]) == lat_dim) + names(attribute_backup[['variables']][[var]][['dim']])[tmp] <- new_lat_name } } } @@ -1023,13 +1063,15 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, new_vals <- found_lats[TRUE] } if (!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['len']])) { - attribute_backup[['variables']][[var]][['dim']][[dim]][['len']] <- length(new_vals) + attribute_backup[['variables']][[var]][['dim']][[dim]][['len']] <- + length(new_vals) } if (!is.null(attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']])) { if (!result_is_irregular) { attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals } else { - attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + attribute_backup[['variables']][[var]][['dim']][[dim]][['vals']] <- + seq_along(new_vals) } } } @@ -1046,10 +1088,10 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, attributes(result_array) <- attribute_backup lons_attr_bk[['dim']] <- dim(found_lons) if (!is.null(lons_attr_bk[['variables']]) && (length(lons_attr_bk[['variables']]) > 0)) { - for (var in 1:length(lons_attr_bk[['variables']])) { + for (var in seq_along(lons_attr_bk[['variables']])) { if (length(lons_attr_bk[['variables']][[var]][['dim']]) > 0) { dims_to_remove <- NULL - for (dim in 1:length(lons_attr_bk[['variables']][[var]][['dim']])) { + for (dim in seq_along(lons_attr_bk[['variables']][[var]][['dim']])) { dim_name <- NULL if ('name' %in% names(lons_attr_bk[['variables']][[var]][['dim']][[dim]])) { dim_name <- 
lons_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] @@ -1064,9 +1106,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, dim_name <- names(lons_attr_bk[['variables']][[var]][['dim']])[dim] if (dim_name %in% c(lon_dim, lat_dim)) { if (dim_name == lon_dim) { - names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + tmp <- which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lon_dim) + names(lons_attr_bk[['variables']][[var]][['dim']])[tmp] <- new_lon_name } else { - names(lons_attr_bk[['variables']][[var]][['dim']])[which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + tmp <- which(names(lons_attr_bk[['variables']][[var]][['dim']]) == lat_dim) + names(lons_attr_bk[['variables']][[var]][['dim']])[tmp] <- new_lat_name } } } @@ -1087,14 +1131,16 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (!result_is_irregular) { lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals } else { - lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + lons_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- + seq_along(new_vals) } } } } } if (length(dims_to_remove) > 1) { - lons_attr_bk[['variables']][[var]][['dim']] <- lons_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] + lons_attr_bk[['variables']][[var]][['dim']] <- + lons_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] } } } @@ -1104,10 +1150,10 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, attributes(found_lons) <- lons_attr_bk lats_attr_bk[['dim']] <- dim(found_lats) if (!is.null(lats_attr_bk[['variables']]) && (length(lats_attr_bk[['variables']]) > 0)) { - for (var in 1:length(lats_attr_bk[['variables']])) { + for (var in seq_along(lats_attr_bk[['variables']])) { if (length(lats_attr_bk[['variables']][[var]][['dim']]) > 0) { dims_to_remove <- NULL - for (dim in 1:length(lats_attr_bk[['variables']][[var]][['dim']])) { + for (dim in seq_along(lats_attr_bk[['variables']][[var]][['dim']])) { dim_name <- NULL if ('name' %in% names(lats_attr_bk[['variables']][[var]][['dim']][[dim]])) { dim_name <- lats_attr_bk[['variables']][[var]][['dim']][[dim]][['name']] @@ -1122,9 +1168,11 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, dim_name <- names(lats_attr_bk[['variables']][[var]][['dim']])[dim] if (dim_name %in% c(lon_dim, lat_dim)) { if (dim_name == lon_dim) { - names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lon_dim)] <- new_lon_name + tmp <- which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lon_dim) + names(lats_attr_bk[['variables']][[var]][['dim']])[tmp] <- new_lon_name } else { - names(lats_attr_bk[['variables']][[var]][['dim']])[which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lat_dim)] <- new_lat_name + tmp <- which(names(lats_attr_bk[['variables']][[var]][['dim']]) == lat_dim) + names(lats_attr_bk[['variables']][[var]][['dim']])[tmp] <- new_lat_name } } } @@ -1145,14 +1193,16 @@ CDORemap <- function(data_array = NULL, lons, lats, grid, method, if (!result_is_irregular) { lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- new_vals } else { - lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- 1:length(new_vals) + lats_attr_bk[['variables']][[var]][['dim']][[dim]][['vals']] <- + seq_along(new_vals) } } } } } if (length(dims_to_remove) > 1) { - 
lats_attr_bk[['variables']][[var]][['dim']] <- lats_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] + lats_attr_bk[['variables']][[var]][['dim']] <- + lats_attr_bk[['variables']][[var]][['dim']][[-dims_to_remove]] } } } diff --git a/R/CRPS.R b/R/CRPS.R index 7dedf4fcb90a833b1034e7a1eb038e2588fe15ca..bb63095c0e0e58f18a6c24c70d747776af0325be 100644 --- a/R/CRPS.R +++ b/R/CRPS.R @@ -25,6 +25,9 @@ #'@param Fair A logical indicating whether to compute the FairCRPS (the #' potential CRPS that the forecast would have with an infinite ensemble size). #' The default value is FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -47,15 +50,15 @@ #'@importFrom ClimProjDiags Subset #'@export CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, - Fair = FALSE, ncores = NULL) { + Fair = FALSE, return_mean = TRUE, ncores = NULL) { # Check inputs ## exp and obs (1) if (!is.array(exp) | !is.numeric(exp)) stop("Parameter 'exp' must be a numeric array.") if (!is.array(obs) | !is.numeric(obs)) stop("Parameter 'obs' must be a numeric array.") - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -86,7 +89,8 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') } else { - stop("Not implemented for observations with members ('obs' can have 'memb_dim', but it should be of length = 1).") + stop("Not implemented for observations with members ", + "('obs' can have 'memb_dim', but it should be of length = 1).") } } name_exp <- sort(names(dim(exp))) @@ -98,13 +102,17 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -123,17 +131,21 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU Fair = Fair, ncores = ncores)$output1 - # Return only the mean CRPS - crps <- MeanDims(crps, time_dim, na.rm = FALSE) + if (return_mean) { + crps <- MeanDims(crps, time_dim, na.rm = FALSE) + } else { + crps <- crps + } return(crps) } .CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', 
dat_dim = NULL, Fair = FALSE) { + # exp: [sdate, memb, (dat_dim)] # obs: [sdate, (dat_dim)] - + # Adjust dimensions if needed if (is.null(dat_dim)) { nexp <- 1 @@ -144,28 +156,28 @@ CRPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', dat_dim = NU nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } - + # for FairCRPS R_new <- ifelse(Fair, Inf, NA) - + CRPS <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - + for (i in 1:nexp) { for (j in 1:nobs) { - exp_data <- exp[ , , i] - obs_data <- obs[ , j] - + exp_data <- exp[, , i] + obs_data <- obs[, j] + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1]) - + crps <- SpecsVerification::enscrps_cpp(ens = exp_data, obs = obs_data, R_new = R_new) - CRPS[ , i, j] <- crps + CRPS[, i, j] <- crps } } - + if (is.null(dat_dim)) { dim(CRPS) <- c(dim(CRPS)[time_dim]) } - + return(CRPS) } diff --git a/R/CRPSS.R b/R/CRPSS.R index 159e2bdb2894ce509008da1cbe899d2bcdfe6e90..e4d094a935c73c399e1fb433c7e3122bb34b7d0f 100644 --- a/R/CRPSS.R +++ b/R/CRPSS.R @@ -48,6 +48,12 @@ #' the default of \code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. 
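A minimal usage sketch for the return_mean switch added to CRPS() above. The arrays below are synthetic and purely illustrative; only the default dimension names ('sdate', 'member') from the function signature are relied on.

# Illustrative only: small synthetic forecast and observation arrays
set.seed(1)
exp <- array(rnorm(10 * 5), dim = c(sdate = 10, member = 5))
obs <- array(rnorm(10), dim = c(sdate = 10))
crps_mean   <- CRPS(exp, obs)                          # default return_mean = TRUE: mean over 'sdate'
crps_series <- CRPS(exp, obs, return_mean = FALSE)     # keeps the 'sdate' dimension unaggregated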
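A hedged sketch of supplying the N.eff argument documented above for CRPSS(). Per the parameter description, NA lets the function estimate the effective sample size internally, while a numeric (or an array matching 'obs' except time_dim) overrides it; sig_method.type = 'two.sided' is used here because 'two.sided.approx' ignores N.eff. The data are synthetic and the value 15 is arbitrary.

set.seed(2)
exp <- array(rnorm(20 * 4), dim = c(sdate = 20, member = 4))
obs <- array(rnorm(20), dim = c(sdate = 20))
res_auto  <- CRPSS(exp, obs, sig_method.type = 'two.sided')              # N.eff = NA: estimated internally
res_fixed <- CRPSS(exp, obs, sig_method.type = 'two.sided', N.eff = 15)  # user-supplied effective sample size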
#' @@ -80,7 +86,7 @@ #'@export CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, sig_method.type = 'two.sided.approx', - alpha = 0.05, ncores = NULL) { + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -90,14 +96,14 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.array(obs) | !is.numeric(obs)) { stop("Parameter 'obs' must be a numeric array.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { if (!is.array(ref) | !is.numeric(ref)) stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } } @@ -149,8 +155,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of all dimensions", - " except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of all dimensions", + " except 'memb_dim' and 'dat_dim'.") } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -158,17 +164,17 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension it must be", + " equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") } } ## Fair @@ -186,13 +192,30 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## sig_method.type #NOTE: These are the types of RandomWalkTest() if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { - stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") } - if (sig_method.type == 'two.sided.approx') { - if (alpha != 0.05) { - .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", + "= 0.05 only. 
Returning the significance at the 0.05 significance level.") + } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) { + stop("Parameter 'N.eff' must be numeric.") + } + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop("If parameter 'N.eff' is provided with an array, it must ", + "have the same dimensions as 'obs' except 'time_dim'.") } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop("Parameter 'N.eff' must be NA, FALSE, a numeric, or an array with ", + "the same dimensions as 'obs' except 'time_dim'.") + } + if ((!is.na(N.eff) & !isFALSE(N.eff)) && sig_method.type == 'two.sided.approx') { + warning("'N.eff' will not be used if 'sig_method.type' is 'two.sided.approx'.") } ## ncores if (!is.null(ncores)) { @@ -219,26 +242,64 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = c(time_dim, memb_dim, dat_dim), obs = c(time_dim, dat_dim)) } - output <- Apply(data, - target_dims = target_dims, - fun = .CRPSS, - time_dim = time_dim, memb_dim = memb_dim, - dat_dim = dat_dim, - Fair = Fair, clim.cross.val = clim.cross.val, - sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .CRPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + dat_dim = dat_dim, + Fair = Fair, clim.cross.val = clim.cross.val, + sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) } .CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', dat_dim = NULL, Fair = FALSE, clim.cross.val = TRUE, - sig_method.type = 'two.sided.approx', alpha = 0.05) { + sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { # exp: [sdate, memb, (dat)] # obs: [sdate, (dat)] # ref: [sdate, memb, (dat)] or NULL - + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -264,7 +325,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (isFALSE(clim.cross.val)) { ## Without cross-validation ref <- array(data = rep(obs, each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) ref <- array(data = NA, dim = 
c(obs_time_len, obs_time_len - 1)) for (i in 1:obs_time_len) { ref[i, ] <- obs[-i] @@ -283,8 +345,10 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i_obs in 1:nobs) { if (isFALSE(clim.cross.val)) { ## Without cross-validation - ref <- array(data = rep(obs[, i_obs], each = obs_time_len), dim = c(obs_time_len, obs_time_len)) - } else if (isTRUE(clim.cross.val)) { ## With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = rep(obs[, i_obs], each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) ref <- array(data = NA, dim = c(obs_time_len, obs_time_len - 1)) for (i in 1:obs_time_len) { ref[i, ] <- obs[-i, i_obs] @@ -292,8 +356,10 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } names(dim(ref)) <- c(time_dim, memb_dim) - crps_ref[, i_obs] <- .CRPS(exp = ref, obs = ClimProjDiags::Subset(obs, dat_dim, i_obs, drop = 'selected'), - time_dim = time_dim, memb_dim = memb_dim, dat_dim = NULL, Fair = Fair) + crps_ref[, i_obs] <- + .CRPS(exp = ref, + obs = ClimProjDiags::Subset(obs, dat_dim, i_obs, drop = 'selected'), + time_dim = time_dim, memb_dim = memb_dim, dat_dim = NULL, Fair = Fair) } # crps_ref should be [sdate, nobs] } @@ -301,7 +367,7 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # ref is not NULL if (!is.null(dat_dim) && (!dat_dim %in% names(dim(ref)))) { remove_dat_dim <- TRUE - ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1 , lendim = 1, name = dat_dim) + ref <- InsertDim(data = ref, posdim = length(dim(ref)) + 1, lendim = 1, name = dat_dim) } else { remove_dat_dim <- FALSE } @@ -318,7 +384,8 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', #----- CRPSS if (!is.null(dat_dim)) { - # If ref != NULL & ref has dat_dim, crps_ref = [sdate, nexp, nobs]; else, crps_ref = [sdate, nobs] + # If ref != NULL & ref has dat_dim, crps_ref = [sdate, nexp, nobs]; + # else, crps_ref = [sdate, nobs] crps_exp_mean <- MeanDims(crps_exp, time_dim, na.rm = FALSE) crps_ref_mean <- MeanDims(crps_ref, time_dim, na.rm = FALSE) @@ -329,28 +396,38 @@ CRPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[j] + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } else { for (i in 1:nexp) { for (j in 1:nobs) { crpss[i, j] <- 1 - crps_exp_mean[i, j] / crps_ref_mean[i, j] - sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], skill_B = crps_ref_mean[i, j], + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } + sign[i, j] <- .RandomWalkTest(skill_A = crps_exp_mean[i, j], + skill_B = crps_ref_mean[i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } - } else { + } else { # dat_dim = NULL crpss <- 1 - mean(crps_exp) / mean(crps_ref) # Significance + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } sign <- 
.RandomWalkTest(skill_A = crps_exp, skill_B = crps_ref, test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } return(list(crpss = crpss, sign = sign)) diff --git a/R/Clim.R b/R/Clim.R index c144025c1f8af795bb06afb31884949f868cb08f..d3dde13d3b0dbf94c68b8ccf2c4dc96505eb53dd 100644 --- a/R/Clim.R +++ b/R/Clim.R @@ -3,7 +3,8 @@ #'This function computes per-pair climatologies for the experimental #'and observational data using one of the following methods: #'\enumerate{ -#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 https://doi.org/10.1007/s00382-012-1413-1)} +#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 +#' https://doi.org/10.1007/s00382-012-1413-1)} #' \item{Kharin method (Kharin et al, GRL, 2012 https://doi.org/10.1029/2012GL052647)} #' \item{Fuckar method (Fuckar et al, GRL, 2014 https://doi.org/10.1002/2014GL060815)} #'} @@ -86,11 +87,11 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing time_dim and dat_dim.")) + stop("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -110,7 +111,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), stop("Parameter 'dat_dim' is not found in 'exp' dimensions.") } # If dat_dim is not in obs, add it in - if (any(!dat_dim %in% names(dim(obs)))) { + if (!all(dat_dim %in% names(dim(obs)))) { reset_obs_dim <- TRUE ori_obs_dim <- dim(obs) dim(obs) <- c(dim(obs), rep(1, length(dat_dim[which(!dat_dim %in% names(dim(obs)))]))) @@ -166,14 +167,14 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) if (!is.null(dat_dim)) { - for (i in 1:length(dat_dim)) { + for (i in seq_along(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim[i])] name_obs <- name_obs[-which(name_obs == dat_dim[i])] } } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have the same dimensions ", - "except 'dat_dim'.")) + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have the same dimensions ", + "except 'dat_dim'.") } ############################### @@ -191,7 +192,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), # Per-pair: Remove all sdate if not complete along dat_dim if (!is.null(dat_dim)) { pos <- rep(0, length(dat_dim)) - for (i in 1:length(dat_dim)) { #[dat, sdate] + for (i in seq_along(dat_dim)) { #[dat, sdate] ## dat_dim: [dataset, member] pos[i] <- which(names(dim(obs)) == dat_dim[i]) } @@ -199,7 +200,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), MeanDims(obs, pos, na.rm = FALSE) outrows_obs <- outrows_exp - for (i in 1:length(pos)) { + for (i in seq_along(pos)) { outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]]) outrows_obs <- 
InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]]) } @@ -283,7 +284,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), clim_obs <- mean(clim_obs, na.rm = TRUE) } else { dim_name <- names(dim(clim_exp)) - pos <- c(1:length(dim(clim_exp)))[-which(dim_name == memb_dim)] + pos <- c(seq_along(dim(clim_exp)))[-which(dim_name == memb_dim)] clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE) clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE) if (is.null(dim(clim_exp))) { @@ -322,6 +323,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1, polydeg = 1, conf = FALSE, ncores = ncores_input)$trend # tmp_exp: [stats, dat_dim] + ##NOTE: Cannot use rowMeans here because tmp_obs may have only one dim tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member) #tmp_obs_mean: [stats = 2] if (!is.null(dat_dim)) { @@ -337,7 +339,7 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), } trend_exp <- list() trend_obs <- list() - for (jdate in 1:dim(exp)[time_dim]) { + for (jdate in seq_len(dim(exp)[time_dim])) { trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs } @@ -351,18 +353,18 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), } # average out dat_dim, get a number - if (is.null(dim(clim_obs))) { +# if (is.null(dim(clim_obs))) { clim_obs_mean <- mean(clim_obs) - } else { - clim_obs_mean <- mean(apply(clim_obs, 1, mean)) - } +# } else { +# clim_obs_mean <- mean(rowMeans(clim_obs)) +# } clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line clim_exp <- trend_exp - trend_obs + clim_obs_mean ## member mean if (!memb) { - pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] - pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] + pos_exp <- c(seq_along(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] + pos_obs <- c(seq_along(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] tmp_dim_exp <- dim(clim_exp) tmp_dim_obs <- dim(clim_obs) clim_exp <- apply(clim_exp, pos_exp, mean, na.rm = TRUE) @@ -418,26 +420,31 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), na.action = na.omit, pval = FALSE, conf = FALSE, ncores = ncores_input)$regression #tmp_: [stats = 2, dat_dim, ftime] - tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #average out dat_dim (dat and member) + #average out dat_dim (dat and member) + tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #tmp_obs_mean: [stats = 2, ftime] - ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #average out dat_dim + #average out dat_dim + ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #ini_obs_mean: [sdate, ftime] # Find intercept and slope intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime] slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime] - intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp + intercept_obs <- array(tmp_obs_mean[1, ], + dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp if (!is.null(dat_dim)) { - intercept_obs <- Reorder(intercept_obs, c(2:length(dim(intercept_obs)), 1)) #[dat_dim, ftime] exp + intercept_obs <- Reorder(intercept_obs, + c(2:length(dim(intercept_obs)), 1)) #[dat_dim, ftime] exp } slope_obs 
<- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim] exp if (!is.null(dat_dim)) { - slope_obs <- Reorder(slope_obs, c(2:length(dim(slope_obs)), 1)) #[dat_dim, ftime] exp + slope_obs <- Reorder(slope_obs, + c(2:length(dim(slope_obs)), 1)) #[dat_dim, ftime] exp } trend_exp <- list() trend_obs <- list() - for (jdate in 1:dim(exp)[time_dim]) { + for (jdate in seq_len(dim(exp)[time_dim])) { tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected') #[dat_dim, ftime] trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp #[dat_dim, ftime] @@ -470,8 +477,8 @@ Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'), ## member mean if (!memb) { - pos_exp <- c(1:length(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] - pos_obs <- c(1:length(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] + pos_exp <- c(seq_along(dim(clim_exp)))[-which(names(dim(clim_exp)) == memb_dim)] + pos_obs <- c(seq_along(dim(clim_obs)))[-which(names(dim(clim_obs)) == memb_dim)] tmp_dim_exp <- dim(clim_exp) tmp_dim_obs <- dim(clim_obs) diff --git a/R/Cluster.R b/R/Cluster.R index 2fac687a3aa1dc4afcde425fa170a938247f535d..e428b2bb9a4418ba4189ebb2196b3810f6d4add2 100644 --- a/R/Cluster.R +++ b/R/Cluster.R @@ -142,7 +142,7 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } @@ -154,7 +154,7 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, if (is.null(dim(weights))) { #is vector dim(weights) <- c(length(weights)) } - if (any(is.null(names(dim(weights))))| any(nchar(names(dim(weights))) == 0)) { + if (any(is.null(names(dim(weights)))) | any(nchar(names(dim(weights))) == 0)) { stop("Parameter 'weights' must have dimension names.") } if (any(!names(dim(weights)) %in% names(dim(data)) | @@ -174,7 +174,7 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, if (!is.character(space_dim)) { stop("Parameter 'space_dim' must be a character vector.") } - if (any(!space_dim %in% names(dim(data)))) { + if (!all(space_dim %in% names(dim(data)))) { stop("Parameter 'space_dim' is not found in 'data' dimensions.") } if (!is.null(weights)) { @@ -202,7 +202,8 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, ## index if (!is.character(index) | length(index) > 1) { - stop("Parameter 'index' should be a character strings accepted as 'index' by the function NbClust::NbClust.") + stop("Parameter 'index' should be a character strings accepted as 'index' ", + "by the function NbClust::NbClust.") } ## ncores @@ -240,7 +241,7 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, return(output) } -.Cluster <- function(data, weights = NULL, nclusters, index = 'sdindex') { +.Cluster <- function(data, nclusters, weights = NULL, index = 'sdindex') { # data: [time, (lat, lon)] dat_dim <- dim(data) @@ -252,7 +253,9 @@ Cluster <- function(data, weights = NULL, time_dim = 'sdate', space_dim = NULL, if (!is.null(weights)) { dim(weights) <- prod(dim(weights)) # a vector data_list <- lapply(1:dat_dim[1], - function(x) { data[x, ] * weights }) + function(x) { + data[x, ] * weights + }) data <- do.call(abind::abind, c(data_list, along = 0)) } } diff --git a/R/Composite.R b/R/Composite.R index 
03f0d585a26e6d649a7145973828e3dff8d1a580..3831e07edffd4ba6404230c61269d65f7537e1a7 100644 --- a/R/Composite.R +++ b/R/Composite.R @@ -100,7 +100,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), if (length(dim(data)) < 3) { stop("Parameter 'data' must have at least three dimensions.") } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## occ @@ -118,8 +118,8 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), stop("Parameter 'time_dim' is not found in 'data' dimension.") } if (dim(data)[time_dim] != length(occ)) { - stop(paste0("The length of time_dim dimension in parameter 'data' is not ", - "equal to length of parameter 'occ'.")) + stop("The length of time_dim dimension in parameter 'data' is not ", + "equal to length of parameter 'occ'.") } ## space_dim if (!is.character(space_dim) | length(space_dim) != 2) { @@ -185,7 +185,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), ncores = ncores) if (!is.null(fileout)) { - save(output, file = paste(fileout, '.sav', sep = '')) + save(output, file = paste0(fileout, '.sav')) } return(output) @@ -203,7 +203,7 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), dof <- array(dim = dim(data)[1:2]) pval <- array(dim = c(dim(data)[1:2], composite = K)) - if (eno == TRUE) { + if (eno) { n_tot <- Eno(data, time_dim = time_dim, ncores = ncores_input) } else { n_tot <- length(occ) @@ -214,14 +214,14 @@ Composite <- function(data, occ, time_dim = 'time', space_dim = c('lon', 'lat'), for (k in 1:K) { - if (length(which(occ == k)) >= 1) { + if (any(occ == k)) { indices <- which(occ == k) + lag toberemoved <- which(0 > indices | indices > dim(data)[3]) if (length(toberemoved) > 0) { indices <- indices[-toberemoved] } - if (eno == TRUE) { + if (eno) { data_tmp <- data[, , indices] names(dim(data_tmp)) <- names(dim(data)) n_k <- Eno(data_tmp, time_dim = time_dim, ncores = ncores_input) diff --git a/R/Consist_Trend.R b/R/Consist_Trend.R index ee956840bf613fc25b4d9ca65760a86e2f9bb4b3..8289168c437233c7c85e56d0f7d9cc861f2efc92 100644 --- a/R/Consist_Trend.R +++ b/R/Consist_Trend.R @@ -90,15 +90,15 @@ Consist_Trend <- function(exp, obs, dat_dim = 'dataset', time_dim = 'sdate', int stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing time_dim and dat_dim.")) + stop("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } - if(!all(names(dim(exp)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp)))) { + if (!all(names(dim(exp)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp)))) { stop("Parameter 'exp' and 'obs' must have the same dimension names.") } ## time_dim @@ -118,13 +118,13 @@ Consist_Trend <- function(exp, obs, dat_dim = 'dataset', time_dim = 'sdate', int ## exp and obs (2) name_exp <- sort(names(dim(exp))) name_obs 
<- sort(names(dim(obs))) - for (i in 1:length(dat_dim)) { + for (i in seq_along(dat_dim)) { name_exp <- name_exp[-which(name_exp == dat_dim[i])] name_obs <- name_obs[-which(name_obs == dat_dim[i])] } - if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim'.")) + if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim'.") } ## interval if (!is.numeric(interval) | interval <= 0 | length(interval) > 1) { @@ -161,7 +161,7 @@ Consist_Trend <- function(exp, obs, dat_dim = 'dataset', time_dim = 'sdate', int # obs: [nobs, sdate] # Find common points - nan <- apply(exp, 2, mean, na.rm = FALSE) + apply(obs, 2, mean, na.rm = FALSE) # [sdate] + nan <- colMeans(exp, na.rm = FALSE) + colMeans(obs, na.rm = FALSE) # [sdate] exp[, is.na(nan)] <- NA obs[, is.na(nan)] <- NA diff --git a/R/Corr.R b/R/Corr.R index c11fcf69a9bf4197982c072bebe5d7c051e59971..28d166eaf8e0f4a09b409de870602acaeeb06476 100644 --- a/R/Corr.R +++ b/R/Corr.R @@ -1,4 +1,5 @@ -#'Compute the correlation coefficient between an array of forecast and their corresponding observation +#'Compute the correlation coefficient between an array of forecast and their +#'corresponding observation #' #'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for #'an array of forecast and an array of observation. The correlations are @@ -118,11 +119,11 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", - "containing time_dim and dat_dim.")) + stop("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -158,8 +159,8 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, } if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { - stop(paste0("Parameter 'limits' must be a vector of two positive ", - "integers smaller than the length of paramter 'comp_dim'.")) + stop("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") } } ## method @@ -172,7 +173,8 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'memb_dim' must be a character string.") } if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { - stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. Set it as NULL if there is no member dimension.") + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. 
", + "Set it as NULL if there is no member dimension.") } # Add [member = 1] if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { @@ -223,8 +225,8 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, name_obs <- name_obs[-which(name_obs == memb_dim)] } if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimension except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.") } if (dim(exp)[time_dim] < 3) { stop("The length of time_dim must be at least 3 to compute correlation.") @@ -282,25 +284,30 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, .Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_dim = 'sdate', method = 'pearson', conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + if (is.null(dat_dim)) { # exp: [sdate] # obs: [sdate] - nexp <- 1 - nobs <- 1 - CORR <- array(dim = c(nexp = nexp, nobs = nobs)) - if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { - CORR <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + if (!all(is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) } } else { # exp: [sdate, dat_exp] # obs: [sdate, dat_obs] - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - CORR <- array(dim = c(nexp = nexp, nobs = nobs)) for (j in 1:nobs) { for (y in 1:nexp) { - if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + if (!all(is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { CORR[y, j] <- cor(exp[, y], obs[, j], use = "pairwise.complete.obs", method = method) @@ -328,17 +335,15 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim obs_memb <- as.numeric(dim(obs)[memb_dim]) + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + if (is.null(dat_dim)) { # exp: [sdate, memb_exp] # obs: [sdate, memb_obs] - nexp <- 1 - nobs <- 1 - CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) - for (j in 1:obs_memb) { for (y in 1:exp_memb) { - if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + if (!all(is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { CORR[, , y, j] <- cor(exp[, y], obs[, j], use = "pairwise.complete.obs", method = method) @@ -349,16 +354,11 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, } else { # exp: [sdate, dat_exp, memb_exp] # obs: [sdate, dat_obs, memb_obs] - nexp <- as.numeric(dim(exp)[dat_dim]) - nobs <- as.numeric(dim(obs)[dat_dim]) - - CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) - for (j in 1:obs_memb) { for (y in 1:exp_memb) { CORR[, , y, j] <- sapply(1:nobs, function(i) { sapply(1:nexp, function (x) { - if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + if (!all(is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { cor(exp[, x, y], obs[, i, j], use = "pairwise.complete.obs", method = method) @@ -390,7 +390,9 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, if (pval || conf || sign) { if (method == "kendall" | method == "spearman") { if (!is.null(dat_dim) | 
!is.null(memb_dim)) { - tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + tmp <- apply(obs, + c(seq_along(dim(obs)))[-1], + rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) names(dim(tmp))[1] <- time_dim eno <- Eno(tmp, time_dim) } else { @@ -409,7 +411,8 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, eno_expand[i, ] <- eno } } else { #member - eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, + exp_memb = exp_memb, obs_memb = obs_memb)) for (i in 1:nexp) { for (j in 1:exp_memb) { eno_expand[i, , j, ] <- eno @@ -438,14 +441,33 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, ################################### # Remove nexp and nobs if dat_dim = NULL - if (is.null(dat_dim) & !is.null(memb_dim)) { - dim(CORR) <- dim(CORR)[3:length(dim(CORR))] - if (pval) { - dim(p.val) <- dim(p.val)[3:length(dim(p.val))] - } - if (conf) { - dim(conflow) <- dim(conflow)[3:length(dim(conflow))] - dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + if (is.null(dat_dim)) { +# if (is.null(dat_dim) & !is.null(memb_dim)) { + + if (length(dim(CORR)) == 2) { + dim(CORR) <- NULL + if (pval) { + dim(p.val) <- NULL + } + if (conf) { + dim(conflow) <- NULL + dim(confhigh) <- NULL + } + if (sign) { + dim(signif) <- NULL + } + } else { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + if (sign) { + dim(signif) <- dim(signif)[3:length(dim(signif))] + } } } diff --git a/R/DiffCorr.R b/R/DiffCorr.R index d42dfc24ec583b333ede3a36f038811a762b0ef1..3e82402f79bf45c463f02859d04318d42257847b 100644 --- a/R/DiffCorr.R +++ b/R/DiffCorr.R @@ -100,12 +100,12 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") if (!all(names(dim(N.eff)) %in% names(dim(obs))) | any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { - stop(paste0('If parameter "N.eff" is provided with an array, it must ', - 'have the same dimensions as "obs" except "time_dim".')) + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') } } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { - stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', - 'the same dimensions as "obs" except "time_dim".')) + stop('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') } ## time_dim if (!is.character(time_dim) | length(time_dim) != 1) @@ -132,9 +132,10 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', name_ref <- name_ref[-which(name_ref == memb_dim)] } if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs]) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", - "all dimensions except 'memb_dim'.")) + !identical(dim(exp)[name_exp], dim(obs)[name_obs]) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp', 'obs', and 'ref' must have same length of ", + "all dimensions except 'memb_dim'.") } ## method if (!method %in% 
c("pearson", "spearman")) { @@ -175,13 +176,12 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', # Calculate ensemble means dim_exp <- dim(exp) dim_ref <- dim(ref) - dim_obs <- dim(obs) if (!is.null(memb_dim)) { exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) - exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) - ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + exp <- apply(exp, c(seq_along(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(seq_along(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) } @@ -232,12 +232,16 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom } - # Significance with one-sided or two-sided test for equality of dependent correlation coefficients (Steiger, 1980) + # Significance with one-sided or two-sided test for equality of dependent + # correlation coefficients (Steiger, 1980) r12 <- cor.exp r13 <- cor.ref r23 <- cor(exp, ref) R <- (1 - r12 * r12 - r13 * r13 - r23 * r23) + 2 * r12 * r13 * r23 - t <- (r12 - r13) * sqrt((N.eff - 1) * (1 + r23) / (2 * ((N.eff - 1) / (N.eff - 3)) * R + 0.25 * (r12 + r13)^2 * (1 - r23)^3)) + t <- (r12 - r13) * + sqrt((N.eff - 1) * (1 + r23) / + (2 * ((N.eff - 1) / (N.eff - 3)) * R + + 0.25 * (r12 + r13)^2 * (1 - r23)^3)) if (test.type == 'one-sided') { @@ -251,7 +255,7 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', output$p.val <- p.value } if (sign) { - output$sign <- ifelse(!is.na(p.value) & p.value <= alpha & output$diff.corr > 0, TRUE, FALSE) + output$sign <- !is.na(p.value) & p.value <= alpha & output$diff.corr > 0 } } else if (test.type == 'two-sided') { @@ -266,7 +270,7 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', output$p.val <- p.value } if (sign) { - output$sign <- ifelse(!is.na(p.value) & p.value <= alpha / 2, TRUE, FALSE) + output$sign <- !is.na(p.value) & p.value <= alpha / 2 } } else { @@ -288,7 +292,8 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', ref <- ref[!nna] output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, - N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, test.type = test.type) + N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, + test.type = test.type) } else if (handle.na == 'return.na') { # Data contain NA, return NAs directly without passing to .diff.corr @@ -303,7 +308,8 @@ DiffCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', } else { ## There is no NA output <- .diff.corr(exp = exp, obs = obs, ref = ref, method = method, - N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, test.type = test.type) + N.eff = N.eff, alpha = alpha, pval = pval, sign = sign, + test.type = test.type) } return(output) diff --git a/R/EOF.R b/R/EOF.R index 66e69da59e93cae4384aab2289d6ef3dba248988..38c3fae0970f3e487441d64bea40311fbf02e37c 100644 --- a/R/EOF.R +++ b/R/EOF.R @@ -100,7 +100,7 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), if (!is.numeric(ano)) { stop("Parameter 'ano' must be a numeric array.") } - if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { stop("Parameter 'ano' must 
have dimension names.") } ## time_dim @@ -114,21 +114,21 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), if (!is.character(space_dim) | length(space_dim) != 2) { stop("Parameter 'space_dim' must be a character vector of 2.") } - if (any(!space_dim %in% names(dim(ano)))) { + if (!all(space_dim %in% names(dim(ano)))) { stop("Parameter 'space_dim' is not found in 'ano' dimension.") } ## lat if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'ano'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") } if (any(lat > 90 | lat < -90)) { stop("Parameter 'lat' must contain values within the range [-90, 90].") } ## lon if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'ano'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") } if (any(lon > 360 | lon < -360)) { .warning("Some 'lon' is out of the range [-360, 360].") @@ -158,7 +158,7 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), # Area weighting. Weights for EOF; needed to compute the # fraction of variance explained by each EOFs space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) - wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) # We want the covariance matrix to be weigthed by the grid # cell area so the anomaly field is weighted by its square @@ -221,9 +221,9 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), ano <- ano * InsertDim(wght, 1, nt) # The use of the correlation matrix is done under the option corr. 
- if (corr == TRUE) { + if (corr) { stdv <- apply(ano, c(2, 3), sd, na.rm = T) - ano <- ano/InsertDim(stdv, 1, nt) + ano <- ano / InsertDim(stdv, 1, nt) } # Time/space matrix for SVD @@ -273,7 +273,7 @@ EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), # Computation of the % of variance associated with each mode W <- pca$d[1:neofs] tot.var <- sum(pca$d^2) - var.eof <- 100 * pca$d[1:neofs]^2/tot.var + var.eof <- 100 * pca$d[1:neofs]^2 / tot.var for (e in 1:neofs) { # Set all masked grid points to NA in the EOFs diff --git a/R/Eno.R b/R/Eno.R index e2324de5a2157e1f3bda77c90ad2653c8315b4e4..cb927602221e10f6d3c0853bf0935aad93834099 100644 --- a/R/Eno.R +++ b/R/Eno.R @@ -43,7 +43,7 @@ Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -54,14 +54,14 @@ Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { stop("Parameter 'time_dim' is not found in 'data' dimension.") } ## na.action - if (as.character(substitute(na.action)) != c("na.pass") & - as.character(substitute(na.action)) != c("na.fail")) { + if (as.character(substitute(na.action)) != "na.pass" & + as.character(substitute(na.action)) != "na.fail") { stop("Parameter 'na.action' must be a function either na.pass or na.fail.") } - if(as.character(substitute(na.action))== c("na.fail") && anyNA(data)) { - stop(paste0("Calculation fails because NA is found in paratemter 'data', ", - "which is not accepted when ", - "parameter 'na.action' = na.fail.")) + if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) { + stop("Calculation fails because NA is found in paratemter 'data', ", + "which is not accepted when ", + "parameter 'na.action' = na.fail.") } ## ncores if (!is.null(ncores)) { diff --git a/R/EuroAtlanticTC.R b/R/EuroAtlanticTC.R index 2860a5336695698ecd62eb43e6bef81e13d68b57..a18e6fbc722e8f74e5b48a9c05c3a6e87193ff35 100644 --- a/R/EuroAtlanticTC.R +++ b/R/EuroAtlanticTC.R @@ -75,7 +75,7 @@ EuroAtlanticTC <- function(ano, lat, lon, ntrunc = 30, time_dim = 'sdate', if (!is.numeric(ano)) { stop("Parameter 'ano' must be a numeric array.") } - if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { stop("Parameter 'ano' must have dimension names.") } ## time_dim @@ -89,20 +89,20 @@ EuroAtlanticTC <- function(ano, lat, lon, ntrunc = 30, time_dim = 'sdate', if (!is.character(space_dim) | length(space_dim) != 2) { stop("Parameter 'space_dim' must be a character vector of 2.") } - if (any(!space_dim %in% names(dim(ano)))) { + if (!all(space_dim %in% names(dim(ano)))) { stop("Parameter 'space_dim' is not found in 'ano' dimension.") } ## lat and lon if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'ano'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") } if (any(lat > 90 | lat < -90)) { stop("Parameter 'lat' must contain values within the range [-90, 90].") } if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - 
"length as the longitude dimension of 'ano'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") } if (all(lon >= 0)) { if (any(lon > 360 | lon < 0)) { @@ -178,7 +178,7 @@ EuroAtlanticTC <- function(ano, lat, lon, ntrunc = 30, time_dim = 'sdate', # Area weighting is needed to compute the fraction of variance explained by # each mode space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) - wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) # We want the covariance matrix to be weigthed by the grid # cell area so the anoaly field is weighted by its square diff --git a/R/Filter.R b/R/Filter.R index c4e76bf20faba914f7b287c2b0b6d6b902df8ec0..e7043e2714caf5b8c69b380f1742c2f76291656e 100644 --- a/R/Filter.R +++ b/R/Filter.R @@ -53,7 +53,7 @@ Filter <- function(data, freq, time_dim = 'ftime', ncores = NULL) { dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## freq @@ -102,19 +102,18 @@ Filter <- function(data, freq, time_dim = 'ftime', ncores = NULL) { for (jfreq in seq(freq - 0.5 / ndat2, freq + 0.5 / ndat2, 0.1 / (ndat2 * fac1))) { for (phase in seq(0, pi, (pi / (10 * fac2)))) { - xtest <- cos(phase + c(1:ndat) * jfreq * 2 * pi) - test <- lm(data[is.na(data) == FALSE] ~ xtest[ - is.na(data) == FALSE])$fitted.values - if (sum(test ^ 2) > maxi) { + xtest <- cos(phase + 1:ndat * jfreq * 2 * pi) + test <- lm(data[!is.na(data)] ~ xtest[!is.na(data)])$fitted.values + if (sum(test^2) > maxi) { endphase <- phase endfreq <- jfreq } - maxi <- max(sum(test ^ 2), maxi) + maxi <- max(sum(test^2), maxi) } } - xend <- cos(endphase + c(1:ndat) * endfreq * 2 * pi) - data[is.na(data) == FALSE] <- data[is.na(data) == FALSE] - lm( - data[is.na(data) == FALSE] ~ xend[is.na(data) == FALSE] + xend <- cos(endphase + 1:ndat * endfreq * 2 * pi) + data[!is.na(data)] <- data[!is.na(data)] - lm( + data[!is.na(data)] ~ xend[!is.na(data)] )$fitted.values return(invisible(data)) diff --git a/R/GMST.R b/R/GMST.R index 92109ea6ae570fea6e2454f6f1f0fa75b5fa22e1..65cf99d59f7b542d9e910e0df1ec757d01d5976f 100644 --- a/R/GMST.R +++ b/R/GMST.R @@ -148,13 +148,13 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va # data_lats and data_lons part2 if (dim(data_tas)[lat_dim] != length(data_lats) | dim(data_tos)[lat_dim] != length(data_lats)) { - stop(paste0("The latitude dimension of parameter 'data_tas' and 'data_tos'", - " must be the same length of parameter 'data_lats'.")) + stop("The latitude dimension of parameter 'data_tas' and 'data_tos'", + " must be the same length of parameter 'data_lats'.") } if (dim(data_tas)[lon_dim] != length(data_lons) | dim(data_tos)[lon_dim] != length(data_lons)) { - stop(paste0("The longitude dimension of parameter 'data_tas' and 'data_tos'", - " must be the same length of parameter 'data_lons'.")) + stop("The longitude dimension of parameter 'data_tas' and 'data_tos'", + " must be the same length of parameter 'data_lons'.") } # sea_value if (!is.numeric(sea_value) | length(sea_value) != 1) { @@ -167,7 +167,8 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va stop("Parameter 'mask_sea_land' must be an array with dimensions [lat_dim, lon_dim].") } else if 
(!identical(as.integer(dim(mask_sea_land)), c(length(data_lats), length(data_lons)))) { - stop("Parameter 'mask_sea_land' dimensions must be equal to the length of 'data_lats' and 'data_lons'.") + stop("Parameter 'mask_sea_land' dimensions must be equal to the length of ", + "'data_lats' and 'data_lons'.") } # type if (!type %in% c('dcpp', 'hist', 'obs')) { @@ -175,19 +176,17 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va } # mask if (!is.null(mask)) { - if (!is.array(mask) | !identical(names(dim(mask)), c(lat_dim,lon_dim)) | + if (!is.array(mask) | !identical(names(dim(mask)), c(lat_dim, lon_dim)) | !identical(as.integer(dim(mask)), c(length(data_lats), length(data_lons)))) { - stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", - "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", - "points that have to be masked.")) + stop("Parameter 'mask' must be NULL (no mask) or a numerical array ", + "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", + "points that have to be masked.") } } # monini - if (type == 'dcpp') { - if (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | - monini > 12) { - stop("Parameter 'monini' must be an integer from 1 to 12.") - } + if (type == 'dcpp' && + (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | monini > 12)) { + stop("Parameter 'monini' must be an integer from 1 to 12.") } # fmonth_dim if (type == 'dcpp') { @@ -209,11 +208,11 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va } # indices_for_clim if (!is.null(indices_for_clim)) { - if (!class(indices_for_clim) %in% c('numeric', 'integer') - & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { - stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", - "or NULL to compute the anomalies based on the whole period, ", - "or FALSE if data are already anomalies")) + if (!(is(indices_for_clim, "numeric") || is(indices_for_clim, "integer")) & + !(is.logical(indices_for_clim) & !any(indices_for_clim))) { + stop("The parameter 'indices_for_clim' must be a numeric vector ", + "or NULL to compute the anomalies based on the whole period, ", + "or FALSE if data are already anomalies") } } # year_dim @@ -235,7 +234,7 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va } } # na.rm - if (!na.rm %in% c(TRUE,FALSE)) { + if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } # ncores @@ -252,7 +251,7 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va data[mask_sea_land == sea_value] <- data_tos[mask_sea_land == sea_value] return(data) } - mask_sea_land <- s2dv::Reorder(data = mask_sea_land, order = c(lat_dim,lon_dim)) + mask_sea_land <- s2dv::Reorder(data = mask_sea_land, order = c(lat_dim, lon_dim)) data <- multiApply::Apply(data = list(data_tas, data_tos), target_dims = c(lat_dim, lon_dim), fun = mask_tas_tos, mask_sea_land = mask_sea_land, @@ -276,9 +275,9 @@ GMST <- function(data_tas, data_tos, data_lats, data_lons, mask_sea_land, sea_va londim = lon_dim, latdim = lat_dim) - if (type == 'dcpp'){ + if (type == 'dcpp') { target_dims <- c(sdate_dim, fmonth_dim) - } else if (type %in% c('hist','obs')){ + } else if (type %in% c('hist', 'obs')) { target_dims <- c(year_dim, month_dim) } diff --git a/R/GSAT.R b/R/GSAT.R index 0cde94ab0a798d2603a04a1b51c733b4b3032392..eaf914fbb62284ace1c51c1dd4dca96f28078efd 100644 --- a/R/GSAT.R +++ b/R/GSAT.R @@ -119,13 +119,13 @@ GSAT <- function(data, 
data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop("Parameter 'lon_dim' is not found in 'data' dimension.") } # data_lats and data_lons part2 - if (dim(data)[lat_dim] != length(data_lats)){ - stop(paste0("The latitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lats'.")) + if (dim(data)[lat_dim] != length(data_lats)) { + stop("The latitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lats'.") } - if (dim(data)[lon_dim] != length(data_lons)){ - stop(paste0("The longitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lons'.")) + if (dim(data)[lon_dim] != length(data_lons)) { + stop("The longitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lons'.") } # ncores if (!is.null(ncores)) { @@ -136,28 +136,26 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } # mask if (!is.null(mask)) { - if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & + if (is.array(mask) & identical(names(dim(mask)), c(lat_dim, lon_dim)) & identical(as.integer(dim(mask)), c(length(data_lats), length(data_lons)))) { ## To mask those grid point that are missing in the observations mask <- s2dv::Reorder(data = mask, order = c(lat_dim, lon_dim)) - fun_mask <- function(data, mask){ + fun_mask <- function(data, mask) { data[mask == 0] <- NA return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { - stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", - "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", - "points that have to be masked.")) + stop("Parameter 'mask' must be NULL (no mask) or a numerical array ", + "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", + "points that have to be masked.") } } # monini - if (type == 'dcpp') { - if (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | - monini > 12) { - stop("Parameter 'monini' must be an integer from 1 to 12.") - } + if (type == 'dcpp' && + (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | monini > 12)) { + stop("Parameter 'monini' must be an integer from 1 to 12.") } # fmonth_dim if (type == 'dcpp') { @@ -179,11 +177,11 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } # indices_for_clim if (!is.null(indices_for_clim)) { - if (!class(indices_for_clim) %in% c('numeric', 'integer') - & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { - stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", - "or NULL to compute the anomalies based on the whole period, ", - "or FALSE if data are already anomalies")) + if (!(is(indices_for_clim, "numeric") || is(indices_for_clim, "integer")) & + !(is.logical(indices_for_clim) & !any(indices_for_clim))) { + stop("The parameter 'indices_for_clim' must be a numeric vector ", + "or NULL to compute the anomalies based on the whole period, ", + "or FALSE if data are already anomalies") } } # year_dim @@ -205,7 +203,7 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } } # na.rm - if (!na.rm %in% c(TRUE,FALSE)) { + if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } @@ -214,9 +212,9 @@ GSAT <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l londim = lon_dim, latdim = lat_dim) - if (type == 'dcpp'){ + if (type == 'dcpp') { target_dims <- c(sdate_dim, 
fmonth_dim) - } else if (type %in% c('hist','obs')){ + } else if (type %in% c('hist', 'obs')) { target_dims <- c(year_dim, month_dim) } diff --git a/R/GetProbs.R b/R/GetProbs.R index 9960c53ff3604d6f1fff182290d647fea449df5b..2a538892951b745d6ff4dc4de868b9aeec0c58ac 100644 --- a/R/GetProbs.R +++ b/R/GetProbs.R @@ -9,7 +9,8 @@ #'each category. For observations (or forecast without member dimension), 1 #'means that the event happened, while 0 indicates that the event did not #'happen. Weighted probabilities can be computed if the weights are provided for -#'each ensemble member and time step. +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. #' #'@param data A named numerical array of the forecasts or observations with, at #' least, time dimension. @@ -22,9 +23,21 @@ #'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to #' 1) between the categories. The default value is c(1/3, 2/3), which #' corresponds to tercile equiprobable categories. +#'@param abs_thresholds A numeric array or vector of the absolute thresholds in +#' the same units as \code{data}. If an array is provided, it should have at +#' least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +#' thresholds for different locations, i.e. lon and lat dimensions), they +#' should match the dimensions of \code{data}, except the member dimension +#' which should not be included. The default value is NULL and, in this case, +#' 'prob_thresholds' is used for calculating the probabilities. +#'@param bin_dim_abs A character string of the dimension name of +#' 'abs_thresholds' array in which category limits are stored. It will also be +#' the probabilistic category dimension name in the output. The default value +#' is 'bin'. #'@param indices_for_quantiles A vector of the indices to be taken along #' 'time_dim' for computing the absolute thresholds between the probabilistic -#' categories. If NULL, the whole period is used. The default value is NULL. +#' categories. If NULL (default), the whole period is used. It is only used +#' when 'prob_thresholds' is provided. #'@param weights A named numerical array of the weights for 'data' with #' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value #' is NULL. The ensemble should have at least 70 members or span at least 10 @@ -37,20 +50,30 @@ #' computation. The default value is NULL. #' #'@return -#'A numerical array of probabilities with dimensions c(bin, the rest dimensions -#'of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic -#'categories, i.e., \code{length(prob_thresholds) + 1}. +#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +#'dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +#'probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. 
#' #'@examples #'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) #'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', #' indices_for_quantiles = 4:17) #' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' #'@import multiApply #'@importFrom easyVerification convert2prob #'@export -GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_quantiles = NULL, - prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE, ncores = NULL) { +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { # Check inputs ## data @@ -79,23 +102,67 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ "dimension exists, set it as NULL.") } } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') } - ## indices_for_quantiles - if (is.null(indices_for_quantiles)) { - indices_for_quantiles <- 1:dim(data)[time_dim] - } else { - if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { - stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") - } else if (length(indices_for_quantiles) > dim(data)[time_dim] | - max(indices_for_quantiles) > dim(data)[time_dim] | - any(indices_for_quantiles < 1)) { - stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- seq_len(dim(data)[time_dim]) + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (!all(dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + } + ## weights if (!is.null(weights)) { if (!is.array(weights) | !is.numeric(weights)) @@ -110,19 +177,21 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ namesdim_weights <- c(time_dim) } if (length(dim(weights)) != lendim_weights | - any(!names(dim(weights)) %in% namesdim_weights)) { - stop(paste0("Parameter 'weights' must have dimension ", - paste0(namesdim_weights, collapse = ' and '), ".")) + !all(names(dim(weights)) %in% namesdim_weights)) { + stop("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".") } if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { - stop(paste0("Parameter 'weights' must have the same dimension length as ", - paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.")) + stop("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") } weights <- Reorder(weights, namesdim_weights) # } else { -# if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) -# stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") +# if (length(dim(weights)) != 3 | +# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have 
three dimensions with the names of ", +# "'memb_dim', 'time_dim' and 'dat_dim'.") # if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | # dim(weights)[time_dim] != dim(exp)[time_dim] | # dim(weights)[dat_dim] != dim(exp)[dat_dim]) { @@ -145,68 +214,94 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ } ############################### - - res <- Apply(data = list(data = data), - target_dims = c(time_dim, memb_dim), #, dat_dim), - output_dims = c("bin", time_dim), - fun = .GetProbs, -# dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_quantiles = indices_for_quantiles, - weights = weights, cross.val = cross.val, ncores = ncores)$output1 + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } return(res) } .GetProbs <- function(data, indices_for_quantiles, - prob_thresholds = c(1/3, 2/3), weights = NULL, cross.val = FALSE) { + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { # .GetProbs() is used in RPS, RPSS, ROCSS # data ## if data is exp: [sdate, memb] ## if data is obs: [sdate, (memb)] # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] # Add dim [memb = 1] to data if it doesn't have memb_dim if (length(dim(data)) == 1) { dim(data) <- c(dim(data), 1) if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) } - # Absolute thresholds - if (cross.val) { - quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) - for (i_time in 1:dim(data)[1]) { + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in seq_len(dim(data)[1])) { + if (is.null(weights)) { + tmp <- which(indices_for_quantiles != i_time) + quantiles[, i_time] <- + quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + tmp <- which(indices_for_quantiles != i_time) + sorted_arrays <- + .sorted_distributions(data[indices_for_quantiles[tmp], ], + weights[indices_for_quantiles[tmp], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, + prob_thresholds, "linear")$y + } + } + + } else { if (is.null(weights)) { - quantiles[, i_time] <- quantile(x = as.vector(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) } else { # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles[which(indices_for_quantiles != i_time)], ], - weights[indices_for_quantiles[which(indices_for_quantiles != i_time)], ]) + sorted_arrays <- 
.sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) sorted_data <- sorted_arrays$data cumulative_weights <- sorted_arrays$cumulative_weights - quantiles[, i_time] <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) } - } else { - if (is.null(weights)) { - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), - probs = prob_thresholds, type = 8, na.rm = TRUE) - } else { - # weights: [sdate, memb] - sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], - weights[indices_for_quantiles, ]) - sorted_data <- sorted_arrays$data - cumulative_weights <- sorted_arrays$cumulative_weights - quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, lendim = dim(data)[1], + posdim = 2, name = names(dim(data))[1]) } - quantiles <- array(rep(quantiles, dim(data)[1]), dim = c(bin = length(quantiles), dim(data)[1])) } # quantiles: [bin-1, sdate] # Probabilities probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] - for (i_time in 1:dim(data)[1]) { + for (i_time in seq_len(dim(data)[1])) { if (anyNA(data[i_time, ])) { probs[, i_time] <- rep(NA, dim = dim(quantiles)[1] + 1) } else { @@ -219,7 +314,7 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ cumulative_weights <- sorted_arrays$cumulative_weights # find any quantiles that are outside the data range integrated_probs <- array(dim = dim(quantiles)) - for (i_quant in 1:dim(quantiles)[1]) { + for (i_quant in seq_len(dim(quantiles)[1])) { # for thresholds falling under the distribution if (quantiles[i_quant, i_time] < min(sorted_data)) { integrated_probs[i_quant, i_time] <- 0 @@ -231,9 +326,10 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ quantiles[i_quant, i_time], "linear")$y } } - probs[, i_time] <- append(integrated_probs[, i_time], 1) - append(0, integrated_probs[, i_time]) + probs[, i_time] <- append(integrated_probs[, i_time], 1) - + append(0, integrated_probs[, i_time]) if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { - stop(paste0("Probability in i_time = ", i_time, " is out of [0, 1].")) + stop("Probability in i_time = ", i_time, " is out of [0, 1].") } } } @@ -250,7 +346,8 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', indices_for_ sorted_weights <- weights_vector[sorter] cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 - cumulative_weights <- cumulative_weights / cumulative_weights[length(cumulative_weights)] # fix the 1 - return(list("data" = data_vector[sorter], "cumulative_weights" = cumulative_weights)) + cumulative_weights <- cumulative_weights / + cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) } diff --git a/R/Histo2Hindcast.R b/R/Histo2Hindcast.R index f910a9a349e53ba62653d511c9a27b15ca522391..649b75bb6349497bbe4247ef72dbc746d33aa803 100644 --- a/R/Histo2Hindcast.R +++ b/R/Histo2Hindcast.R @@ -67,22 +67,22 @@ Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout, stop("Parameter 
'sdatesin' cannot be NULL.") } if (!is.character(sdatesin) || length(sdatesin) > 1) { - stop(paste0("Parameter 'sdatesin' must be a character string in the format", - " 'YYYYMMDD' or 'YYYYMM'.")) + stop("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") } else if (!nchar(sdatesin) %in% c(6, 8) | is.na(as.numeric(sdatesin))) { - stop(paste0("Parameter 'sdatesin' must be a character string in the format", - " 'YYYYMMDD' or 'YYYYMM'.")) + stop("Parameter 'sdatesin' must be a character string in the format", + " 'YYYYMMDD' or 'YYYYMM'.") } # sdatesout if (is.null(sdatesout)) { stop("Parameter 'sdatesout' cannot be NULL.") } if (!is.character(sdatesout) | !is.vector(sdatesout)) { - stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", - "format 'YYYYMMDD' or 'YYYYMM'.")) + stop("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.") } else if (!all(nchar(sdatesout) %in% c(6, 8)) | anyNA(as.numeric(sdatesin))) { - stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", - "format 'YYYYMMDD' or 'YYYYMM'.")) + stop("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'.") } # nleadtimesout if (is.null(nleadtimesout)) { @@ -122,13 +122,18 @@ Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout, yrout <- as.numeric(substr(sdatesout, 1, 4)) mthin <- as.numeric(substr(sdatesin, 5, 6)) if (mthin > 12) { - stop(paste0("Parameter 'sdatesin' must be in the format 'YYYYMMDD' or ", - "'YYYYMM'. Found the month is over 12.")) + stop("Parameter 'sdatesin' must be in the format 'YYYYMMDD' or ", + "'YYYYMM'. Found the month is over 12.") } mthout <- as.numeric(substr(sdatesout, 5, 6)) if (any(mthout > 12)) { - stop(paste0("Parameter 'sdatesout' must be a vector of character in the ", - "format 'YYYYMMDD' or 'YYYYMM'. Found certain month is over 12.")) + stop("Parameter 'sdatesout' must be a vector of character in the ", + "format 'YYYYMMDD' or 'YYYYMM'. Found certain month is over 12.") + } + if (any((yrout - yrin) * 12 + (mthout - mthin) < 0)) { + warning("Some of the start dates requested in 'sdatesout' are ", + "earlier than the original start date 'sdatesin'. 
These ", + "sdates will be filled with NA values") } res <- Apply(data, @@ -144,18 +149,20 @@ Histo2Hindcast <- function(data, sdatesin, sdatesout, nleadtimesout, } -.Histo2Hindcast <- function(data, yrin = yrin, yrout = yrout, mthin = mthin, mthout = mthout, nleadtimesout) { +.Histo2Hindcast <- function(data, yrin, yrout, mthin, mthout, nleadtimesout) { # data: [sdate = 1, ftime] res <- array(dim = c(sdate = length(yrout), ftime = nleadtimesout)) diff_mth <- (yrout - yrin) * 12 + (mthout - mthin) - for (i in 1:length(diff_mth)) { - if (diff_mth[i] < dim(data)[2]) { - ftime_ind <- max(1 + diff_mth[i], 1):min(nleadtimesout + diff_mth[i], dim(data)[2]) - res[i, 1:length(ftime_ind)] <- data[1, ftime_ind] + for (i in seq_along(diff_mth)) { + ftime_ind <- max(1 + diff_mth[i], 1):min(nleadtimesout + diff_mth[i], dim(data)[2]) + if (diff_mth[i] < 0) { + # Fill with NA values if the requested date is earlier than available data + res[i, seq_along(ftime_ind)] <- rep(NA, length(seq_along(ftime_ind))) + } else if (diff_mth[i] < dim(data)[2]) { + res[i, seq_along(ftime_ind)] <- data[1, ftime_ind] } } - return(res) } diff --git a/R/MSE.R b/R/MSE.R index 61cf3bcfb0586682c7e3f107848959742d33e268..5c502baed41a12e8fb118a91d457724d2c8ef97c 100644 --- a/R/MSE.R +++ b/R/MSE.R @@ -10,7 +10,8 @@ #'all leadtimes.\cr #'The confidence interval is computed by the chi2 distribution.\cr #' -#'@param exp A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. +#'@param exp A named numeric array of experimental data, with at least +#' 'time_dim' dimension. It can also be a vector with the same length as 'obs'. #'@param obs A named numeric array of observational data, same dimensions as #' parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a #' vector with the same length as 'exp'. 
@@ -91,15 +92,15 @@ MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -144,8 +145,8 @@ MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, } if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { - stop(paste0("Parameter 'limits' must be a vector of two positive ", - "integers smaller than the length of paramter 'comp_dim'.")) + stop("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") } } ## conf @@ -182,8 +183,8 @@ MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, stop("Parameter 'exp' and 'obs' must have the same dimension names.") } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.") } if (dim(exp)[time_dim] < 2) { stop("The length of time_dim must be at least 2 to compute MSE.") @@ -261,7 +262,9 @@ MSE <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, memb_dim = NULL, # dif for (i in 1:nobs) { - dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + dif[, , i] <- sapply(1:nexp, function(x) { + exp[, x] - obs[, i] + }) } mse <- colMeans(dif^2, na.rm = TRUE) # array(dim = c(nexp, nobs)) diff --git a/R/MSSS.R b/R/MSSS.R index a11c50c85ee9a20b33c13666b333b0e6f6b22a97..ca854f30f8b18b3e8db3e82a1cb6d4487d9783c5 100644 --- a/R/MSSS.R +++ b/R/MSSS.R @@ -41,6 +41,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as "obs" except "time_dim" +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". 
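# A sketch of the accepted forms of the new 'N.eff' argument described above, used
# together with the Random Walk significance test. Dimension names and sizes are
# hypothetical, and 'exp'/'obs' are assumed to be anomalies (ref is left NULL,
# i.e. a zero climatology).
exp <- array(rnorm(20 * 5 * 6), dim = c(sdate = 20, lat = 5, lon = 6))
obs <- array(rnorm(20 * 5 * 6), dim = c(sdate = 20, lat = 5, lon = 6))
# N.eff = NA (default): the effective sample size is estimated internally from 'obs'
res_na  <- MSSS(exp, obs, sig_method = "Random Walk", sig_method.type = "two.sided")
# N.eff = FALSE: use the full length of 'obs' along 'time_dim' (no autocorrelation correction)
res_len <- MSSS(exp, obs, sig_method = "Random Walk", sig_method.type = "two.sided",
                N.eff = FALSE)
# A single number for all cases, or an array matching the non-time dimensions of 'obs'
res_num <- MSSS(exp, obs, sig_method = "Random Walk", sig_method.type = "two.sided",
                N.eff = 15)
res_arr <- MSSS(exp, obs, sig_method = "Random Walk", sig_method.type = "two.sided",
                N.eff = array(15, dim = c(lat = 5, lon = 6)))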
#'@param sig_method.type A character string indicating the test type of the @@ -87,7 +93,7 @@ #'@importFrom stats pf #'@export MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -105,15 +111,15 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { @@ -121,14 +127,18 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'ref' must be numeric.") } if (is.array(ref)) { - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } - } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + } else if (length(ref) != 1 | !all(ref %in% c(0, 1))) { stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } } else { ref <- 0 + .warning("If a reference dataset is not provided (ref = NULL), the default ", + "value for the climatology is 0 and MSSS results will only be ", + "correct if 'exp' and 'obs' are anomalies. 
Provide a non-null ", + "'ref' for full-field data.") } if (!is.array(ref)) { # 0 or 1 ref <- array(data = ref, dim = dim(exp)) @@ -172,6 +182,19 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be NA, FALSE, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -184,7 +207,8 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, sig_method.type <- "two.sided" } if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { - stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() ", + "parameter 'test.type'.") } if (sig_method.type == 'two.sided.approx' & pval == T) { .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") @@ -221,8 +245,8 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'exp' and 'obs' must have the same dimension names.") } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.") } name_ref <- sort(names(dim(ref))) @@ -232,17 +256,17 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") } if (dim(exp)[time_dim] <= 2) { @@ -288,20 +312,32 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - res <- Apply(data, - target_dims = target_dims, - fun = .MSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + if 
(is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .MSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', - sig_method.type = NULL) { + sign = FALSE, alpha = 0.05, N.eff = NA, + sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] # ref: [sdate, (dat)] or NULL @@ -343,7 +379,9 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + dif1[, , i] <- sapply(1:nexp, function(x) { + exp[, x] - obs[, i] + }) } mse_exp <- colMeans(dif1^2, na.rm = TRUE) # [nexp, nobs] @@ -352,7 +390,9 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dif2 <- array(dim = c(nsdate, nref, nobs)) names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + dif2[, , i] <- sapply(1:nref, function(x) { + ref[, x] - obs[, i] + }) } mse_ref <- colMeans(dif2^2, na.rm = TRUE) # [nref, nobs] if (nexp != nref) { @@ -382,7 +422,7 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } } - F.stat <- (eno2 * mse_ref^2 / (eno2 - 1)) / ((eno1 * mse_exp^2 / (eno1- 1))) + F.stat <- (eno2 * mse_ref^2 / (eno2 - 1)) / ((eno1 * mse_exp^2 / (eno1 - 1))) tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) if (sign) signif <- p_val <= alpha @@ -408,8 +448,11 @@ MSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/MeanDims.R b/R/MeanDims.R index 56a304dbefb98d359c0458ea44ee1ee450a016b4..45fd6f6c5dfd298cb21e988d091503234a11a84e 100644 --- a/R/MeanDims.R +++ b/R/MeanDims.R @@ -50,10 +50,8 @@ MeanDims <- function(data, dims, na.rm = FALSE, drop = TRUE) { stop("Parameter 'dims' exceeds the dimension length of parameter 'data'.") } } - if (is.character(dims)) { - if (!all(dims %in% names(dim(data)))) { - stop("Parameter 'dims' do not match the dimension names of parameter 'data'.") - } + if (is.character(dims) && !all(dims %in% names(dim(data)))) { + stop("Parameter 'dims' do not match the dimension names of parameter 'data'.") } ## na.rm if (!is.logical(na.rm) | length(na.rm) > 1) { @@ -80,7 +78,7 @@ MeanDims <- function(data, dims, na.rm = FALSE, drop = TRUE) { if (is.character(dims)) { dims <- which(names(dim_data) %in% dims) } - data <- aperm(data, c(dims, 
(1:length(dim_data))[-dims])) + data <- aperm(data, c(dims, (seq_along(dim_data))[-dims])) data <- colMeans(data, dims = length(dims), na.rm = na.rm) # If data is vector diff --git a/R/NAO.R b/R/NAO.R index fb5220cf5ce02e73987ec9e85c951d0234c213b3..2f562d3d33f2ec7cc6f85e01794b135657d7e535 100644 --- a/R/NAO.R +++ b/R/NAO.R @@ -7,11 +7,12 @@ #'anomalies onto the EOF pattern of the other years of the forecast. #'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month #'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns -#'cross-validated PCs of the NAO index for forecast (exp) and observations -#'(obs) based on the leading EOF pattern. +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). #' #'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) -#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with #' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. #' If only NAO of observational data needs to be computed, this parameter can #' be left to NULL. The default value is NULL. @@ -20,6 +21,12 @@ #' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. #' If only NAO of experimental data needs to be computed, this parameter can #' be left to NULL. The default value is NULL. +#'@param exp_cor A named numeric array of the Nort Atlantic SLP (20-80N, 80W-40E) +#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimension 'time_dim' of length 1 (as in the case of an operational +#' forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of reference period needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. #'@param lat A vector of the latitudes of 'exp' and 'obs'. #'@param lon A vector of the longitudes of 'exp' and 'obs'. #'@param time_dim A character string indicating the name of the time dimension @@ -37,25 +44,32 @@ #' value is 2:4, i.e., from 2nd to 4th forecast time steps. #'@param obsproj A logical value indicating whether to compute the NAO index by #' projecting the forecast anomalies onto the leading EOF of observational -#' reference (TRUE) or compute the NAO by first computing the leading -#' EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the -#' year you are evaluating out), and then projecting forecast anomalies onto -#' this EOF (FALSE). The default value is TRUE. +#' reference (TRUE, default) or compute the NAO by first computing the leading +#' EOF of the forecast anomalies (in cross-validation mode, i.e. leave the +#' evaluated year out), then projecting forecast anomalies onto this EOF +#' (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE +#' and 'exp' will be used when obsproj is FALSE, and no cross-validation is +#' applied. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' #'@return -#'A list which contains: +#'A list which contains some of the following items depending on the data inputs: #'\item{exp}{ -#' A numeric array of forecast NAO index in verification format with the same +#' A numeric array of hindcast NAO index in verification format with the same #' dimensions as 'exp' except space_dim and ftime_dim. 
If ftime_avg is NULL, #' ftime_dim remains. #' } #'\item{obs}{ -#' A numeric array of observed NAO index in verification format with the same +#' A numeric array of observation NAO index in verification format with the same #' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, #' ftime_dim remains. #'} +#'\item{exp_cor}{ +#' A numeric array of forecast NAO index in verification format with the same +#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL, +#' ftime_dim remains. +#' } #' #'@references #'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of @@ -73,6 +87,8 @@ #'lon <- seq(-80, 40, length.out = 9) #'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) #' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) #'# plot the NAO index #' \dontrun{ #'nao$exp <- Reorder(nao$exp, c(2, 1)) @@ -84,13 +100,12 @@ #'@import multiApply #'@importFrom ClimProjDiags Subset #'@export -NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', memb_dim = 'member', space_dim = c('lat', 'lon'), ftime_dim = 'ftime', ftime_avg = 2:4, obsproj = TRUE, ncores = NULL) { - # Check inputs - ## exp and obs (1) + ## exp, obs, and exp_cor (1) if (is.null(obs) & is.null(exp)) { stop("Parameter 'exp' and 'obs' cannot both be NULL.") } @@ -99,10 +114,10 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', stop("Parameter 'exp' must be a numeric array.") } if (is.null(dim(exp))) { - stop(paste0("Parameter 'exp' must have at least dimensions ", - "time_dim, memb_dim, space_dim, and ftime_dim.")) + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") } - if(any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { stop("Parameter 'exp' must have dimension names.") } } @@ -111,27 +126,52 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', stop("Parameter 'obs' must be a numeric array.") } if (is.null(dim(obs))) { - stop(paste0("Parameter 'obs' must have at least dimensions ", - "time_dim, space_dim, and ftime_dim.")) + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") } - if(any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'obs' must have dimension names.") } } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { stop("Parameter 'time_dim' must be a character string.") } if (!is.null(exp)) { if (!time_dim %in% names(dim(exp))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + stop("Parameter 'time_dim' is not found in 'exp' 
dimension.") } } if (!is.null(obs)) { if (!time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + stop("Parameter 'time_dim' is not found in 'obs' dimension.") } } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + ## memb_dim if (!is.character(memb_dim) | length(memb_dim) > 1) { stop("Parameter 'memb_dim' must be a character string.") @@ -141,6 +181,7 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', stop("Parameter 'memb_dim' is not found in 'exp' dimension.") } } + add_member_back <- FALSE if (!is.null(obs)) { if (memb_dim %in% names(dim(obs))) { if (dim(obs)[memb_dim] != 1) { @@ -149,8 +190,11 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', add_member_back <- TRUE obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') } - } else { - add_member_back <- FALSE + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") } } ## space_dim @@ -158,30 +202,41 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', stop("Parameter 'space_dim' must be a character vector of 2.") } if (!is.null(exp)) { - if (any(!space_dim %in% names(dim(exp)))) { + if (!all(space_dim %in% names(dim(exp)))) { stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") } } if (!is.null(obs)) { - if (any(!space_dim %in% names(dim(obs)))) { + if (!all(space_dim %in% names(dim(obs)))) { stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") } } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } ## ftime_dim if (!is.character(ftime_dim) | length(ftime_dim) > 1) { stop("Parameter 'ftime_dim' must be a character string.") } - if (!is.null(exp)) { + if (!is.null(exp) && !is.null(ftime_avg)) { if (!ftime_dim %in% names(dim(exp))) { stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") } } - if (!is.null(obs)) { + if (!is.null(obs) && !is.null(ftime_avg)) { if (!ftime_dim %in% names(dim(obs))) { stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") } } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } ## exp and obs (2) + #TODO: Add checks for exp_cor if (!is.null(exp) & !is.null(obs)) { name_exp <- sort(names(dim(exp))) name_obs <- sort(names(dim(obs))) @@ -191,12 +246,12 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', throw_error <- TRUE } else if (any(name_exp != name_obs)) { throw_error <- TRUE - } else if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { throw_error <- TRUE } if (throw_error) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") } } ## ftime_avg @@ -208,11 +263,17 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { stop("Parameter 
'ftime_avg' must be within the range of ftime_dim length.") } - } else { + } + if (!is.null(obs)) { if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") } } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } } ## sdate >= 2 if (!is.null(exp)) { @@ -227,23 +288,34 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', ## lat and lon if (!is.null(exp)) { if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") } if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") } - } else { + } + if (!is.null(obs)) { if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") } if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'exp' and 'obs'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") } } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.") + } + } stop_needed <- FALSE if (max(lat) > 80 | min(lat) < 20) { stop_needed <- TRUE @@ -268,8 +340,8 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', } } if (stop_needed) { - stop(paste0("The typical domain used to compute the NAO is 20N-80N, ", - "80W-40E. 'lat' or 'lon' is out of range.")) + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 
'lat' or 'lon' is out of range.") } ## obsproj if (!is.logical(obsproj) | length(obsproj) > 1) { @@ -279,8 +351,8 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', if (is.null(obs)) { stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") } - if (is.null(exp)) { - .warning("parameter 'obsproj' set to TRUE but no 'exp' provided.") + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") } } ## ncores @@ -303,46 +375,63 @@ NAO <- function(exp = NULL, obs = NULL, lat, lon, time_dim = 'sdate', obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } } # wght - wght <- array(sqrt(cos(lat * pi/180)), dim = c(length(lat), length(lon))) - - if (!is.null(exp) & !is.null(obs)) { - res <- Apply(list(exp, obs), + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), target_dims = list(exp = c(memb_dim, time_dim, space_dim), - obs = c(time_dim, space_dim)), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), fun = .NAO, - lat = lat, wght = wght, - obsproj = obsproj, add_member_back = add_member_back, - ncores = ncores) - } else if (!is.null(exp)) { - res <- Apply(list(exp = exp), - target_dims = list(exp = c(memb_dim, time_dim, space_dim)), - fun = .NAO, - lat = lat, wght = wght, obs = NULL, - obsproj = obsproj, add_member_back = FALSE, - ncores = ncores) - } else if (!is.null(obs)) { - if (add_member_back) { - output_dims <- list(obs = c(time_dim, memb_dim)) - } else { - output_dims <- list(obs = time_dim) - } - res <- Apply(list(obs = obs), - target_dims = list(obs = c(time_dim, space_dim)), - output_dims = output_dims, - fun = .NAO, - lat = lat, wght = wght, exp = NULL, + lat = lat, wght = wght, obsproj = obsproj, add_member_back = add_member_back, ncores = ncores) } + return(res) } -.NAO <- function(exp = NULL, obs = NULL, lat, wght, obsproj = TRUE, add_member_back = FALSE) { +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { # exp: [memb_exp, sdate, lat, lon] # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] # wght: [lat, lon] if (!is.null(exp)) { @@ -355,76 +444,130 @@ NAO <- function(exp = NULL, obs = NULL, lat, 
lon, time_dim = 'sdate', nlat <- dim(obs)[2] nlon <- dim(obs)[3] } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } - if (!is.null(obs)) NAOO.ver <- array(NA, dim = ntime) - if (!is.null(exp)) NAOF.ver <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } - for (tt in 1:ntime) { #sdate + if (is.null(exp_cor)) { - if (!is.null(obs)) { - ## Calculate observation EOF. Excluding one forecast start year. - obs_sub <- obs[c(1:ntime)[-tt], , , drop = FALSE] - obs_EOF <- .EOF(obs_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + for (tt in 1:ntime) { # cross-validation - ## Correct polarity of pattern. - # dim(obs_EOF$EOFs): [mode, lat, lon] - if (0 < mean(obs_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { - obs_EOF$EOFs <- obs_EOF$EOFs * (-1) -# obs_EOF$PCs <- obs_EOF$PCs * (-1) # not used + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. + nao_obs[tt] <- PF[tt] } - ## Project observed anomalies. - PF <- .ProjectField(obs, eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] - ## Keep PCs of excluded forecast start year. Fabian. - NAOO.ver[tt] <- PF[tt] - } - if (!is.null(exp)) { - if (!obsproj) { - exp_sub <- exp[, c(1:ntime)[-tt], , , drop = FALSE] - # Combine 'memb' and 'sdate' to calculate EOF - dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) - exp_EOF <- .EOF(exp_sub, neofs = 1, wght = wght) # $EOFs: [mode, lat, lon] + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] - ## Correct polarity of pattern. - ##NOTE: different from s2dverification, which doesn't use mean(). -# if (0 < exp_EOF$EOFs[1, which.min(abs(lat - 65)), ]) { - if (0 < mean(exp_EOF$EOFs[1, which.min(abs(lat - 65)), ], na.rm = T)) { - exp_EOF$EOFs <- exp_EOF$EOFs * (-1) -# exp_EOF$PCs <- exp_EOF$PCs * sign # not used - } + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } - ### Lines below could be simplified further by computing - ### ProjectField() only on the year of interest... (though this is - ### not vital). 
Lauriane - for (imemb in 1:nmemb_exp) { - PF <- .ProjectField(exp[imemb, , , ], eof_mode = exp_EOF$EOFs[1, , ], wght = wght) # [sdate, memb] - NAOF.ver[tt, imemb] <- PF[tt] - } - } else { - ## Project forecast anomalies on obs EOF - for (imemb in 1:nmemb_exp) { - PF <- .ProjectField(exp[imemb, , , ], eof_mode = obs_EOF$EOFs[1, , ], wght = wght) # [sdate] - NAOF.ver[tt, imemb] <- PF[tt] + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } } } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] } - } # for loop sdate + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } # add_member_back if (add_member_back) { - suppressWarnings( - NAOO.ver <- InsertDim(NAOO.ver, 2, 1, name = names(dim(exp))[1]) - ) + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) } - #NOTE: EOFs_obs is not returned because it's only the result of the last sdate - # (It is returned in s2dverification.) - if (!is.null(exp) & !is.null(obs)) { - return(list(exp = NAOF.ver, obs = NAOO.ver)) #, EOFs_obs = obs_EOF)) - } else if (!is.null(exp)) { - return(list(exp = NAOF.ver)) - } else if (!is.null(obs)) { - return(list(obs = NAOO.ver)) + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) } } diff --git a/R/Persistence.R b/R/Persistence.R index 9895f479a460e22dafc731185bed30fa50f22cdf..b822c8c21ccc587bf02a620a7bdc2e4d54aec0f6 100644 --- a/R/Persistence.R +++ b/R/Persistence.R @@ -10,8 +10,6 @@ #' 'start'. 
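# Usage sketch (illustrative, not taken from this patch): NAO() with the new
# 'exp_cor' argument, which returns the index of an extra forecast with a
# single start date projected on the EOF computed without cross-validation.
# Array sizes are arbitrary; dimension names follow the function defaults.
library(s2dv)
set.seed(1)
lat <- seq(20, 80, length.out = 7)    # domain must stay within 20N-80N
lon <- seq(-80, 40, length.out = 13)  # and 80W-40E
exp     <- array(rnorm(2 * 10 * 7 * 13 * 4),
                 dim = c(member = 2, sdate = 10, lat = 7, lon = 13, ftime = 4))
obs     <- array(rnorm(10 * 7 * 13 * 4),
                 dim = c(sdate = 10, lat = 7, lon = 13, ftime = 4))
exp_cor <- array(rnorm(2 * 1 * 7 * 13 * 4),
                 dim = c(member = 2, sdate = 1, lat = 7, lon = 13, ftime = 4))
nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon)
str(nao)  # list with components 'exp', 'obs' and the new 'exp_cor'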
#'@param dates A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) #' in class 'Date' indicating the dates available in the observations. -#'@param time_dim A character string indicating the dimension along which to -#' compute the autoregression. The default value is 'time'. #'@param start A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' #' indicating the first start date of the persistence forecast. It must be #' between 1850 and 2020. @@ -25,6 +23,8 @@ #' average forecast times for which persistence should be calculated in the #' case of a multi-timestep average persistence. The default value is #' 'ft_start'. +#'@param time_dim A character string indicating the dimension along which to +#' compute the autoregression. The default value is 'time'. #'@param max_ft An integer indicating the maximum forecast time possible for #' 'data'. For example, for decadal prediction 'max_ft' would correspond to 10 #' (years). The default value is 10. @@ -90,8 +90,8 @@ #' #'@import multiApply #'@export -Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, - ft_end = ft_start, max_ft = 10, nmemb = 1, na.action = 10, +Persistence <- function(data, dates, start, end, ft_start, ft_end = ft_start, + time_dim = 'time', max_ft = 10, nmemb = 1, na.action = 10, ncores = NULL) { # Check inputs @@ -106,7 +106,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -119,76 +119,76 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, ## dates if (is.numeric(dates)) { #(YYYY) if (any(nchar(dates) != 4) | any(dates %% 1 != 0) | any(dates <= 0)) { - stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", - "string (YYYY-MM-DD) in class 'Date'.")) + stop("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.") } } else if (inherits(dates, 'Date')) { #(YYYY-MM-DD) } else { - stop(paste0("Parameter 'dates' must be a sequence of integer (YYYY) or ", - "string (YYYY-MM-DD) in class 'Date'.")) + stop("Parameter 'dates' must be a sequence of integer (YYYY) or ", + "string (YYYY-MM-DD) in class 'Date'.") } if (length(dates) != dim(data)[time_dim]) { stop("Parameter 'dates' must have the same length as in 'time_dim'.") } ## dates, start, and end - if (!all(sapply(list(class(dates), class(start)), function(x) x == class(end)))) { + if (!all(sapply(list(dates, start), function(x) is(x, class(end))))) { stop("Parameter 'dates', 'start', and 'end' should be the same format.") } ## start if (is.numeric(start)) { #(YYYY) if (length(start) > 1 | any(start %% 1 != 0) | any(start < 1850) | any(start > 2020)) { - stop(paste0("Parameter 'start' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } if (is.na(match(start, dates))) { stop("Parameter 'start' must be one of the values of 'dates'.") } if (start < dates[1] + 40) { - stop(paste0("Parameter 'start' must start at least 40 time steps after ", - "the first 'dates'.")) + stop("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.") } } else if (inherits(start, 'Date')) { if 
(length(start) > 1 | any(start < as.Date(ISOdate(1850, 1, 1))) | any(start > as.Date(ISOdate(2021, 1, 1)))) { - stop(paste0("Parameter 'start' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } if (is.na(match(start, dates))) { stop("Parameter 'start' must be one of the values of 'dates'.") } if (start < dates[1] + 40) { - stop(paste0("Parameter 'start' must start at least 40 time steps after ", - "the first 'dates'.")) + stop("Parameter 'start' must start at least 40 time steps after ", + "the first 'dates'.") } } else { - stop(paste0("Parameter 'start' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'start' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } ## end if (is.numeric(end)) { #(YYYY) if (length(end) > 1 | any(end %% 1 != 0) | any(end < 1850) | any(end > 2020)) { - stop(paste0("Parameter 'end' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } if (end > dates[length(dates)] + 1) { - stop(paste0("Parameter 'end' must end at most 1 time steps after ", - "the last 'dates'.")) + stop("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.") } } else if (inherits(end, 'Date')) { if (length(end) > 1 | any(end < as.Date(ISOdate(1850, 1, 1))) | any(end > as.Date(ISOdate(2020, 12, 31)))) { - stop(paste0("Parameter 'end' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } if (end > dates[length(dates)] + 1) { - stop(paste0("Parameter 'end' must end at most 1 time steps after ", - "the last 'dates'.")) + stop("Parameter 'end' must end at most 1 time steps after ", + "the last 'dates'.") } } else { - stop(paste0("Parameter 'end' must be an integer or a string in class ", - "'Date' between 1850 and 2020.")) + stop("Parameter 'end' must be an integer or a string in class ", + "'Date' between 1850 and 2020.") } ## ft_start if (!is.numeric(ft_start) | ft_start %% 1 != 0 | ft_start < 0 | @@ -212,15 +212,15 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, } ## na.action if (!is.function(na.action) & !is.numeric(na.action)) { - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) + stop("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") } if (is.numeric(na.action)) { if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) + stop("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") } } ## ncores @@ -233,8 +233,6 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, ############################### # Calculate Persistence - dim_names <- names(dim(data)) - output <- Apply(list(data), target_dims = time_dim, @@ -256,8 +254,8 @@ Persistence <- function(data, dates, time_dim = 'time', 
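# Usage sketch (illustrative, not taken from this patch): 'time_dim' now sits
# after 'ft_end' in the Persistence() signature, so calls that passed it as
# the third positional argument must be updated; naming the arguments keeps
# the call valid with either argument order. Values below are arbitrary.
library(s2dv)
set.seed(1)
dates <- seq(1920, 2012, 1)
data  <- array(rnorm(length(dates)), dim = c(time = length(dates)))
res   <- Persistence(data = data, dates = dates, start = 1961, end = 2005,
                     ft_start = 1, time_dim = 'time')
names(res)  # 'persistence' plus the AR fit diagnostics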
start, end, ft_start, # x could be a vector timeseries # start/end is a year (4-digit numeric) or a date (ISOdate) # ft_start/ft_end are indices -.Persistence <- function(x, dates, time_dim = 'time', start, end, ft_start = 1, - ft_end = 1, max_ft = 10, nmemb = 1, na.action = 10) { +.Persistence <- function(x, dates, start, end, ft_start = 1, ft_end = 1, + time_dim = 'time', max_ft = 10, nmemb = 1, na.action = 10) { tm <- end - start + 1 max_date <- match(start, dates) interval <- ft_end - ft_start @@ -272,11 +270,9 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, min_x = max_ft # for extreme case: ex. forecast years 1-10, interval = 9 max_x = max_date + sdate - 2 - ft_start - regdates = max_y - min_y + 1 - for (val_x in min_x:max_x) { tmp_x <- mean(x[(val_x - interval):val_x]) - if (val_x == min_x){ + if (val_x == min_x) { obs_x <- tmp_x } else { obs_x <- c(obs_x, tmp_x) @@ -307,7 +303,7 @@ Persistence <- function(data, dates, time_dim = 'time', start, end, ft_start, AR.intercept[sdate] <- b AR.lowCI[sdate] <- reg$conf.lower[2] AR.highCI[sdate] <- reg$conf.upper[2] - persistence[ , sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], + persistence[, sdate] <- rnorm(n = nmemb, mean = persistence.mean[sdate], sd = persistence.predint[sdate]) } diff --git a/R/PlotBoxWhisker.R b/R/PlotBoxWhisker.R index 2ddcec0d59a7627432b388db555678dcaf4041e8..9b4c88e02836f68d32c98d4b92cc0c1d4f1f24cd 100644 --- a/R/PlotBoxWhisker.R +++ b/R/PlotBoxWhisker.R @@ -90,7 +90,7 @@ #'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) #'ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2]) #'ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2]) -#'nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon) +#'nao <- NAO(exp = ano_exp, obs = ano_obs, lat = sampleData$lat, lon = sampleData$lon) #'# Finally plot the nao index #' \dontrun{ #'nao$exp <- Reorder(nao$exp, c(2, 1)) diff --git a/R/PlotEquiMap.R b/R/PlotEquiMap.R index 3b8f861f8b7dced6b9ddcba7b99b202717e59da9..a2a7e3d4328285bb69e9d45a839107b683221e38 100644 --- a/R/PlotEquiMap.R +++ b/R/PlotEquiMap.R @@ -531,7 +531,8 @@ PlotEquiMap <- function(var, lon, lat, varu = NULL, varv = NULL, } if (!all(is.na(var))) { - var_limits <- c(min(var, na.rm = TRUE), max(var, na.rm = TRUE)) + var_limits <- c(min(var[!is.infinite(var)], na.rm = TRUE), + max(var[!is.infinite(var)], na.rm = TRUE)) } else { .warning("All the data are NAs. 
The map will be filled with colNA.") if (!is.null(brks) && length(brks) > 1) { diff --git a/R/PlotLayout.R b/R/PlotLayout.R index 6553f8a5067a85d83619bbcacfc16411c29aabd5..c77b25a405d3b40535606870899711ad0cda77bd 100644 --- a/R/PlotLayout.R +++ b/R/PlotLayout.R @@ -348,7 +348,9 @@ PlotLayout <- function(fun, plot_dims, var, ..., special_args = NULL, # Check the rest of parameters (unless the user simply wants to build an empty layout) if (!all(sapply(var, is_single_na))) { if (!all(is.na(unlist(var)))) { - var_limits <- c(min(unlist(var), na.rm = TRUE), max(unlist(var), na.rm = TRUE)) + tmp <- !is.infinite(unlist(var)) + var_limits <- c(min(unlist(var)[tmp], na.rm = TRUE), + max(unlist(var)[tmp], na.rm = TRUE)) } else { if (!is.null(brks)) { #NOTE: var_limits be like this to avoid warnings from ColorBar diff --git a/R/PlotStereoMap.R b/R/PlotStereoMap.R index 4b4fbd22b93110e15e51ffe63071fac24f3b1d41..a2fdb85d27cc0faf2fd8d21160bfdfe9d1a3a6c0 100644 --- a/R/PlotStereoMap.R +++ b/R/PlotStereoMap.R @@ -170,8 +170,10 @@ #'data <- matrix(rnorm(100 * 50), 100, 50) #'x <- seq(from = 0, to = 360, length.out = 100) #'y <- seq(from = -90, to = 90, length.out = 50) +#' \dontrun{ #'PlotStereoMap(data, x, y, latlims = c(60, 90), brks = 50, #' toptitle = "This is the title") +#' } #'@import mapproj #'@importFrom grDevices dev.cur dev.new dev.off gray #'@importFrom stats median diff --git a/R/ProbBins.R b/R/ProbBins.R index ef293d94710fa7d70bbb3a8984a25898ddc68f72..adc889093fac343104834af7e6b81993f3c26bc3 100644 --- a/R/ProbBins.R +++ b/R/ProbBins.R @@ -71,7 +71,7 @@ ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'me # dim(data) <- c(length(data)) # names(dim(data)) <- time_dim # } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## thr @@ -80,10 +80,8 @@ ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'me } if (!is.numeric(thr) | !is.vector(thr)) { stop("Parameter 'thr' must be a numeric vector.") - } else if (quantile) { - if (!all(thr <= 1 & thr >= 0)) { - stop("Parameter 'thr' must be within the range [0, 1] if 'quantile' is TRUE.") - } + } else if (quantile && !all(thr <= 1 & thr >= 0)) { + stop("Parameter 'thr' must be within the range [0, 1] if 'quantile' is TRUE.") } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { @@ -104,19 +102,21 @@ ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'me if (!is.numeric(fcyr) | !is.vector(fcyr)) { stop("Parameter 'fcyr' must be a numeric vector or 'all'.") } else if (any(fcyr %% 1 != 0) | min(fcyr) < 1 | max(fcyr) > dim(data)[time_dim]) { - stop(paste0("Parameter 'fcyr' must be the indices of 'time_dim' within ", - "the range [1, ", dim(data)[time_dim], "].")) + stop("Parameter 'fcyr' must be the indices of 'time_dim' within ", + "the range [1, ", dim(data)[time_dim], "].") } } else { - fcyr <- 1:dim(data)[time_dim] + fcyr <- seq_len(dim(data)[time_dim]) } ## quantile if (!is.logical(quantile) | length(quantile) > 1) { stop("Parameter 'quantile' must be one logical value.") } ## compPeriod - if (length(compPeriod) != 1 | any(!compPeriod %in% c('Full period', 'Without fcyr', 'Cross-validation'))) { - stop("Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', or 'Cross-validation'.") + if (length(compPeriod) != 1 | + !all(compPeriod %in% c('Full period', 'Without fcyr', 'Cross-validation'))) { + 
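# Sketch of the behaviour fixed above (illustration only): the colour limits
# of PlotEquiMap()/PlotLayout() are now computed over finite values, so an
# Inf in the field no longer collapses the colour bar.
var <- c(-2, 0, 1, Inf, 3, -Inf, NA)
var_limits <- c(min(var[!is.infinite(var)], na.rm = TRUE),
                max(var[!is.infinite(var)], na.rm = TRUE))
var_limits  # -2 3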
stop("Parameter 'compPeriod' must be either 'Full period', 'Without fcyr', ", + "or 'Cross-validation'.") } ## ncores if (!is.null(ncores)) { @@ -140,7 +140,8 @@ ProbBins <- function(data, thr, fcyr = 'all', time_dim = 'sdate', memb_dim = 'me return(res) } -.ProbBins <- function(data, thr = thr, fcyr = 'all', quantile, compPeriod = "Full period") { +.ProbBins <- function(data, thr, fcyr = 'all', quantile = TRUE, + compPeriod = "Full period") { # data: [sdate, member] @@ -201,7 +202,7 @@ counts <- function (dat, nbthr) { thr <- dat[1:nbthr] data <- dat[nbthr + 1:(length(dat) - nbthr)] prob <- array(NA, dim = c(nbthr + 1, length(dat) - nbthr)) - prob[1, ] <- 1*(data <= thr[1]) + prob[1, ] <- 1 * (data <= thr[1]) if (nbthr != 1) { for (ithr in 2:(nbthr)) { prob[ithr, ] <- 1 * ((data > thr[ithr - 1]) & (data <= thr[ithr])) diff --git a/R/ProjectField.R b/R/ProjectField.R index 55e7fd20e39fe8c9ec9b62f59b4e52d1b2f814cc..70c8552c934becbcc2f2b365139bc1f2aa178543 100644 --- a/R/ProjectField.R +++ b/R/ProjectField.R @@ -71,7 +71,7 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon if (!is.numeric(ano)) { stop("Parameter 'ano' must be a numeric array.") } - if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { stop("Parameter 'ano' must have dimension names.") } ## eof (1) @@ -88,12 +88,12 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon } else if ('patterns' %in% names(eof)) { EOFs <- "patterns" } else { - stop(paste0("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", - "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().")) + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. It can be generated by EOF(), REOF(), or EuroAtlanticTC().") } if (!'wght' %in% names(eof)) { - stop(paste0("Parameter 'eof' must be a list that contains 'wght'. ", - "It can be generated by EOF() or REOF().")) + stop("Parameter 'eof' must be a list that contains 'wght'. 
", + "It can be generated by EOF() or REOF().") } if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") @@ -112,23 +112,23 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon if (!is.character(space_dim) | length(space_dim) != 2) { stop("Parameter 'space_dim' must be a character vector of 2.") } - if (any(!space_dim %in% names(dim(ano)))) { + if (!all(space_dim %in% names(dim(ano)))) { stop("Parameter 'space_dim' is not found in 'ano' dimension.") } ## ano (2) if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { - stop(paste0("Parameter 'ano' must be an array with dimensions named as ", - "parameter 'space_dim' and 'time_dim'.")) + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") } ## eof (2) if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | !'mode' %in% names(dim(eof[[EOFs]]))) { - stop(paste0("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim' and 'mode'.")) + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") } if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { - stop(paste0("The component 'wght' of parameter 'eof' must be an array ", - "with dimensions named as parameter 'space_dim'.")) + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") } ## mode if (!is.null(mode)) { @@ -136,8 +136,8 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon stop("Parameter 'mode' must be NULL or a positive integer.") } if (mode > dim(eof[[EOFs]])['mode']) { - stop(paste0("Parameter 'mode' is greater than the number of available ", - "modes in 'eof'.")) + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") } } ## ncores @@ -182,15 +182,15 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon } else { if (!all(dimnames_without_mode %in% names(dim(ano)))) { - stop(paste0("The array 'EOF' in parameter 'eof' has dimension not in parameter ", - "'ano'. Check if 'ano' and 'eof' are compatible.")) + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. Check if 'ano' and 'eof' are compatible.") } common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != dim(eof_mode)[dimnames_without_mode])) { - stop(paste0("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", - "with different length. Check if 'ano' and 'eof' are compatible.")) + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") } # Enlarge eof/ano is needed. 
The margin_dims of Apply() must be consistent @@ -198,7 +198,7 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] if (length(additional_dims) != 0) { - for (i in 1:length(additional_dims)) { + for (i in seq_along(additional_dims)) { eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), lendim = additional_dims[i], name = names(additional_dims)[i]) } @@ -229,7 +229,6 @@ ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon # wght: [lat, lon] ntime <- dim(ano)[1] - if (length(dim(eof_mode)) == 2) { # mode != NULL # Initialization of pc.ver. pc.ver <- array(NA, dim = ntime) #[sdate] diff --git a/R/REOF.R b/R/REOF.R index c9c82cf94e1091f88ea7a8c71dd2957e5079de80..1135332978782d8cdec5c92f67a2a3b63b29c034 100644 --- a/R/REOF.R +++ b/R/REOF.R @@ -85,7 +85,7 @@ REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', if (!is.numeric(ano)) { stop("Parameter 'ano' must be a numeric array.") } - if(any(is.null(names(dim(ano))))| any(nchar(names(dim(ano))) == 0)) { + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { stop("Parameter 'ano' must have dimension names.") } ## time_dim @@ -99,21 +99,21 @@ REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', if (!is.character(space_dim) | length(space_dim) != 2) { stop("Parameter 'space_dim' must be a character vector of 2.") } - if (any(!space_dim %in% names(dim(ano)))) { + if (!all(space_dim %in% names(dim(ano)))) { stop("Parameter 'space_dim' is not found in 'ano' dimension.") } ## lat if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { - stop(paste0("Parameter 'lat' must be a numeric vector with the same ", - "length as the latitude dimension of 'ano'.")) + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") } if (any(lat > 90 | lat < -90)) { stop("Parameter 'lat' must contain values within the range [-90, 90].") } ## lon if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { - stop(paste0("Parameter 'lon' must be a numeric vector with the same ", - "length as the longitude dimension of 'ano'.")) + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") } if (all(lon >= 0)) { if (any(lon > 360 | lon < 0)) { @@ -154,7 +154,7 @@ REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', # Area weighting is needed to compute the fraction of variance explained by # each mode space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) - wght <- array(cos(lat * pi/180), dim = dim(ano)[space_ind]) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) # We want the covariance matrix to be weigthed by the grid # cell area so the anomaly field is weighted by its square @@ -180,12 +180,12 @@ REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', # ano: [sdate, lat, lon] # Dimensions - nt <- dim(ano)[1] ny <- dim(ano)[2] nx <- dim(ano)[3] # Get the first ntrunc EOFs: - eofs <- .EOF(ano = ano, neofs = ntrunc, corr = corr, wght = wght) #list(EOFs = EOF, PCs = PC, var = var.eof, mask = mask) + eofs <- .EOF(ano = ano, neofs = ntrunc, corr = corr, wght = wght) + #list(EOFs = EOF, PCs = PC, var = var.eof, mask = mask) # Recover loadings (with norm 1), weight the EOFs by the weigths # eofs$EOFs: [mode, lat, lon] @@ -211,13 
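# Usage sketch (illustrative, not taken from this patch): REOF() rotates the
# first 'ntrunc' EOFs with varimax; the 'var' component holds the explained
# variance recomputed from the rotated PCs, as shown just below.
library(s2dv)
set.seed(1)
ano  <- array(rnorm(30 * 6 * 8), dim = c(sdate = 30, lat = 6, lon = 8))
reof <- REOF(ano, lat = seq(20, 70, length.out = 6),
             lon = seq(-80, 40, length.out = 8), ntrunc = 5)
names(reof)  # "REOFs" "RPCs" "var"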
+211,17 @@ REOF <- function(ano, lat, lon, ntrunc = 15, time_dim = 'sdate', RPCs <- t(ano.wght) %*% varim_loadings # [sdate, mode] ## Alternative methods suggested here: - ## https://stats.stackexchange.com/questions/59213/how-to-compute-varimax-rotated-principal-components-in-r/137003#137003 - ## gives same results as pinv is just transpose in this case, as loadings are ortonormal! - # invLoadings <- t(pracma::pinv(varim$loadings)) ## invert and traspose the rotated loadings. pinv uses a SVD again (!) + ##https://stats.stackexchange.com/questions/59213/ + ##how-to-compute-varimax-rotated-principal-components-in-r/137003#137003 + ##gives same results as pinv is just transpose in this case, as loadings are ortonormal! + # invLoadings <- t(pracma::pinv(varim$loadings)) + ## invert and traspose the rotated loadings. pinv uses a SVD again (!) # RPCs <- ano.wght %*% invLoadings # Compute explained variance fraction: - var <- apply(RPCs, 2, function(x) { sum(x*x) } ) * 100 / eofs$tot_var # [mode] + var <- apply(RPCs, 2, function(x) { + sum(x * x) + }) * 100 / eofs$tot_var # [mode] dim(var) <- c(mode = length(var)) return(invisible(list(REOFs = REOFs, RPCs = RPCs, var = var))) diff --git a/R/RMS.R b/R/RMS.R index 8f7e58b71c50039a58d9d3a814a83815de80e533..a08af1271ac8338b78a32c6ad8d6e2741419e6cd 100644 --- a/R/RMS.R +++ b/R/RMS.R @@ -94,12 +94,12 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { @@ -147,8 +147,8 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, } if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { - stop(paste0("Parameter 'limits' must be a vector of two positive ", - "integers smaller than the length of paramter 'comp_dim'.")) + stop("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.") } } ## conf @@ -186,8 +186,8 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, stop("Parameter 'exp' and 'obs' must have the same dimension names.") } if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.") } if (dim(exp)[time_dim] < 2) { stop("The length of time_dim must be at least 2 to compute RMS.") @@ -264,7 +264,9 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, # dif for (i in 1:nobs) { - dif[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + dif[, , i] <- sapply(1:nexp, 
function(x) { + exp[, x] - obs[, i] + }) } rms <- colMeans(dif^2, na.rm = TRUE)^0.5 # [nexp, nobs] @@ -280,18 +282,19 @@ RMS <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, } # conf.lower chi <- sapply(1:nobs, function(i) { - qchisq(confhigh, eno[, i] - 1) + qchisq(confhigh, eno[, i] - 1) }) conf.lower <- (eno * rms ** 2 / chi) ** 0.5 # conf.upper chi <- sapply(1:nobs, function(i) { - qchisq(conflow, eno[, i] - 1) + qchisq(conflow, eno[, i] - 1) }) conf.upper <- (eno * rms ** 2 / chi) ** 0.5 } -#NOTE: Not sure if the calculation is correct. p_val is reasonable compared to the chi-square chart though. +#NOTE: Not sure if the calculation is correct. p_val is reasonable compared to +# the chi-square chart though. # if (pval | sign) { # chi <- array(dim = c(nexp = nexp, nobs = nobs)) # for (i in 1:nobs) { diff --git a/R/RMSSS.R b/R/RMSSS.R index c33a40e5a85b77246ab9f8fff35ded3e732a4374..3d1de43df26095c4195694c443000281be7fc036 100644 --- a/R/RMSSS.R +++ b/R/RMSSS.R @@ -42,6 +42,12 @@ #' FALSE. #'@param alpha A numeric of the significance level to be used in the #' statistical significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test with the Random Walk. It can be NA (and it will be computed with the +#' s2dv:::.Eno), FALSE (and it will use the length of 'obs' along 'time_dim', so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as 'obs' except 'time_dim' +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param sig_method A character string indicating the significance method. The #' options are "one-sided Fisher" (default) and "Random Walk". 
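# Numeric sketch of the chi-square interval computed in RMS.R above (made-up
# values): both bounds are sqrt(eno * rms^2 / qchisq(p, eno - 1)), with
# p = 1 - alpha/2 for the lower bound and p = alpha/2 for the upper one.
rms   <- 1.2   # sample RMS
eno   <- 15    # effective number of independent values
alpha <- 0.05
conf.lower <- sqrt(eno * rms^2 / qchisq(1 - alpha / 2, eno - 1))
conf.upper <- sqrt(eno * rms^2 / qchisq(alpha / 2, eno - 1))
c(conf.lower, conf.upper)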
#'@param sig_method.type A character string indicating the test type of the @@ -97,7 +103,7 @@ #'@importFrom stats pf #'@export RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, - memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, + memb_dim = NULL, pval = TRUE, sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL, ncores = NULL) { # Check inputs @@ -115,12 +121,12 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, obs <- array(obs, dim = c(length(obs))) names(dim(obs)) <- c(time_dim) } else { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } } else if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions time_dim and dat_dim, or vector of same length.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions time_dim and dat_dim, or vector of same length.") } if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { @@ -131,14 +137,18 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'ref' must be numeric.") } if (is.array(ref)) { - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } - } else if (length(ref) != 1 | any(!ref %in% c(0, 1))) { + } else if (length(ref) != 1 | !all(ref %in% c(0, 1))) { stop("Parameter 'ref' must be a numeric array or number 0 or 1.") } } else { ref <- 0 + .warning("If a reference dataset is not provided (ref = NULL), the default ", + "value for the climatology is 0 and RMSSS results will only be ", + "correct if 'exp' and 'obs' are anomalies. 
Provide a non-null ", + "'ref' for full-field data.") } if (!is.array(ref)) { # 0 or 1 ref <- array(data = ref, dim = dim(exp)) @@ -182,6 +192,19 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.numeric(alpha) | length(alpha) > 1) { stop("Parameter 'alpha' must be one numeric value.") } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop("If parameter 'N.eff' is provided with an array, it must ", + "have the same dimensions as 'obs' except 'time_dim'.") + } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop("Parameter 'N.eff' must be NA, FALSE, a numeric, or an array with ", + "the same dimensions as 'obs' except 'time_dim'.") + } ## sig_method if (length(sig_method) != 1 | !any(sig_method %in% c('one-sided Fisher', 'Random Walk'))) { stop("Parameter 'sig_method' must be one of 'one-sided Fisher' or 'Random Walk'.") @@ -197,7 +220,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, sig_method.type <- "two.sided" } if (!any(sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less'))) { - stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() parameter 'test.type'.") + stop("Parameter 'sig_method.type' must be a method accepted by RandomWalkTest() ", + "parameter 'test.type'.") + } + if ((!is.na(N.eff) & !isFALSE(N.eff)) && sig_method.type == 'two.sided.approx') { + .warning("'N.eff' will not be used if 'sig_method.type' is 'two.sided.approx'.") } if (sig_method.type == 'two.sided.approx' & pval == T) { .warning("p-value cannot be calculated by Random Walk 'two.sided.approx' method.") @@ -234,8 +261,8 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, stop("Parameter 'exp' and 'obs' must have the same dimension names.") } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'dat_dim' and 'memb_dim'.") } name_ref <- sort(names(dim(ref))) @@ -245,17 +272,17 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be ", - "equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension, it must be ", + "equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") } if (dim(exp)[time_dim] <= 2) { @@ -301,20 +328,32 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } else { target_dims <- list(exp = time_dim, obs = time_dim, ref = time_dim) } - - res <- Apply(data, - target_dims = target_dims, - fun = 
.RMSSS, - time_dim = time_dim, dat_dim = dat_dim, - pval = pval, sign = sign, alpha = alpha, - sig_method = sig_method, sig_method.type = sig_method.type, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims) + 1] <- list(NULL) + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } else { + res <- Apply(data, + target_dims = target_dims, + fun = .RMSSS, + time_dim = time_dim, dat_dim = dat_dim, + pval = pval, sign = sign, alpha = alpha, N.eff = N.eff, + sig_method = sig_method, sig_method.type = sig_method.type, + ncores = ncores) + } return(res) } .RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, pval = TRUE, - sign = FALSE, alpha = 0.05, sig_method = 'one-sided Fisher', + sign = FALSE, alpha = 0.05, N.eff = NA, sig_method = 'one-sided Fisher', sig_method.type = NULL) { # exp: [sdate, (dat)] # obs: [sdate, (dat)] @@ -357,7 +396,9 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, names(dim(dif1)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { - dif1[, , i] <- sapply(1:nexp, function(x) {exp[, x] - obs[, i]}) + dif1[, , i] <- sapply(1:nexp, function(x) { + exp[, x] - obs[, i] + }) } rms_exp <- colMeans(dif1^2, na.rm = TRUE)^0.5 # [nexp, nobs] @@ -366,7 +407,9 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, dif2 <- array(dim = c(nsdate, nref, nobs)) names(dim(dif2)) <- c(time_dim, 'nexp', 'nobs') for (i in 1:nobs) { - dif2[, , i] <- sapply(1:nref, function(x) {ref[, x] - obs[, i]}) + dif2[, , i] <- sapply(1:nref, function(x) { + ref[, x] - obs[, i] + }) } rms_ref <- colMeans(dif2^2, na.rm = TRUE)^0.5 # [nref, nobs] if (nexp != nref) { @@ -396,7 +439,7 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, } } - F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1- 1))) + F.stat <- (eno2 * rms_ref^2 / (eno2 - 1)) / ((eno1 * rms_exp^2 / (eno1 - 1))) tmp <- !is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2 p_val <- 1 - pf(F.stat, eno1 - 1, eno2 - 1) if (sign) signif <- p_val <= alpha @@ -422,8 +465,11 @@ RMSSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', dat_dim = NULL, # nref = 1 error_ref <- array(data = abs(ref - obs[, j]), dim = c(time = nsdate)) } + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, j], na.action = na.pass) ## effective degrees of freedom + } aux <- .RandomWalkTest(skill_A = error_exp, skill_B = error_ref, - test.type = sig_method.type, + test.type = sig_method.type, N.eff = N.eff, pval = pval, sign = sign, alpha = alpha) if (sign) signif[i, j] <- aux$sign if (pval) p_val[i, j] <- aux$p.val diff --git a/R/ROCSS.R b/R/ROCSS.R index 2ca078279228c9e7297a5c7c7a1440b97a622768..dcddeb261086b488ea42ec9f3bda36273eb7cfa1 100644 --- a/R/ROCSS.R +++ b/R/ROCSS.R @@ -91,14 +91,14 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.array(obs) | !is.numeric(obs)) { stop("Parameter 'obs' must be a numeric array.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { if 
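# Usage sketch (illustrative, not taken from this patch): Random Walk
# significance in RMSSS() with an explicit effective sample size through the
# new 'N.eff' argument; N.eff = NA (default) estimates it internally with
# .Eno(). The inputs are anomalies, so the default ref = 0 climatology that
# the new warning refers to is appropriate here.
library(s2dv)
set.seed(1)
exp <- array(rnorm(30), dim = c(sdate = 30))
obs <- array(rnorm(30), dim = c(sdate = 30))
res <- RMSSS(exp, obs, sign = TRUE, sig_method = 'Random Walk',
             sig_method.type = 'two.sided', N.eff = 20)
res$rmsss; res$sign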
(!is.array(ref) | !is.numeric(ref)) stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } } @@ -163,8 +163,8 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -174,17 +174,17 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'", - " if there is only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'", + " if there is only one reference dataset.") } } ## prob_thresholds @@ -194,7 +194,7 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } ## indices_for_clim if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] + indices_for_clim <- seq_len(dim(obs)[time_dim]) } else { if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") @@ -324,10 +324,12 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # Input dim for .GetProbs ## if exp: [sdate, memb] ## if obs: [sdate, (memb)] - exp_probs <- .GetProbs(data = ClimProjDiags::Subset(exp, dat_dim, exp_i, drop = 'selected'), + exp_probs <- .GetProbs(data = ClimProjDiags::Subset(exp, dat_dim, exp_i, + drop = 'selected'), indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) - obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, drop = 'selected'), + obs_probs <- .GetProbs(data = ClimProjDiags::Subset(obs, dat_dim, obs_i, + drop = 'selected'), indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) ## exp_probs and obs_probs: [bin, sdate] @@ -337,19 +339,22 @@ ROCSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } ## ROCS (exp) - rocs_exp[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(exp_probs, c(time_dim, 'bin')), - obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) + rocs_exp[exp_i, obs_i, ] <- + unlist(EnsRoca(ens = Reorder(exp_probs, c(time_dim, 'bin')), + obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) if (!is.null(ref)) { if (is.null(cat_dim)) { # calculate probs - ref_probs <- .GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, drop = 'selected'), + ref_probs <- 
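# Usage sketch (illustrative, not taken from this patch): the exported
# GetProbs() is the public counterpart of the .GetProbs() calls refactored
# above; it turns an ensemble into per-category probabilities.
library(s2dv)
set.seed(1)
dat   <- array(rnorm(10 * 4), dim = c(sdate = 10, member = 4))
probs <- GetProbs(dat, time_dim = 'sdate', memb_dim = 'member',
                  prob_thresholds = c(1/3, 2/3))
dim(probs)  # includes a 'bin' dimension of length 3 (below/normal/above)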
.GetProbs(ClimProjDiags::Subset(ref, dat_dim, exp_i, + drop = 'selected'), indices_for_quantiles = indices_for_clim, prob_thresholds = prob_thresholds, cross.val = cross.val) } else { ref_probs <- ref[, , exp_i] } - rocs_ref[exp_i, obs_i, ] <- unlist(EnsRoca(ens = Reorder(ref_probs, c(time_dim, 'bin')), - obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) + rocs_ref[exp_i, obs_i, ] <- + unlist(EnsRoca(ens = Reorder(ref_probs, c(time_dim, 'bin')), + obs = Reorder(obs_probs, c(time_dim, 'bin')))[1:ncats]) } } } diff --git a/R/RPS.R b/R/RPS.R index c385f10cf32097c58e94386515762fcb72200405..59b2d01a0d842967cdaa4d0351ca1321e19ccf8c 100644 --- a/R/RPS.R +++ b/R/RPS.R @@ -52,6 +52,9 @@ #'@param cross.val A logical indicating whether to compute the thresholds #' between probabilistic categories in cross-validation. The default value is #' FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. #'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it #' means the lower limit for the fraction of the non-NA values. 1 is equal to #' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). @@ -85,7 +88,8 @@ #'@export RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, - Fair = FALSE, weights = NULL, cross.val = FALSE, na.rm = FALSE, ncores = NULL) { + Fair = FALSE, weights = NULL, cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { # Check inputs ## exp and obs (1) @@ -93,8 +97,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL stop('Parameter "exp" must be a numeric array.') if (!is.array(obs) | !is.numeric(obs)) stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## time_dim @@ -150,8 +154,8 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } ## prob_thresholds if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | @@ -160,7 +164,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } ## indices_for_clim if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] + indices_for_clim <- seq_len(dim(obs)[time_dim]) } else { if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") @@ -171,9 +175,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } } ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { + if (!is.logical(Fair) | length(Fair) > 1) { stop("Parameter 'Fair' must be either TRUE or FALSE.") } + ## return_mean + if 
(!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } ## cross.val if (!is.logical(cross.val) | length(cross.val) > 1) { stop("Parameter 'cross.val' must be either TRUE or FALSE.") @@ -183,26 +191,28 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL if (!is.array(weights) | !is.numeric(weights)) stop("Parameter 'weights' must be a named numeric array.") if (is.null(dat_dim)) { - if (length(dim(weights)) != 2 | any(!names(dim(weights)) %in% c(memb_dim, time_dim))) - stop("Parameter 'weights' must have two dimensions with the names of 'memb_dim' and 'time_dim'.") + if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | dim(weights)[time_dim] != dim(exp)[time_dim]) { - stop(paste0("Parameter 'weights' must have the same dimension lengths ", - "as 'memb_dim' and 'time_dim' in 'exp'.")) + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim' and 'time_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim)) - + } else { - if (length(dim(weights)) != 3 | any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) - stop("Parameter 'weights' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + stop("Parameter 'weights' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | dim(weights)[time_dim] != dim(exp)[time_dim] | dim(weights)[dat_dim] != dim(exp)[dat_dim]) { - stop(paste0("Parameter 'weights' must have the same dimension lengths ", - "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") } weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) - + } } else if (!is.null(weights) & !is.null(cat_dim)) { .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", @@ -216,15 +226,15 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { + length(ncores) > 1) { stop("Parameter 'ncores' must be either NULL or a positive integer.") } } - + ############################### - + # Compute RPS - + ## Decide target_dims if (!is.null(memb_dim)) { target_dims_exp <- c(time_dim, memb_dim, dat_dim) @@ -236,7 +246,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { # cat_dim target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) } - + rps <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = target_dims_exp, obs = target_dims_obs), @@ -247,10 +257,13 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL indices_for_clim = indices_for_clim, Fair = Fair, weights = weights, cross.val = cross.val, na.rm = na.rm, ncores = ncores)$output1 - - # Return only the mean RPS - rps <- MeanDims(rps, time_dim, na.rm = TRUE) - + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + return(rps) } @@ -265,14 +278,14 @@ RPS <- function(exp, obs, 
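# Usage sketch (illustrative, not taken from this patch): the new
# 'return_mean' argument keeps the per-start-date RPS instead of averaging
# over 'time_dim'.
library(s2dv)
set.seed(1)
exp <- array(rnorm(10 * 4), dim = c(sdate = 10, member = 4))
obs <- array(rnorm(10), dim = c(sdate = 10))
rps_mean   <- RPS(exp, obs)                       # temporal mean (default)
rps_series <- RPS(exp, obs, return_mean = FALSE)  # keeps the 'sdate' dimension
dim(rps_series)  # sdate = 10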
time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL #--- if cat_dim: # exp: [sdate, bin, (dat)] # obs: [sdate, bin, (dat)] - + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs if (!is.null(memb_dim)) { if (!memb_dim %in% names(dim(obs))) { obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) } } - + if (is.null(dat_dim)) { nexp <- 1 nobs <- 1 @@ -283,17 +296,17 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL nexp <- as.numeric(dim(exp)[dat_dim]) nobs <- as.numeric(dim(obs)[dat_dim]) } - + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) - + for (i in 1:nexp) { for (j in 1:nobs) { - exp_data <- exp[ , , i] - obs_data <- obs[ , , j] - + exp_data <- exp[, , i] + obs_data <- obs[, , j] + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) - + # Find the fraction of NAs ## If any member/bin is NA at this time step, it is not good value. exp_mean <- rowMeans(exp_data) @@ -307,7 +320,7 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL } else { f_NAs <- na.rm } - + if (f_NAs <= sum(good_values) / length(obs_mean)) { exp_data <- exp_data[good_values, , drop = F] @@ -316,21 +329,23 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL # If the data inputs are forecast/observation, calculate probabilities if (is.null(cat_dim)) { if (!is.null(weights)) { - weights_data <- weights[which(good_values) , , i] + weights_data <- weights[which(good_values), , i] if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) } else { weights_data <- weights #NULL } - + # Subset indices_for_clim dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] - + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, - prob_thresholds = prob_thresholds, weights = weights_data, cross.val = cross.val) + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) # exp_probs: [bin, sdate] obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) # obs_probs: [bin, sdate] } else { # inputs are probabilities already @@ -343,29 +358,29 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL # rps: [sdate, nexp, nobs] rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) - + if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] R <- dim(exp)[2] #memb - R_new <- Inf adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) adjustment <- colSums(adjustment) - rps[ , i, j] <- rps[ , i, j] + adjustment + rps[, i, j] <- rps[, i, j] + adjustment } } else { ## not enough values different from NA - rps[ , i, j] <- as.numeric(NA) + rps[, i, j] <- NA_real_ } } } - + if (is.null(dat_dim)) { dim(rps) <- dim(exp)[time_dim] } - + return(rps) } diff --git a/R/RPSS.R b/R/RPSS.R index 91ca8c21acd8a877f17d3d0cf5d1db5d67ea0d3c..db73a494ee8a1504cbe38a2cc21d8f9f2a22c75c 100644 --- a/R/RPSS.R +++ b/R/RPSS.R @@ -79,6 +79,12 @@ #' the default of 
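# Numeric sketch of the fair-RPS adjustment reformatted above (made-up
# values): for each start date it adds -sum_bins cum * (1 - cum) / (R - 1)
# to the raw RPS, with R the ensemble size and 'cum' the cumulative forecast
# probability at each category boundary.
R <- 5                           # ensemble members
probs_exp_cumsum <- c(0.4, 0.8)  # cumulative forecast probabilities per bin
adjustment <- sum((-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum))
adjustment  # negative, so it reduces the raw RPS of that start date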
\code{RandomWalkTest()}. #'@param alpha A numeric of the significance level to be used in the statistical #' significance test. The default value is 0.05. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +#' (and it will use the length of 'obs' along 'time_dim', so the +#' autocorrelation is not taken into account), a numeric (which is used for +#' all cases), or an array with the same dimensions as 'obs' except 'time_dim' +#' (for a particular N.eff to be used for each case). The default value is NA. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -121,15 +127,15 @@ #'obs_probs <- GetProbs(obs, memb_dim = NULL) #'ref_probs <- GetProbs(ref, memb_dim = 'member') #'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, -#' cat_dim = 'bin') +#' N.eff = FALSE, cat_dim = 'bin') #' #'@import multiApply #'@export RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, - cross.val = FALSE, na.rm = FALSE, - sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + cross.val = FALSE, na.rm = FALSE, sig_method.type = 'two.sided.approx', + alpha = 0.05, N.eff = NA, ncores = NULL) { # Check inputs ## exp, obs, and ref (1) @@ -139,14 +145,14 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.array(obs) | !is.numeric(obs)) { stop("Parameter 'obs' must be a numeric array.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } if (!is.null(ref)) { if (!is.array(ref) | !is.numeric(ref)) stop("Parameter 'ref' must be a numeric array.") - if (any(is.null(names(dim(ref))))| any(nchar(names(dim(ref))) == 0)) { + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { stop("Parameter 'ref' must have dimension names.") } } @@ -211,8 +217,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } if (!identical(length(name_exp), length(name_obs)) | !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions except 'memb_dim' and 'dat_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") } if (!is.null(ref)) { name_ref <- sort(names(dim(ref))) @@ -222,17 +228,17 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!is.null(dat_dim)) { if (dat_dim %in% name_ref) { if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { - stop(paste0("If parameter 'ref' has dataset dimension, it must be", - " equal to dataset dimension of 'exp'.")) + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") } name_ref <- name_ref[-which(name_ref == dat_dim)] } } if (!identical(length(name_exp), length(name_ref)) | !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'ref' must have the same length of ", - "all 
dimensions except 'memb_dim' and 'dat_dim' if there is ", - "only one reference dataset.")) + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") } } ## prob_thresholds @@ -242,7 +248,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } ## indices_for_clim if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] + indices_for_clim <- seq_len(dim(obs)[time_dim]) } else { if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") @@ -266,22 +272,29 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'weights_exp' must be a named numeric array.") if (is.null(dat_dim)) { - if (length(dim(weights_exp)) != 2 | any(!names(dim(weights_exp)) %in% c(memb_dim, time_dim))) - stop("Parameter 'weights_exp' must have two dimensions with the names of 'memb_dim' and 'time_dim'.") + if (length(dim(weights_exp)) != 2 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_exp' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | dim(weights_exp)[time_dim] != dim(exp)[time_dim]) { - stop("Parameter 'weights_exp' must have the same dimension lengths as 'memb_dim' and 'time_dim' in 'exp'.") + stop("Parameter 'weights_exp' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'exp'.") } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim)) } else { - if (length(dim(weights_exp)) != 3 | any(!names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) - stop("Parameter 'weights_exp' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + if (length(dim(weights_exp)) != 3 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_exp' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | dim(weights_exp)[time_dim] != dim(exp)[time_dim] | dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) { - stop(paste0("Parameter 'weights_exp' must have the same dimension lengths ", - "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) + stop("Parameter 'weights_exp' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") } weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) } @@ -296,22 +309,29 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', stop("Parameter 'weights_ref' must be a named numeric array.") if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { - if (length(dim(weights_ref)) != 2 | any(!names(dim(weights_ref)) %in% c(memb_dim, time_dim))) - stop("Parameter 'weights_ref' must have two dimensions with the names of 'memb_dim' and 'time_dim'.") + if (length(dim(weights_ref)) != 2 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_ref' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] | dim(weights_ref)[time_dim] != dim(exp)[time_dim]) { - stop("Parameter 'weights_ref' must have the same dimension lengths as 'memb_dim' and 'time_dim' in 'ref'.") + stop("Parameter 'weights_ref' must have the 
same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'ref'.") } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim)) } else { - if (length(dim(weights_ref)) != 3 | any(!names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) - stop("Parameter 'weights_ref' must have three dimensions with the names of 'memb_dim', 'time_dim' and 'dat_dim'.") + if (length(dim(weights_ref)) != 3 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_ref' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] | dim(weights_ref)[time_dim] != dim(ref)[time_dim] | dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) { - stop(paste0("Parameter 'weights_ref' must have the same dimension lengths ", - "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.")) + stop("Parameter 'weights_ref' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.") + } weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) } @@ -331,13 +351,33 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', ## sig_method.type #NOTE: These are the types of RandomWalkTest() if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { - stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") - } - if (sig_method.type == 'two.sided.approx') { - if (alpha != 0.05) { - .warning("DelSole and Tippett (2016) aproximation is valid for alpha ", - "= 0.05 only. Returning the significance at the 0.05 significance level.") + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") + } + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(obs))) | + any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { + stop("If parameter 'N.eff' is provided with an array, it must ", + "have the same dimensions as 'obs' except 'time_dim'.") } + } else if (any((!is.na(N.eff) & !isFALSE(N.eff) & + !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop("Parameter 'N.eff' must be NA, FALSE, a numeric, or an array with ", + "the same dimensions as 'obs' except 'time_dim'.") + } + if ((!is.na(N.eff) & !isFALSE(N.eff)) && sig_method.type == 'two.sided.approx') { + .warning("'N.eff' will not be used if 'sig_method.type' is 'two.sided.approx'.") + } + if (identical(N.eff, NA) & !is.null(cat_dim)) { + stop("'N.eff' cannot be NA if probabilities are already provided ", + "(cat_dim != NULL). 
Please compute 'N.eff' with s2dv::Eno and ", + "provide the result to this function.") } ## ncores if (!is.null(ncores)) { @@ -382,19 +422,68 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', target_dims = list(exp = target_dims_exp, obs = target_dims_obs) } - - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - time_dim = time_dim, memb_dim = memb_dim, - cat_dim = cat_dim, dat_dim = dat_dim, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - weights_exp = weights_exp, - weights_ref = weights_ref, - cross.val = cross.val, - na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, - ncores = ncores) + + if (is.array(N.eff)) { + data$N.eff <- N.eff + target_dims[length(target_dims)+1] <- list(NULL) + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + } + } else { # N.eff not an array + if (!is.null(ref)){ + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } else { # ref=NULL + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + ref = ref, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + N.eff = N.eff, ncores = ncores) + } + } return(output) @@ -403,7 +492,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', .RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, - na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05, N.eff = NA) { #--- if memb_dim: # exp: [sdate, memb, (dat)] # obs: [sdate, (memb), (dat)] @@ -460,7 +549,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', for (i in 1:nexp) { for (j in 1:nobs) { for (k in 1:nref) { - if (nref != 1 & k!=i) { # if nref is 1 or equal to nexp, calculate rps + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps next } exp_data <- exp[, , 
i, drop = F] @@ -474,14 +563,16 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', good_indices_for_clim <- dum[!is.na(dum)] if (f_NAs <= sum(good_values) / length(good_values)) { - rps_exp[good_values,i,j] <- .RPS(exp = exp[good_values, , i], obs = obs[good_values, , j], + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = NULL, prob_thresholds = prob_thresholds, indices_for_clim = good_indices_for_clim, Fair = Fair, weights = weights_exp[good_values, , i], cross.val = cross.val, na.rm = na.rm) - rps_ref[good_values,i,j] <- .RPS(exp = ref[good_values, , k], obs = obs[good_values, , j], + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], time_dim = time_dim, memb_dim = memb_dim, cat_dim = cat_dim, dat_dim = NULL, prob_thresholds = prob_thresholds, @@ -526,8 +617,10 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', # Subset indices_for_clim dum <- match(indices_for_clim, which(good_values)) good_indices_for_clim <- dum[!is.na(dum)] - obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, - prob_thresholds = prob_thresholds, weights = NULL, cross.val = cross.val) + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) } else { obs_probs <- t(obs_data) } @@ -544,7 +637,8 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) } # if (Fair) { # FairRPS - # ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] + # ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + # ## [formula taken from SpecsVerification::EnsRps] # R <- dim(exp)[2] #memb # R_new <- Inf # adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) @@ -570,7 +664,7 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', rpss <- array(dim = c(nexp = nexp, nobs = nobs)) sign <- array(dim = c(nexp = nexp, nobs = nobs)) - if (any(!is.na(rps_exp_mean))) { + if (!all(is.na(rps_exp_mean))) { for (i in 1:nexp) { for (j in 1:nobs) { rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] @@ -578,9 +672,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', if (!any(ind_nonNA)) { sign[i, j] <- NA } else { - sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], skill_B = rps_ref[ind_nonNA, i, j], + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs[, , j], na.action = na.pass) ## effective degrees of freedom + } + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], + skill_B = rps_ref[ind_nonNA, i, j], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } } @@ -598,9 +696,13 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', } else { # rps_exp and rps_ref: [sdate] rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) - sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], skill_B = rps_ref[ind_nonNA], + if (is.na(N.eff)) { + N.eff <- .Eno(x = obs, na.action = na.pass) ## effective degrees of freedom + } + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], + 
skill_B = rps_ref[ind_nonNA], test.type = sig_method.type, alpha = alpha, - sign = T, pval = F)$sign + sign = T, pval = F, N.eff = N.eff)$sign } } diff --git a/R/RandomWalkTest.R b/R/RandomWalkTest.R index 8d5f67f361a679dc078b8de2c692dc3f692fb0fb..1441d402b4cf804f8c23a8920f826cf9f0640218 100644 --- a/R/RandomWalkTest.R +++ b/R/RandomWalkTest.R @@ -28,6 +28,11 @@ #' significance test. The default value is TRUE. #'@param sign A logical value indicating whether to return the statistical #' significance of the test based on 'alpha'. The default value is FALSE. +#'@param N.eff Effective sample size to be used in the statistical significance +#' test. It can be FALSE (and the length of the time series will be used), a +#' numeric (which is used for all cases), or an array with the same dimensions +#' as "skill_A" except "time_dim" (for a particular N.eff to be used for each +#' case). The default value is FALSE. #'@param ncores An integer indicating the number of cores to use for parallel #' computation. The default value is NULL. #' @@ -87,14 +92,14 @@ #'@export RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, - sign = FALSE, ncores = NULL) { + sign = FALSE, N.eff = FALSE, ncores = NULL) { # Check inputs ## skill_A and skill_B if (is.null(skill_A) | is.null(skill_B)) { stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") } - if(!is.numeric(skill_A) | !is.numeric(skill_B)) { + if (!is.numeric(skill_A) | !is.numeric(skill_B)) { stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") } if (!identical(dim(skill_A), dim(skill_B))) { @@ -112,7 +117,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', stop("Parameter 'alpha' must be a number between 0 and 1.") } ## test.type - if (!test.type %in% c('two.sided.approx','two.sided','greater','less')) { + if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") } if (test.type == 'two.sided.approx') { @@ -126,6 +131,21 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } sign <- TRUE } + ## N.eff + if (is.array(N.eff)) { + if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") + if (!all(names(dim(N.eff)) %in% names(dim(skill_A))) | + any(dim(skill_A)[match(names(dim(N.eff)), names(dim(skill_A)))] != dim(N.eff))) { + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "skill_A" except "time_dim".') + } + } else if (any((!isFALSE(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { + stop('Parameter "N.eff" must be FALSE, a numeric, or an array with ', + 'the same dimensions as "skill_A" except "time_dim".') + } + if (!isFALSE(N.eff) & test.type=='two.sided.approx'){ + warning('"N.eff" will not be used if "test.type" is "two.sided.approx".') + } ## ncores if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { @@ -134,23 +154,34 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', } ## Compute the Random Walk Test - res <- Apply(data = list(skill_A = skill_A, - skill_B = skill_B), - target_dims = list(skill_A = time_dim, - skill_B = time_dim), - fun = .RandomWalkTest, - test.type = test.type, - alpha = alpha, pval = pval, sign = sign, - ncores = ncores) + if (is.array(N.eff)) { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B, + N.eff = N.eff), + target_dims = list(skill_A = 
time_dim, + skill_B = time_dim, + N.eff = NULL), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + } else { + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + N.eff = N.eff, ncores = ncores) + } return(res) } .RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', - alpha = 0.05, pval = TRUE, sign = FALSE) { + alpha = 0.05, pval = TRUE, N.eff = FALSE, sign = FALSE) { #skill_A and skill_B: [sdate] - - N.eff <- length(skill_A) A_better <- sum(skill_B > skill_A) B_better <- sum(skill_B < skill_A) @@ -159,14 +190,16 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', output$score <- A_better - B_better if (test.type == 'two.sided.approx') { - output$sign <- ifelse(test = abs(output$score) > (2 * sqrt(N.eff)), yes = TRUE, no = FALSE) + output$sign <- abs(output$score) > (2 * sqrt(length(skill_A))) } else { - if (!is.na(output$score)) { - p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + if (isFALSE(N.eff)){N.eff <- length(skill_A)} + + if (!is.na(output$score) & N.eff >= A_better) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, + conf.level = 1 - alpha, alternative = test.type)$p.value - } else { p.val <- NA } @@ -175,7 +208,7 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', output$p.val <- p.val } if (sign) { - output$sign <- ifelse(!is.na(p.val) & p.val <= alpha, TRUE, FALSE) + output$sign <- !is.na(p.val) & p.val <= alpha } } diff --git a/R/RatioPredictableComponents.R b/R/RatioPredictableComponents.R index 3d5cae58fc08aaf79847c080a18bedb958f95fc6..3b30301869e7bb15987ff3385e0ef86f1851a82d 100644 --- a/R/RatioPredictableComponents.R +++ b/R/RatioPredictableComponents.R @@ -25,7 +25,9 @@ #' #'@import multiApply stats #'@export -RatioPredictableComponents <- function(exp, obs, time_dim = 'year', memb_dim = 'member', na.rm = FALSE, ncores = NULL) { +RatioPredictableComponents <- function(exp, obs, time_dim = 'year', + memb_dim = 'member', na.rm = FALSE, + ncores = NULL) { ## Checkings if (is.null(exp)) { @@ -61,7 +63,7 @@ RatioPredictableComponents <- function(exp, obs, time_dim = 'year', memb_dim = ' if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be TRUE or FALSE.") } - if (!is.null(ncores)){ + if (!is.null(ncores)) { if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } @@ -82,8 +84,8 @@ RatioPredictableComponents <- function(exp, obs, time_dim = 'year', memb_dim = ' # obs: [time] ## Ensemble mean and spread - ens_mean <- apply(exp, 1, mean, na.rm = na.rm) - ens_spread <- apply(exp, 2, "-", ens_mean) + ens_mean <- rowMeans(exp, na.rm = na.rm) + #ens_spread <- apply(exp, 2, "-", ens_mean) ## Ensemble mean variance -> signal var_signal <- var(ens_mean, na.rm = na.rm) diff --git a/R/RatioRMS.R b/R/RatioRMS.R index 51f39846e1eb9c8e5a1616627de6c69ae848f8e6..d09fc0fbac57cccfe8a76b992ce25f0eaca4412a 100644 --- a/R/RatioRMS.R +++ b/R/RatioRMS.R @@ -55,8 +55,10 @@ #'# time step. 
#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2)) #'ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3)) -#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +#'ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), +#' list(1, 1), drop = 'selected') +#'ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), +#' list(1, 1), drop = 'selected') #'ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') #'# Compute ensemble mean and provide as inputs to RatioRMS. #'rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), @@ -92,14 +94,14 @@ RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = dim(obs) <- c(length(obs)) names(dim(obs)) <- time_dim } - if(any(is.null(names(dim(exp1))))| any(nchar(names(dim(exp1))) == 0) | - any(is.null(names(dim(exp2))))| any(nchar(names(dim(exp2))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp1)))) | any(nchar(names(dim(exp1))) == 0) | + any(is.null(names(dim(exp2)))) | any(nchar(names(dim(exp2))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp1', 'exp2', and 'obs' must have dimension names.") } - if(!all(names(dim(exp1)) %in% names(dim(exp2))) | - !all(names(dim(exp2)) %in% names(dim(obs))) | - !all(names(dim(obs)) %in% names(dim(exp1)))) { + if (!all(names(dim(exp1)) %in% names(dim(exp2))) | + !all(names(dim(exp2)) %in% names(dim(obs))) | + !all(names(dim(obs)) %in% names(dim(exp1)))) { stop("Parameter 'exp1', 'exp2', and 'obs' must have same dimension names.") } name_1 <- sort(names(dim(exp1))) @@ -107,8 +109,8 @@ RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = name_3 <- sort(names(dim(obs))) if (!all(dim(exp1)[name_1] == dim(exp2)[name_2]) | !all(dim(exp1)[name_1] == dim(obs)[name_3])) { - stop(paste0("Parameter 'exp1', 'exp2', and 'obs' must have the same length of ", - "all the dimensions.")) + stop("Parameter 'exp1', 'exp2', and 'obs' must have the same length of ", + "all the dimensions.") } ## time_dim if (!is.character(time_dim) | length(time_dim) > 1) { @@ -161,7 +163,8 @@ RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = dif2 <- exp2 - obs rms1 <- MeanDims(dif1^2, time_dim, na.rm = TRUE)^0.5 rms2 <- MeanDims(dif2^2, time_dim, na.rm = TRUE)^0.5 - rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- max(abs(rms2), na.rm = TRUE) / 1000 + rms2[which(abs(rms2) <= (max(abs(rms2), na.rm = TRUE) / 1000))] <- + max(abs(rms2), na.rm = TRUE) / 1000 ratiorms <- rms1 / rms2 if (pval) { @@ -171,12 +174,12 @@ RatioRMS <- function(exp1, exp2, obs, time_dim = 'sdate', pval = TRUE, ncores = F[which(F < 1)] <- 1 / F[which(F < 1)] if (is.null(dim(ratiorms))) { - p.val <- c() + p.val <- NULL } else { p.val <- array(dim = dim(ratiorms)) } avail_ind <- which(!is.na(eno1) & !is.na(eno2) & eno1 > 2 & eno2 > 2) - p.val[avail_ind] <- (1 - pf(F,eno1[avail_ind] - 1, eno2[avail_ind] - 1)) * 2 + p.val[avail_ind] <- (1 - pf(F, eno1[avail_ind] - 1, eno2[avail_ind] - 1)) * 2 ratiorms[-avail_ind] <- NA } diff --git a/R/RatioSDRMS.R b/R/RatioSDRMS.R index 6040410b3b0e3eaffdc8f77fea75d4eaad806ed8..f7e848f3648869b5d52c6a683f9f677219d5d745 100644 --- a/R/RatioSDRMS.R +++ b/R/RatioSDRMS.R @@ -62,11 +62,11 @@ RatioSDRMS <- function(exp, obs, 
dat_dim = NULL, memb_dim = 'member', stop("Parameter 'exp' and 'obs' must be a numeric array.") } if (is.null(dim(exp)) | is.null(dim(obs))) { - stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", - "dimensions memb_dim and time_dim.")) + stop("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions memb_dim and time_dim.") } - if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## dat_dim @@ -112,8 +112,8 @@ RatioSDRMS <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', name_exp <- name_exp[-which(name_exp == memb_dim)] name_obs <- name_obs[-which(name_obs == memb_dim)] if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all the dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.") } ## pval if (!is.logical(pval) | length(pval) > 1) { @@ -171,7 +171,7 @@ RatioSDRMS <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', dif <- exp - InsertDim(ens_exp, 2, dim(exp)[2]) # [nexp, member, sdate] std <- apply(dif, 1, sd, na.rm = TRUE) # [nexp] - enosd <- apply(Eno(dif, names(dim(exp))[3]), 1, sum, na.rm = TRUE) + enosd <- rowSums(Eno(dif, names(dim(exp))[3]), na.rm = TRUE) # Create empty arrays ratiosdrms <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] @@ -182,7 +182,7 @@ RatioSDRMS <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', dif <- ens_exp[jexp, ] - ens_obs[jobs, ] rms <- mean(dif^2, na.rm = TRUE)^0.5 enorms <- Eno(dif) - ratiosdrms[jexp, jobs] <- std[jexp]/rms + ratiosdrms[jexp, jobs] <- std[jexp] / rms if (pval) { F <- (enosd[jexp] * std[jexp]^2 / (enosd[jexp] - 1)) / (enorms * rms^2 / (enorms - 1)) diff --git a/R/Regression.R b/R/Regression.R index 535f179c6ac15f46b3a23c9a58dd9f35e0c9ba62..ef92fc4e3011291b0678b1bc137025fd1f95eb06 100644 --- a/R/Regression.R +++ b/R/Regression.R @@ -108,17 +108,17 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, dim(datax) <- c(length(datax)) names(dim(datax)) <- reg_dim } - if(any(is.null(names(dim(datay))))| any(nchar(names(dim(datay))) == 0) | - any(is.null(names(dim(datax))))| any(nchar(names(dim(datax))) == 0)) { + if (any(is.null(names(dim(datay)))) | any(nchar(names(dim(datay))) == 0) | + any(is.null(names(dim(datax)))) | any(nchar(names(dim(datax))) == 0)) { stop("Parameter 'datay' and 'datax' must have dimension names.") } - if(!all(names(dim(datay)) %in% names(dim(datax))) | - !all(names(dim(datax)) %in% names(dim(datay)))) { + if (!all(names(dim(datay)) %in% names(dim(datax))) | + !all(names(dim(datax)) %in% names(dim(datay)))) { stop("Parameter 'datay' and 'datax' must have same dimension name") } name_datay <- sort(names(dim(datay))) name_datax <- sort(names(dim(datax))) - if(!all(dim(datay)[name_datay] == dim(datax)[name_datax])) { + if (!all(dim(datay)[name_datay] == dim(datax)[name_datax])) { stop("Parameter 'datay' and 'datax' must have same length of all dimensions.") } ## reg_dim @@ -150,15 +150,15 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, } ## na.action if (!is.function(na.action) & !is.numeric(na.action)) 
{ - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) + stop("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") } if (is.numeric(na.action)) { if (any(na.action %% 1 != 0) | any(na.action < 0) | length(na.action) > 1) { - stop(paste0("Parameter 'na.action' must be a function for NA values or ", - "a numeric indicating the number of NA values allowed ", - "before returning NA.")) + stop("Parameter 'na.action' must be a function for NA values or ", + "a numeric indicating the number of NA values allowed ", + "before returning NA.") } } ## ncores @@ -201,7 +201,7 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, .Regression <- function(y, x, formula = y~x, pval = TRUE, conf = TRUE, sign = FALSE, alpha = 0.05, na.action = na.omit) { - NApos <- 1:length(x) + NApos <- seq_along(x) NApos[which(is.na(x) | is.na(y))] <- NA filtered <- rep(NA, length(x)) check_na <- FALSE @@ -226,17 +226,15 @@ Regression <- function(datay, datax, reg_dim = 'sdate', formula = y ~ x, filtered[!is.na(NApos)] <- y[!is.na(NApos)] - lm.out$fitted.values # Check if NA is too many - if (check_na) { - if (sum(is.na(NApos)) > na_threshold) { #turn everything into NA - coeff[which(!is.na(coeff))] <- NA - if (conf) { - conf.lower[which(!is.na(conf.lower))] <- NA - conf.upper[which(!is.na(conf.upper))] <- NA - } - if (pval) p.val[which(!is.na(p.val))] <- NA - if (sign) signif[which(!is.na(signif))] <- NA - filtered[which(!is.na(filtered))] <- NA + if (check_na && sum(is.na(NApos)) > na_threshold) { #turn everything into NA + coeff[which(!is.na(coeff))] <- NA + if (conf) { + conf.lower[which(!is.na(conf.lower))] <- NA + conf.upper[which(!is.na(conf.upper))] <- NA } + if (pval) p.val[which(!is.na(p.val))] <- NA + if (sign) signif[which(!is.na(signif))] <- NA + filtered[which(!is.na(filtered))] <- NA } res <- list(regression = coeff, filtered = filtered) diff --git a/R/Reorder.R b/R/Reorder.R index 71a22e34e0d2adab946b6e59f69f4fd3ed36dbe6..2809a00eab1860723acbe051222a6d7691cde61a 100644 --- a/R/Reorder.R +++ b/R/Reorder.R @@ -37,7 +37,7 @@ Reorder <- function(data, order) { } ## If attribute "dimensions" exists - attr.dim.reorder <- ifelse(!is.null(attributes(data)$dimensions), TRUE, FALSE) + attr.dim.reorder <- !is.null(attributes(data)$dimensions) ## order if (is.null(order)) { @@ -63,11 +63,9 @@ Reorder <- function(data, order) { } } else { dim_names <- names(dim(data)) - if (attr.dim.reorder) { - if (any(attributes(data)$dimensions != dim_names)) { - warning("Found attribute 'dimensions' has different names from ", - "names(dim(x)). Use the latter one to reorder.") - } + if (attr.dim.reorder && any(attributes(data)$dimensions != dim_names)) { + warning("Found attribute 'dimensions' has different names from ", + "names(dim(x)). 
Use the latter one to reorder.") } } if (!all(order %in% dim_names)) { @@ -75,8 +73,8 @@ Reorder <- function(data, order) { } } if (length(order) != length(dim(data))) { - stop(paste0("The length of parameter 'order' should be the same with the ", - "dimension length of parameter 'data'.")) + stop("The length of parameter 'order' should be the same with the ", + "dimension length of parameter 'data'.") } @@ -97,7 +95,7 @@ Reorder <- function(data, order) { if (is.numeric(data)) { data <- aperm(data, order) } else { - y <- array(1:length(data), dim = dim(data)) + y <- array(seq_along(data), dim = dim(data)) y <- aperm(y, order) data <- data[as.vector(y)] dim(data) <- old_dims[order] diff --git a/R/ResidualCorr.R b/R/ResidualCorr.R index 18ca539fb9d0ed540af062999d61fac2b942c3b6..53cec7c15539df3d182023643e78892c13682a00 100644 --- a/R/ResidualCorr.R +++ b/R/ResidualCorr.R @@ -94,12 +94,12 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', if (!is.numeric(N.eff)) stop("Parameter 'N.eff' must be numeric.") if (!all(names(dim(N.eff)) %in% names(dim(obs))) | any(dim(obs)[match(names(dim(N.eff)), names(dim(obs)))] != dim(N.eff))) { - stop(paste0('If parameter "N.eff" is provided with an array, it must ', - 'have the same dimensions as "obs" except "time_dim".')) + stop('If parameter "N.eff" is provided with an array, it must ', + 'have the same dimensions as "obs" except "time_dim".') } } else if (any((!is.na(N.eff) & !is.numeric(N.eff)) | length(N.eff) != 1)) { - stop(paste0('Parameter "N.eff" must be NA, a numeric, or an array with ', - 'the same dimensions as "obs" except "time_dim".')) + stop('Parameter "N.eff" must be NA, a numeric, or an array with ', + 'the same dimensions as "obs" except "time_dim".') } ## time_dim @@ -126,10 +126,12 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', name_exp <- name_exp[-which(name_exp == memb_dim)] name_ref <- name_ref[-which(name_ref == memb_dim)] } - if (length(name_exp) != length(name_obs) | length(name_exp) != length(name_ref) | - any(dim(exp)[name_exp] != dim(obs)[name_obs]) | any(dim(exp)[name_exp] != dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp', 'obs', and 'ref' must have same length of ", - "all dimensions except 'memb_dim'.")) + if (length(name_exp) != length(name_obs) | + length(name_exp) != length(name_ref) | + any(dim(exp)[name_exp] != dim(obs)[name_obs]) | + any(dim(exp)[name_exp] != dim(ref)[name_ref])) { + stop("Parameter 'exp', 'obs', and 'ref' must have same length of ", + "all dimensions except 'memb_dim'.") } ## method if (!method %in% c("pearson", "kendall", "spearman")) { @@ -167,14 +169,13 @@ ResidualCorr <- function(exp, obs, ref, N.eff = NA, time_dim = 'sdate', # Calculate ensemble mean dim_exp <- dim(exp) - dim_obs <- dim(obs) dim_ref <- dim(ref) if (!is.null(memb_dim)) { exp_memb_dim_ind <- which(names(dim_exp) == memb_dim) ref_memb_dim_ind <- which(names(dim_ref) == memb_dim) - exp <- apply(exp, c(1:length(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) - ref <- apply(ref, c(1:length(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) + exp <- apply(exp, c(seq_along(dim_exp))[-exp_memb_dim_ind], mean, na.rm = FALSE) + ref <- apply(ref, c(seq_along(dim_ref))[-ref_memb_dim_ind], mean, na.rm = FALSE) if (is.null(dim(exp))) exp <- array(exp, dim = c(dim_exp[time_dim])) if (is.null(dim(ref))) ref <- array(ref, dim = c(dim_ref[time_dim])) } diff --git a/R/SPOD.R b/R/SPOD.R index 3a8ff73dc50fe0d567f94722256e5ffa5892966a..3f019578b11720614736aff35a3268d44fe6159f 100644 --- 
a/R/SPOD.R +++ b/R/SPOD.R @@ -122,13 +122,13 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l stop("Parameter 'lon_dim' is not found in 'data' dimension.") } # data_lats and data_lons part2 - if (dim(data)[lat_dim] != length(data_lats)){ - stop(paste0("The latitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lats'.")) + if (dim(data)[lat_dim] != length(data_lats)) { + stop("The latitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lats'.") } - if (dim(data)[lon_dim] != length(data_lons)){ - stop(paste0("The longitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lons'.")) + if (dim(data)[lon_dim] != length(data_lons)) { + stop("The longitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lons'.") } # ncores if (!is.null(ncores)) { @@ -139,28 +139,26 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } # mask if (!is.null(mask)) { - if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & + if (is.array(mask) & identical(names(dim(mask)), c(lat_dim, lon_dim)) & identical(as.integer(dim(mask)), c(length(data_lats), length(data_lons)))) { ## To mask those grid point that are missing in the observations mask <- s2dv::Reorder(data = mask, order = c(lat_dim, lon_dim)) - fun_mask <- function(data, mask){ + fun_mask <- function(data, mask) { data[mask == 0] <- NA return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { - stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", - "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", - "points that have to be masked.")) + stop("Parameter 'mask' must be NULL (no mask) or a numerical array ", + "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", + "points that have to be masked.") } } # monini - if (type == 'dcpp') { - if (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | - monini > 12) { - stop("Parameter 'monini' must be an integer from 1 to 12.") - } + if (type == 'dcpp' && + (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | monini > 12)) { + stop("Parameter 'monini' must be an integer from 1 to 12.") } # fmonth_dim if (type == 'dcpp') { @@ -182,11 +180,11 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } # indices_for_clim if (!is.null(indices_for_clim)) { - if (!class(indices_for_clim) %in% c('numeric', 'integer') - & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { - stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", - "or NULL to compute the anomalies based on the whole period, ", - "or FALSE if data are already anomalies")) + if (!(is(indices_for_clim, "numeric") || is(indices_for_clim, "integer")) & + !(is.logical(indices_for_clim) & !any(indices_for_clim))) { + stop("The parameter 'indices_for_clim' must be a numeric vector ", + "or NULL to compute the anomalies based on the whole period, ", + "or FALSE if data are already anomalies") } } # year_dim @@ -208,7 +206,7 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l } } # na.rm - if (!na.rm %in% c(TRUE,FALSE)) { + if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } @@ -230,12 +228,12 @@ SPOD <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'l londim = lon_dim, latdim = lat_dim) - data <- 
ClimProjDiags::CombineIndices(indices = list(mean_1,mean_2), + data <- ClimProjDiags::CombineIndices(indices = list(mean_1, mean_2), weights = NULL, operation = 'subtract') # (mean_1 - mean_2) - if (type == 'dcpp'){ + if (type == 'dcpp') { target_dims <- c(sdate_dim, fmonth_dim) - } else if (type %in% c('hist','obs')){ + } else if (type %in% c('hist', 'obs')) { target_dims <- c(year_dim, month_dim) } diff --git a/R/Season.R b/R/Season.R index 1425d592db2ae55481d30c4f4e6b7d2bd83f4991..46dcc489994f91920316eb759437ec78a06a41cc 100644 --- a/R/Season.R +++ b/R/Season.R @@ -5,15 +5,15 @@ #'accounted. #' #'@param data A named numeric array with at least one dimension 'time_dim'. -#'@param time_dim A character string indicating the name of dimension along -#' which the seasonal mean or other calculations are computed. The default -#' value is 'ftime'. #'@param monini An integer indicating what the first month of the time series is. #' It can be from 1 to 12. #'@param moninf An integer indicating the starting month of the seasonal #' calculation. It can be from 1 to 12. #'@param monsup An integer indicating the end month of the seasonal calculation. #' It can be from 1 to 12. +#'@param time_dim A character string indicating the name of dimension along +#' which the seasonal mean or other calculations are computed. The default +#' value is 'ftime'. #'@param method An R function to be applied for seasonal calculation. For #' example, 'sum' can be used for total precipitation. The default value is mean. #'@param na.rm A logical value indicating whether to remove NA values along @@ -38,7 +38,7 @@ #'res <- Season(data = dat2, monini = 3, moninf = 1, monsup = 2, na.rm = FALSE) #'@import multiApply #'@export -Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, +Season <- function(data, monini, moninf, monsup, time_dim = 'ftime', method = mean, na.rm = TRUE, ncores = NULL) { # Check inputs @@ -53,7 +53,7 @@ Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -144,7 +144,7 @@ Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, names(dim(res)) <- time_dim } else { time_dim_ind <- match(time_dim, names(dim(data))) - res <- apply(data, c(1:length(dim(data)))[-time_dim_ind], .Season, + res <- apply(data, c(seq_along(dim(data)))[-time_dim_ind], .Season, monini = monini, moninf = moninf, monsup = monsup, method = method, na.rm = na.rm) if (is.null(dim(res))) { @@ -176,18 +176,24 @@ Season <- function(data, time_dim = 'ftime', monini, moninf, monsup, pos <- moninf : monsup # Extended index for all period: if (length(x) > pos[length(pos)]) { - pos2 <- lapply(pos, function(y) {seq(y, length(x), 12)}) + pos2 <- lapply(pos, function(y) { + seq(y, length(x), 12) + }) } else { pos2 <- pos } # Correct if the final season is not complete: maxyear <- min(unlist(lapply(pos2, length))) - pos2 <- lapply(pos2, function(y) {y[1 : maxyear]}) + pos2 <- lapply(pos2, function(y) { + y[1 : maxyear] + }) # Convert to array: pos2 <- unlist(pos2) - dim(pos2) <- c(year = maxyear, month = length(pos2)/maxyear) + dim(pos2) <- c(year = maxyear, month = length(pos2) / maxyear) - timeseries <- apply(pos2, 1, function(y) {method(x[y], na.rm = na.rm)}) + timeseries <- apply(pos2, 1, function(y) { + method(x[y], na.rm = 
na.rm) + }) timeseries <- as.array(timeseries) return(timeseries) diff --git a/R/SignalNoiseRatio.R b/R/SignalNoiseRatio.R index 18dccf407a52c7eb3c7d6e193edf0fe2d1e470b5..6ea72b6c39a4bd4e9ebd9cd3137154d632482d49 100644 --- a/R/SignalNoiseRatio.R +++ b/R/SignalNoiseRatio.R @@ -25,7 +25,8 @@ #'@import multiApply #'@importFrom stats var #'@export -SignalNoiseRatio <- function(data, time_dim = 'year', member_dim = 'member', na.rm = FALSE, ncores = NULL) { +SignalNoiseRatio <- function(data, time_dim = 'year', member_dim = 'member', + na.rm = FALSE, ncores = NULL) { ## Input Check if (is.null(data)) { @@ -49,8 +50,8 @@ SignalNoiseRatio <- function(data, time_dim = 'year', member_dim = 'member', na. if (!is.logical(na.rm)) { stop("Parameter 'na.rm' must be TRUE or FALSE.") } - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { stop("Parameter 'ncores' must be a positive integer.") } } @@ -68,7 +69,7 @@ SignalNoiseRatio <- function(data, time_dim = 'year', member_dim = 'member', na. # data: [time, member] ## Ensemble mean and spread - ens_mean <- apply(data, 1, mean, na.rm = na.rm) + ens_mean <- rowMeans(data, na.rm = na.rm) ens_spread <- apply(data, 2, "-", ens_mean) ## Ensemble mean variance -> signal diff --git a/R/Smoothing.R b/R/Smoothing.R index 1b31e6594858262fbea5353392e04a7435e2aec8..f1efa449c09ab1d0edbae06cd69fc0543e3772c5 100644 --- a/R/Smoothing.R +++ b/R/Smoothing.R @@ -45,7 +45,7 @@ Smoothing <- function(data, time_dim = 'ftime', runmeanlen = 12, ncores = NULL) dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -62,12 +62,12 @@ Smoothing <- function(data, time_dim = 'ftime', runmeanlen = 12, ncores = NULL) } time_dim_length <- dim(data)[which(names(dim(data)) == time_dim)] if (runmeanlen >= time_dim_length & time_dim_length %% 2 == 0) { - stop(paste0("Parameter 'runmeanlen' must be within [1, ", time_dim_length - 1, - "].")) + stop("Parameter 'runmeanlen' must be within [1, ", time_dim_length - 1, + "].") } if (runmeanlen > time_dim_length & time_dim_length %% 2 != 0) { - stop(paste0("Parameter 'runmeanlen' must be within [1, ", time_dim_length, - "].")) + stop("Parameter 'runmeanlen' must be within [1, ", time_dim_length, + "].") } ## ncores if (!is.null(ncores)) { diff --git a/R/Spectrum.R b/R/Spectrum.R index a75ead6d340c4a555e42619711889f3e50c59145..cbaf135946fdb5c35000319bf248d3efd51775b8 100644 --- a/R/Spectrum.R +++ b/R/Spectrum.R @@ -59,7 +59,7 @@ Spectrum <- function(data, time_dim = 'ftime', alpha = 0.05, ncores = NULL) { dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -97,7 +97,7 @@ Spectrum <- function(data, time_dim = 'ftime', alpha = 0.05, ncores = NULL) { .Spectrum <- function(data, alpha = 0.05) { # data: [time] - data <- data[is.na(data) == FALSE] + data <- data[!is.na(data)] ndat <- length(data) if (ndat >= 3) { @@ -118,7 +118,7 @@ Spectrum <- function(data, time_dim = 'ftime', alpha = 0.05, ncores = NULL) { toto2 <- spectrum(toto, plot = FALSE) store[jt, 
] <- toto2$spec } - for (jx in 1:length(tmp$spec)) { + for (jx in seq_along(tmp$spec)) { output[jx, 3] <- quantile(store[, jx], 1 - alpha) } } else { diff --git a/R/SprErr.R b/R/SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..f89d37a10fd41dd7f19ec03a848d5b8daa39b639 --- /dev/null +++ b/R/SprErr.R @@ -0,0 +1,220 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance is provided by a two-sided Fisher's test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute the p-value +#' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param na.rm A logical value indicating whether to remove NA values. The +#' default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list of arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +#' If dat_dim is NULL, nexp and nobs are omitted. \cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p.val}{ +#' The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only +#' present if \code{pval = TRUE}. 
+#'} +#' +#'@examples +#'exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) +#'obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) +#'sprerr1 <- SprErr(exp, obs) +#'sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +#'sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) +#' +#'@import multiApply +#'@export +SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, sign = FALSE, + alpha = 0.05, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be an array with at least two ", + "dimensions memb_dim and time_dim.")) + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimensions. ", + "'exp' must have the member dimension to compute the spread.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { ## check no longer needed? 
+ dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + alpha = alpha, + na.rm = na.rm, + fun = .SprErr, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.vector) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) + enodif <- .Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) + if (pval | sign) { + f_statistic <- (enospr * spread^2 / (enospr - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(f_statistic) & !is.na(enospr) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- 
pf(f_statistic, enospr - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + p.val[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} diff --git a/R/Spread.R b/R/Spread.R index 5fba8cab793ba4eccb6c05801a15a1c3535e47fc..88ead15b78a454d144a58e0cd0fb6e6efdd6d697 100644 --- a/R/Spread.R +++ b/R/Spread.R @@ -97,14 +97,14 @@ Spread <- function(data, compute_dim = 'member', na.rm = TRUE, dim(data) <- c(length(data)) names(dim(data)) <- compute_dim[1] } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## compute_dim if (!is.character(compute_dim)) { stop("Parameter 'compute_dim' must be a character vector.") } - if (any(!compute_dim %in% names(dim(data)))) { + if (!all(compute_dim %in% names(dim(data)))) { stop("Parameter 'compute_dim' has some element not in 'data' dimension names.") } ## na.rm diff --git a/R/StatSeasAtlHurr.R b/R/StatSeasAtlHurr.R index 764215a4fe2081d4a2be80d184c4dcfe29063f7d..5f94d5c60cd9227db942c6bc12d1cd7069ba6e23 100644 --- a/R/StatSeasAtlHurr.R +++ b/R/StatSeasAtlHurr.R @@ -70,19 +70,19 @@ StatSeasAtlHurr <- function(atlano, tropano, hrvar = "HR", ncores = NULL) { dim(tropano) <- c(length(tropano)) names(dim(tropano)) <- 'dim1' } - if(any(is.null(names(dim(atlano))))| any(nchar(names(dim(atlano))) == 0) | - any(is.null(names(dim(tropano))))| any(nchar(names(dim(tropano))) == 0)) { + if (any(is.null(names(dim(atlano)))) | any(nchar(names(dim(atlano))) == 0) | + any(is.null(names(dim(tropano)))) | any(nchar(names(dim(tropano))) == 0)) { stop("Parameter 'atlano' and 'tropano' must have dimension names.") } - if(!all(names(dim(atlano)) %in% names(dim(tropano))) | - !all(names(dim(tropano)) %in% names(dim(atlano)))) { + if (!all(names(dim(atlano)) %in% names(dim(tropano))) | + !all(names(dim(tropano)) %in% names(dim(atlano)))) { stop("Parameter 'atlano' and 'tropano' must have same dimension names.") } name_1 <- sort(names(dim(atlano))) name_2 <- sort(names(dim(tropano))) if (!all(dim(atlano)[name_1] == dim(tropano)[name_2])) { - stop(paste0("Parameter 'atlano' and 'tropano' must have the same length of ", - "all the dimensions.")) + stop("Parameter 'atlano' and 'tropano' must have the same length of ", + "all the dimensions.") } ## hrvar if (hrvar != "HR" & hrvar != "TC" & hrvar != "PDI") { diff --git a/R/TPI.R b/R/TPI.R index 167664f6bb0655161a930f0f9b45f8117dd480ec..ecde7acbf353055d3d93ca94c1c93e1c224fcf33 100644 --- a/R/TPI.R +++ b/R/TPI.R @@ -121,13 +121,13 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo stop("Parameter 'lon_dim' is not found in 'data' dimension.") } # data_lats and data_lons part2 - if (dim(data)[lat_dim] != length(data_lats)){ - stop(paste0("The latitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lats'.")) + if (dim(data)[lat_dim] != length(data_lats)) { + stop("The latitude dimension of parameter 'data' must be the same", + " length of parameter 'data_lats'.") } - if (dim(data)[lon_dim] != length(data_lons)){ - stop(paste0("The longitude dimension of parameter 'data' must be the same", - " length of parameter 'data_lons'.")) + if (dim(data)[lon_dim] != length(data_lons)) { + stop("The longitude dimension of parameter 'data' must be the same", + " length of parameter 
'data_lons'.") } # ncores if (!is.null(ncores)) { @@ -138,28 +138,26 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } # mask if (!is.null(mask)) { - if (is.array(mask) & identical(names(dim(mask)), c(lat_dim,lon_dim)) & + if (is.array(mask) & identical(names(dim(mask)), c(lat_dim, lon_dim)) & identical(as.integer(dim(mask)), c(length(data_lats), length(data_lons)))) { ## To mask those grid point that are missing in the observations mask <- s2dv::Reorder(data = mask, order = c(lat_dim, lon_dim)) - fun_mask <- function(data, mask){ + fun_mask <- function(data, mask) { data[mask == 0] <- NA return(data) } data <- multiApply::Apply(data = data, target_dims = c(lat_dim, lon_dim), fun = fun_mask, mask = mask, ncores = ncores)$output1 } else { - stop(paste0("Parameter 'mask' must be NULL (no mask) or a numerical array ", - "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", - "points that have to be masked.")) + stop("Parameter 'mask' must be NULL (no mask) or a numerical array ", + "with c(lat_dim, lon_dim) dimensions and 0 in those grid ", + "points that have to be masked.") } } # monini - if (type == 'dcpp') { - if (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | - monini > 12) { - stop("Parameter 'monini' must be an integer from 1 to 12.") - } + if (type == 'dcpp' && + (!is.numeric(monini) | monini %% 1 != 0 | monini < 1 | monini > 12)) { + stop("Parameter 'monini' must be an integer from 1 to 12.") } # fmonth_dim if (type == 'dcpp') { @@ -183,9 +181,9 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo if (!is.null(indices_for_clim)) { if (!(inherits(indices_for_clim, 'numeric') | inherits(indices_for_clim, 'integer')) & !(is.logical(indices_for_clim) & !any(indices_for_clim))) { - stop(paste0("The parameter 'indices_for_clim' must be a numeric vector ", - "or NULL to compute the anomalies based on the whole period, ", - "or FALSE if data are already anomalies")) + stop("The parameter 'indices_for_clim' must be a numeric vector ", + "or NULL to compute the anomalies based on the whole period, ", + "or FALSE if data are already anomalies") } } # year_dim @@ -207,7 +205,7 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo } } # na.rm - if (!na.rm %in% c(TRUE,FALSE)) { + if (!na.rm %in% c(TRUE, FALSE)) { stop("Parameter 'na.rm' must be TRUE or FALSE") } @@ -240,11 +238,12 @@ TPI <- function(data, data_lats, data_lons, type, lat_dim = 'lat', lon_dim = 'lo weights = NULL, operation = 'mean') data <- ClimProjDiags::CombineIndices(indices = list(mean_2, mean_1_3), - weights = NULL, operation = 'subtract') # mean_2 - ((mean_1 + mean_3)/2) + weights = NULL, operation = 'subtract') + # mean_2 - ((mean_1 + mean_3)/2) - if (type == 'dcpp'){ + if (type == 'dcpp') { target_dims <- c(sdate_dim, fmonth_dim) - } else if (type %in% c('hist','obs')){ + } else if (type %in% c('hist', 'obs')) { target_dims <- c(year_dim, month_dim) } diff --git a/R/Trend.R b/R/Trend.R index e10fe19901a581d92c2c05bde61c9f350395c3df..883bcdf42ef3024bb3d0a95636d45c26bf6d356d 100644 --- a/R/Trend.R +++ b/R/Trend.R @@ -89,7 +89,7 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 dim(data) <- c(length(data)) names(dim(data)) <- time_dim } - if(any(is.null(names(dim(data))))| any(nchar(names(dim(data))) == 0)) { + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { stop("Parameter 'data' must have dimension names.") } ## time_dim @@ -163,7 +163,7 @@ Trend <- 
function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 mon <- seq(x) * interval # remove NAs for potential poly() - NApos <- 1:length(x) + NApos <- seq_along(x) NApos[which(is.na(x))] <- NA x2 <- x[!is.na(NApos)] mon2 <- mon[!is.na(NApos)] @@ -179,13 +179,13 @@ Trend <- function(data, time_dim = 'ftime', interval = 1, polydeg = 1, alpha = 0 } if (pval | sign) { - p.value <- as.array(stats::anova(lm.out)$'Pr(>F)'[1]) + p.value <- as.array(stats::anova(lm.out)[['Pr(>F)']][1]) if (pval) p.val <- p.value if (sign) signif <- !is.na(p.value) & p.value <= alpha } - detrended <- c() - detrended[is.na(x) == FALSE] <- x[is.na(x) == FALSE] - lm.out$fitted.values + detrended <- NULL + detrended[!is.na(x)] <- x[!is.na(x)] - lm.out$fitted.values } else { diff --git a/R/UltimateBrier.R b/R/UltimateBrier.R index 44498a365633d16d612869768b567e63c6a2eb18..79b9864574cbc35724d509aa0f92da2ba4e5c56b 100644 --- a/R/UltimateBrier.R +++ b/R/UltimateBrier.R @@ -98,8 +98,8 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di if (!is.numeric(exp) | !is.numeric(obs)) { stop("Parameter 'exp' and 'obs' must be a vector or a numeric array.") } - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { stop("Parameter 'exp' and 'obs' must have dimension names.") } ## dat_dim @@ -140,12 +140,12 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di name_exp <- name_exp[-which(name_exp == dat_dim)] name_obs <- name_obs[-which(name_obs == dat_dim)] if (any(name_exp != name_obs)) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' and 'memb_dim'.") } if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have the same names and lengths ", - "of all the dimensions except 'dat_dim' and 'memb_dim'.")) + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'dat_dim' and 'memb_dim'.") } ## quantile if (!is.logical(quantile) | length(quantile) > 1) { @@ -155,17 +155,17 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di if (!is.numeric(thr) | !is.vector(thr)) { stop("Parameter 'thr' must be a numeric vector.") } - if (quantile) { - if (!all(thr < 1 & thr > 0)) { - stop("Parameter 'thr' must be between 0 and 1 when quantile is TRUE.") - } + if (quantile && !all(thr < 1 & thr > 0)) { + stop("Parameter 'thr' must be between 0 and 1 when quantile is TRUE.") } if (!quantile & (type %in% c('FairEnsembleBSS', 'FairEnsembleBS'))) { stop("Parameter 'quantile' must be TRUE if 'type' is 'FairEnsembleBSS' or 'FairEnsembleBS'.") } ## type - if (!(type %in% c("BS", "BSS", "FairEnsembleBS", "FairEnsembleBSS", "FairStartDatesBS", "FairStartDatesBSS"))) { - stop("Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', 'FairEnsembleBSS', 'FairStartDatesBS' or 'FairStartDatesBSS'.") + if (!(type %in% c("BS", "BSS", "FairEnsembleBS", "FairEnsembleBSS", + "FairStartDatesBS", "FairStartDatesBSS"))) { + stop("Parameter 'type' must be one of 'BS', 'BSS', 'FairEnsembleBS', ", + "'FairEnsembleBSS', 'FairStartDatesBS' or 
'FairStartDatesBSS'.") } ## decomposition if (!is.logical(decomposition) | length(decomposition) > 1) { @@ -243,15 +243,16 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2]), bin = length(thr) + 1)) - for (n_exp in 1:dim(exp)[2]) { - for (n_obs in 1:dim(obs)[2]) { + for (n_exp in seq_len(dim(exp)[2])) { + for (n_obs in seq_len(dim(obs)[2])) { ens_ref <- matrix(obs[, n_obs, 1], size_ens_ref, size_ens_ref, byrow = TRUE) - for (n_thr in 1:length(c(thr, 1))) { + for (n_thr in seq_along(c(thr, 1))) { #NOTE: FairBreirSs is deprecated now. Should change to SkillScore (according to # SpecsVerification's documentation) - res[n_exp, n_obs, n_thr] <- SpecsVerification::FairBrierSs(exp[, n_exp, ] > c(thr, 1)[n_thr], - ens_ref > c(thr, 1)[n_thr], - obs[, n_obs, 1] > c(thr, 1)[n_thr])['skillscore'] + res[n_exp, n_obs, n_thr] <- + SpecsVerification::FairBrierSs(exp[, n_exp, ] > c(thr, 1)[n_thr], + ens_ref > c(thr, 1)[n_thr], + obs[, n_obs, 1] > c(thr, 1)[n_thr])['skillscore'] } } } @@ -261,7 +262,8 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di } else if (type == 'FairEnsembleBS') { #NOTE: The calculation in s2dverification::UltimateBrier is wrong. In the final stage, - # the function calculates like "take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3)", + # the function calculates like + # "take(result, 3, 1) - take(result, 3, 2) + take(result, 3, 3)", # but the 3rd dim of result is 'bins' instead of decomposition. 'FairEnsembleBS' does # not have decomposition. # The calculation is fixed here. @@ -272,9 +274,9 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di res <- array(dim = c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2]), bin = length(thr) + 1)) - for (n_exp in 1:dim(exp)[2]) { - for (n_obs in 1:dim(obs)[2]) { - for (n_thr in 1:length(c(thr, 1))) { + for (n_exp in seq_len(dim(exp)[2])) { + for (n_obs in seq_len(dim(obs)[2])) { + for (n_thr in seq_along(c(thr, 1))) { fb <- SpecsVerification::FairBrier(ens = exp[, n_exp, ] > c(thr, 1)[n_thr], obs = obs[, n_obs, 1] > c(thr, 1)[n_thr]) res[n_exp, n_obs, n_thr] <- mean(fb, na.rm = T) @@ -295,8 +297,8 @@ UltimateBrier <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', time_di comp <- array(dim = c(nexp = as.numeric(dim(exp)[2]), nobs = as.numeric(dim(obs)[2]), comp = 3)) - for (n_exp in 1:dim(exp)[2]) { - for (n_obs in 1:dim(obs)[2]) { + for (n_exp in seq_len(dim(exp)[2])) { + for (n_obs in seq_len(dim(obs)[2])) { #NOTE: Parameter 'bins' is default. comp[n_exp, n_obs, ] <- SpecsVerification::BrierDecomp(p = exp[, n_exp], y = obs[, n_obs])[1, ] diff --git a/R/Utils.R b/R/Utils.R index 362bdf8ff412601fd19d3ac14b7304c9f457a4a3..c6e233c00ca1c532c7bcd7224b62411e5585e209 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -5,10 +5,11 @@ ## Function to tell if a regexpr() match is a complete match to a specified name .IsFullMatch <- function(x, name) { - ifelse(x > 0 && attributes(x)$match.length == nchar(name), TRUE, FALSE) + x > 0 && attributes(x)$match.length == nchar(name) } -.ConfigReplaceVariablesInString <- function(string, replace_values, allow_undefined_key_vars = FALSE) { +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { # This function replaces all the occurrences of a variable in a string by # their corresponding string stored in the replace_values. 
if (length(strsplit(string, "\\$")[[1]]) > 1) { @@ -17,14 +18,18 @@ i <- 0 for (part in parts) { if (i %% 2 == 0) { - output <- paste(output, part, sep = "") + output <- paste0(output, part) } else { if (part %in% names(replace_values)) { - output <- paste(output, .ConfigReplaceVariablesInString(replace_values[[part]], replace_values, allow_undefined_key_vars), sep = "") + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) } else if (allow_undefined_key_vars) { output <- paste0(output, "$", part, "$") } else { - stop(paste('Error: The variable $', part, '$ was not defined in the configuration file.', sep = '')) + stop('Error: The variable $', part, + '$ was not defined in the configuration file.') } } i <- i + 1 @@ -37,10 +42,12 @@ .KnownLonNames <- function() { known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) } .KnownLatNames <- function() { known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) } .t2nlatlon <- function(t) { @@ -70,7 +77,8 @@ } else { nlons <- nlons + 2 if (nlons > 9999) { - stop("Error: pick another gaussian grid truncation. It doesn't fulfill the standards to apply FFT.") + stop("Error: pick another gaussian grid truncation. ", + "It doesn't fulfill the standards to apply FFT.") } } } @@ -95,8 +103,8 @@ position <- 1 dims <- rev(dims) indices <- rev(indices) - for (i in 1:length(indices)) { - position <- position + (indices[i] - 1) * prod(dims[-c(1:i)]) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) } position } @@ -112,7 +120,7 @@ output <- work_piece[['output']] # The names of all data files in the directory of the repository that match # the pattern are obtained. - if (length(grep("^http", filename)) > 0) { + if (any(grep("^http", filename))) { is_url <- TRUE files <- filename ## TODO: Check that the user is not using shell globbing exps.
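# Illustrative sketch (not part of the patch): expected behaviour of the refactored
# .ConfigReplaceVariablesInString() shown above. The tag names and values below are
# hypothetical, and the function is internal, so it is reached via ':::'.
replace_values <- list(DEFAULT_NC_VAR_NAME = "tas",
                       MEMBER_DIR = "r$REALIZATION$i1p1")
# Defined tags are substituted (recursively); with allow_undefined_key_vars = TRUE,
# an unknown tag such as $REALIZATION$ is kept as-is instead of raising an error.
s2dv:::.ConfigReplaceVariablesInString("$MEMBER_DIR$/$DEFAULT_NC_VAR_NAME$_monthly.nc",
                                       replace_values,
                                       allow_undefined_key_vars = TRUE)
# Expected result: "r$REALIZATION$i1p1/tas_monthly.nc"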
@@ -129,13 +137,11 @@ found_file <- filename mask <- work_piece[['mask']] - if (!silent) { - if (explore_dims) { - .message(paste("Exploring dimensions...", filename)) - } - ##} else { - ## cat(paste("* Reading & processing data...", filename, '\n')) - ##} + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} } # We will fill in 'expected_dims' with the names of the expected dimensions of @@ -145,7 +151,7 @@ # But first we open the file and work out whether the requested variable is 2d fnc <- nc_open(filein) if (!(namevar %in% names(fnc$var))) { - stop(paste("Error: The variable", namevar, "is not defined in the file", filename)) + stop("Error: The variable ", namevar, " is not defined in the file ", filename) } var_long_name <- fnc$var[[namevar]]$longname units <- fnc$var[[namevar]]$units @@ -175,9 +181,12 @@ stop("Error: CDO libraries not available") } - cdo_version <- strsplit(suppressWarnings(system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] - cdo_version <- as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) } # If the variable to load is 2-d, we need to determine whether: @@ -200,19 +209,20 @@ } grids_first_lines <- grids_positions + 2 grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) - grids_info <- as.list(1:length(grids_positions)) - grids_info <- lapply(grids_info, function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) grids_matches <- unlist(lapply(grids_info, function (x) { - nlons <- if (length(grep('xsize', x)) > 0) { + nlons <- if (any(grep('xsize', x))) { as.numeric(x[grep('xsize', x) + 1]) } else { NA } - nlats <- if (length(grep('ysize', x)) > 0) { + nlats <- if (any(grep('ysize', x))) { as.numeric(x[grep('ysize', x) + 1]) } else { NA @@ -239,7 +249,9 @@ grids_info[which(grids_matches)][[1]])))) { grid_type <- grids_types[which(grids_matches)][1] } else { - stop("Error: Load() can't disambiguate: More than one lonlat/gaussian grids with the same size as the requested variable defined in ", filename) + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grid with the same size as ", + "the requested variable defined in ", filename) } } else if (sum(grids_matches) == 1) { grid_type <- grids_types[which(grids_matches)] @@ -263,13 +275,13 @@ # later on.
if (!is.null(work_piece[['grid']])) { # Now we calculate the common grid type and its lons and lats - if (length(grep('^t\\d{1,+}grid$', work_piece[['grid']])) > 0) { + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { common_grid_type <- 'gaussian' common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) nlonlat <- .t2nlatlon(common_grid_res) common_grid_lats <- nlonlat[1] common_grid_lons <- nlonlat[2] - } else if (length(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']])) > 0) { + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { common_grid_type <- 'lonlat' common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) @@ -283,7 +295,7 @@ common_grid_lats <- length(lat) } first_common_grid_lon <- 0 - last_common_grid_lon <- 360 - 360/common_grid_lons + last_common_grid_lon <- 360 - 360 / common_grid_lons ## This is not true for gaussian grids or for some regular grids, but ## is a safe estimation first_common_grid_lat <- -90 @@ -318,11 +330,20 @@ } .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), " in '", - work_piece[['dataset_type']], "' doesn't start at longitude 0 and will be re-interpolated in order to align its longitudes with the standard CDO grids definable with the names 'tgrid' or 'rx', which are by definition starting at the longitude 0.\n")) + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) if (!is.null(mask)) { .warning(paste0("A mask was provided for the dataset with index ", tail(work_piece[['indices']], 1), " in '", - work_piece[['dataset_type']], "'. This dataset has been re-interpolated to align its longitudes to start at 0. You must re-interpolate the corresponding mask to align its longitudes to start at 0 as well, if you haven't done so yet. Running cdo remapcon,", common_grid_name, " original_mask_file.nc new_mask_file.nc will fix it.\n")) + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. 
", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) } } if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { @@ -351,14 +372,14 @@ lon_subsetting_requested <- TRUE } } else { - if ((lonmin - lonmax) > 360/common_grid_lons) { + if ((lonmin - lonmax) > 360 / common_grid_lons) { lon_subsetting_requested <- TRUE } else { - gap_width <- floor(lonmin / (360/common_grid_lons)) - - floor(lonmax / (360/common_grid_lons)) + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) if (gap_width > 0) { - if (!(gap_width == 1 && (lonmin %% (360/common_grid_lons) == 0) && - (lonmax %% (360/common_grid_lons) == 0))) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { lon_subsetting_requested <- TRUE } } @@ -390,7 +411,7 @@ system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, " -selname,", namevar, " ", filecopy, " ", filein, - " 2>/dev/null", sep = "")) + " 2>/dev/null")) file.remove(filecopy) work_piece[['dimnames']][['lon']] <- 'lon' work_piece[['dimnames']][['lat']] <- 'lat' @@ -404,7 +425,7 @@ ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') if (is.list(mask)) { if (!file.exists(mask[['path']])) { - stop(paste("Error: Couldn't find the mask file", mask[['path']])) + stop("Error: Couldn't find the mask file", mask[['path']]) } mask_file <- mask[['path']] ###file.copy(work_piece[['mask']][['path']], mask_file) @@ -413,26 +434,29 @@ if ('nc_var_name' %in% names(mask)) { if (!(mask[['nc_var_name']] %in% vars_in_mask)) { - stop(paste("Error: couldn't find variable", mask[['nc_var_name']], - "in the mask file", mask[['path']])) + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) } } else { if (length(vars_in_mask) != 1) { - stop(paste("Error: one and only one non-coordinate variable should be defined in the mask file", - mask[['path']], "if the component 'nc_var_name' is not specified. Currently found: ", - paste(vars_in_mask, collapse = ', '), ".")) + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. ", + "Currently found: ", + toString(vars_in_mask), ".") } else { mask[['nc_var_name']] <- vars_in_mask } } if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { - stop(paste0("Error: the variable '", + stop("Error: the variable '", mask[['nc_var_name']], "' must be defined only over the dimensions '", work_piece[['dimnames']][['lon']], "' and '", work_piece[['dimnames']][['lat']], "' in the mask file ", - mask[['path']])) + mask[['path']]) } mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) nc_close(fnc_mask) @@ -454,16 +478,28 @@ ### Now ready to check that the mask is right ##if (!(lonlat_subsetting_requested && remap_needed)) { ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { - ### stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. 
For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) ### } - ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { - ### stop(paste0("Error: the longitudes and latitudes in the masks must be identical to the ones in the corresponding data files if output = 'areave' or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in the mask file must start by 0 and the latitudes must be ordered from highest to lowest. See\n ", - ### work_piece[['mask']][['path']], " and ", filein)) - ###} + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} } } - lon_indices <- 1:length(lon) + lon_indices <- seq_along(lon) if (!(lonlat_subsetting_requested && remap_needed)) { lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 } @@ -478,11 +514,17 @@ ## always the latitudes are reordered. ## TODO: This could be avoided in future. if (lat[1] < lat[length(lat)]) { - lat_indices <- lat_indices[length(lat_indices):1] + lat_indices <- lat_indices[rev(seq_along(lat_indices))] } if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { - stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. 
For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") } mask <- mask[lon_indices, lat_indices] } @@ -495,15 +537,17 @@ ## if the requested number of points goes beyond the left or right ## sides of the map, we need to take the entire map so that the ## interpolation works properly - lon_indices <- 1:length(lon) + lon_indices <- seq_along(lon) } else { extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) if (extra_points > 0) { - lon_indices <- c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) } extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) if (extra_points > 0) { - lon_indices <- c(lon_indices, (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) } } min_lat_ind <- min(lat_indices) @@ -543,7 +587,8 @@ work_piece[['dimnames']][['member']] <- 'lev' } if (work_piece[['dimnames']][['member']] %in% var_dimnames) { - nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], var_dimnames)]]$len + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) } else { nmemb <- 1 @@ -554,9 +599,9 @@ if (!is.null(old_members_dimname)) { expected_dims[which(expected_dims == 'lev')] <- old_members_dimname } - stop(paste("Error: the expected dimension(s)", - paste(expected_dims[which(is.na(dim_matches))], collapse = ', '), - "were not found in", filename)) + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) } time_dimname <- var_dimnames[-dim_matches] } else { @@ -571,10 +616,13 @@ if (!is.null(old_members_dimname)) { expected_dims[which(expected_dims == 'lev')] <- old_members_dimname } - stop(paste("Error: the variable", namevar, - "is defined over more dimensions than the expected (", - paste(c(expected_dims, 'time'), collapse = ', '), - "). It could also be that the members, longitude or latitude dimensions are named incorrectly. In that case, either rename the dimensions in the file or adjust Load() to recognize the actual name with the parameter 'dimnames'. See file", filename)) + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) } } else { nltime <- 1 @@ -588,7 +636,7 @@ # to regrid it and work out the number of longitudes and latitudes. # We don't need more. 
members <- 1 - ltimes_list <- list(c(1)) + ltimes_list <- list(1) } else { # The data is arranged in the array 'tmp' with the dimensions in a # common order: @@ -648,7 +696,7 @@ } final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) } else { - start <- end <- c() + start <- end <- NULL subset_indices <- list() ncdf_dims <- list() final_dims <- c(1, 1, 1, 1) @@ -663,16 +711,17 @@ final_dims[3] <- length(members) } if (time_dimname %in% expected_dims) { - if (any(!is.na(ltimes))) { + if (!all(is.na(ltimes))) { start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) - subset_indices <- c(subset_indices, list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) } else { start <- c(start, NA) end <- c(end, NA) subset_indices <- c(subset_indices, list(ltimes)) } - dim_time <- ncdim_def(time_dimname, "", 1:length(ltimes), unlim = TRUE) + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) ncdf_dims <- c(ncdf_dims, list(dim_time)) final_dims[4] <- length(ltimes) } @@ -684,7 +733,7 @@ if (prod(final_dims) > 0) { tmp <- take(ncvar_get(fnc, namevar, start, count, collapse_degen = FALSE), - 1:length(subset_indices), subset_indices) + seq_along(subset_indices), subset_indices) # The data is regridded if it corresponds to an atmospheric variable. When # the chosen output type is 'areave' the data is not regridded to not # waste computing time unless the user specified a common grid. @@ -725,7 +774,7 @@ paste0(lonmin, ",", lonmax, ",") }, latmin, ",", latmax, " -remap", work_piece[['remap']], ",", common_grid_name, - " ", filein2, " ", filein, " 2>/dev/null", sep = "")) + " ", filein2, " ", filein, " 2>/dev/null")) file.remove(filein2) fnc2 <- nc_open(filein) sub_lon <- ncvar_get(fnc2, 'lon') @@ -734,42 +783,54 @@ ## In principle cdo should put in order the longitudes ## and slice them properly unless data is across greenwich sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 - sub_lon_indices <- 1:length(sub_lon) + sub_lon_indices <- seq_along(sub_lon) if (lonmax < lonmin) { sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] } - sub_lat_indices <- 1:length(sub_lat) + sub_lat_indices <- seq_along(sub_lat) ## In principle cdo should put in order the latitudes if (sub_lat[1] < sub_lat[length(sub_lat)]) { - sub_lat_indices <- length(sub_lat):1 + sub_lat_indices <- rev(seq_along(sub_lat)) } final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) subset_indices[[dim_matches[1]]] <- sub_lon_indices subset_indices[[dim_matches[2]]] <- sub_lat_indices tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), - 1:length(subset_indices), subset_indices) + seq_along(subset_indices), subset_indices) if (!is.null(mask)) { ## We create a very simple 2d netcdf file that is then interpolated to the common ## grid to know what are the lons and lats of our slice of data mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') - dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", c(0, 360)) - dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", c(-90, 90)) + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", 
c(-90, 90)) ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') fnc_mask <- nc_create(mask_file, list(ncdf_var)) ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) nc_close(fnc_mask) - system(paste0("cdo -L -s remap", work_piece[['remap']], ",", common_grid_name, - " ", mask_file, " ", mask_file_remap, " 2>/dev/null", sep = "")) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) fnc_mask <- nc_open(mask_file_remap) mask_lons <- ncvar_get(fnc_mask, 'lon') mask_lats <- ncvar_get(fnc_mask, 'lat') nc_close(fnc_mask) file.remove(mask_file, mask_file_remap) if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { - stop(paste("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), " in '", work_piece[['dataset_type']], "' is wrong. It must be on the common grid if the selected output type is 'lonlat', 'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on the grid of the corresponding dataset if the selected output type is 'areave' and no 'grid' has been specified. For more information check ?Load and see help on parameters 'grid', 'maskmod' and 'maskobs'.", sep = "")) + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") } mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 if (lonmax >= lonmin) { @@ -779,7 +840,7 @@ } mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) if (sub_lat[1] < sub_lat[length(sub_lat)]) { - mask_lat_indices <- mask_lat_indices[length(mask_lat_indices):1] + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] } mask <- mask[mask_lon_indices, mask_lat_indices] } @@ -800,10 +861,18 @@ ###} } } - if (!all(dim_matches == sort(dim_matches))) { - if (!found_disordered_dims && rev(work_piece[['indices']])[2] == 1 && rev(work_piece[['indices']])[3] == 1) { + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { found_disordered_dims <- TRUE - .warning(paste0("The dimensions for the variable ", namevar, " in the files of the experiment with index ", tail(work_piece[['indices']], 1), " are not in the optimal order for loading with Load(). The optimal order would be '", paste(expected_dims, collapse = ', '), "'. One of the files of the dataset is stored in ", filename)) + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. 
One of the files of the dataset is stored in ", filename)) } tmp <- aperm(tmp, dim_matches) } @@ -846,13 +915,15 @@ } if (output == 'areave' || output == 'lon') { - weights <- InsertDim(cos(final_lats * pi / 180), 1, length(final_lons), name = 'lon') + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') weights[which(is.na(x))] <- NA if (output == 'areave') { weights <- weights / mean(weights, na.rm = TRUE) mean(x * weights, na.rm = TRUE) } else { - weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, length(final_lats), name = 'lat') + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') MeanDims(x * weights, 2, na.rm = TRUE) } } else if (output == 'lat') { @@ -920,7 +991,7 @@ ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) ###} if (!silent && !is.null(work_piece[['progress_amount']])) { - message(paste0(work_piece[['progress_amount']]), appendLF = FALSE) + message(work_piece[['progress_amount']], appendLF = FALSE) } found_file } @@ -957,8 +1028,7 @@ dataOut <- sampleData dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] - } - else if (output == 'areave') { + } else if (output == 'areave') { sampleData <- s2dv::sampleTimeSeries if (is.null(leadtimemax)) { leadtimemax <- dim(sampleData$mod)[lead_times_position] @@ -992,7 +1062,10 @@ } else { id <- 'OBS' } - defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), paste0('$DEFAULT_', id, '_FILE_PATH$'), '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') info <- NULL for (entry in matching_entries) { @@ -1042,7 +1115,7 @@ x <- gsub('\\\\', '', x) x <- gsub('\\^', '', x) x <- gsub('\\$', '', x) - x <- unname(sapply(strsplit(x, '[',fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) do.call(paste0, as.list(x)) } else { x @@ -1069,9 +1142,11 @@ right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values) path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags)) - match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), paste0(actual_path, file_name)) + match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), + paste0(actual_path, file_name)) if (match != 1) { - stop("Incorrect parameters to replace glob expressions. The path with expressions does not match the actual path.") + stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") } if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') @@ -1079,32 +1154,41 @@ } } path_with_globs_rx <- utils::glob2rx(path_with_globs) - values_to_replace <- c() - tags_to_replace_starts <- c() - tags_to_replace_ends <- c() + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL give_warning <- FALSE for (tag in tags_to_keep) { matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] lengths <- attr(matches, 'match.length') if (!(length(matches) == 1 && matches[1] == -1)) { - for (i in 1:length(matches)) { + for (i in seq_along(matches)) { left <- NULL if (matches[i] > 1) { - left <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, matches[i] - 1), replace_values) - left_known <- strReverse(head(strsplit(strReverse(left), strReverse('.*'), fixed = TRUE)[[1]], 1)) + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) } right <- NULL if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { - right <- .ConfigReplaceVariablesInString(substr(path_with_globs_rx, matches[i] + lengths[i], nchar(path_with_globs_rx)), replace_values) + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) } - final_match <- NULL match_limits <- NULL if (!is.null(left)) { left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) match_len <- attr(left_match, 'match.length') - left_match_limits <- c(left_match + match_len - 1 - nchar(clean(right_known)) - nchar(replace_values[[tag]]) + 1, - left_match + match_len - 1 - nchar(clean(right_known))) + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) if (!(left_match < 1)) { match_limits <- left_match_limits } @@ -1113,8 +1197,10 @@ if (!is.null(right)) { right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) match_len <- attr(right_match, 'match.length') - right_match_limits <- c(right_match + nchar(clean(left_known)), - right_match + nchar(clean(left_known)) + nchar(replace_values[[tag]]) - 1) + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) if (is.null(match_limits) && !(right_match < 1)) { match_limits <- right_match_limits } @@ -1143,8 +1229,11 @@ while (length(values_to_replace) > 0) { actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), '$', head(values_to_replace, 1), '$', - substr(actual_path, head(tags_to_replace_ends, 1) + 1, nchar(actual_path))) - extra_chars <- nchar(head(values_to_replace, 1)) + 2 - (head(tags_to_replace_ends, 1) - head(tags_to_replace_starts, 1) + 1) + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) values_to_replace <- values_to_replace[-1] tags_to_replace_starts <- tags_to_replace_starts[-1] tags_to_replace_ends <- 
tags_to_replace_ends[-1] @@ -1155,7 +1244,8 @@ if (give_warning) { .warning(paste0("Too complex path pattern specified for ", dataset_name, - ". Double check carefully the '$Files' fetched for this dataset or specify a simpler path pattern.")) + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) } if (permissive) { @@ -1169,11 +1259,11 @@ tag <- paste0('\\$', tag, '\\$') path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] - parts <- as.list(parts[grep(tag, parts)]) - longest_couples <- c() - pos_longest_couples <- c() + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL found_value <- NULL - for (i in 1:length(parts)) { + for (i in seq_along(parts)) { parts[[i]] <- strsplit(parts[[i]], tag)[[1]] if (length(parts[[i]]) == 1) { parts[[i]] <- c(parts[[i]], '') @@ -1184,13 +1274,15 @@ longest_couples <- c(longest_couples, max(len_couples)) } chosen_part <- which.max(longest_couples) - parts[[chosen_part]] <- parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { if (nchar(parts[[chosen_part]][1]) > 0) { matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] if (length(matches) == 1) { match_left <- matches - actual_path <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) } } if (nchar(parts[[chosen_part]][2]) > 0) { @@ -1212,7 +1304,9 @@ matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] if (length(matches) == 1) { match_left <- matches - found_value <- substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) } } } @@ -1292,7 +1386,9 @@ } # Change filenames when necessary if (any(ext != ext[1])) { - .warning(paste0("some extensions of the filenames provided in 'fileout' are not ", ext[1],". The extensions are being converted to ", ext[1], ".")) + .warning(paste0("some extensions of the filenames provided in 'fileout' ", + "are not ", ext[1], + ". The extensions are being converted to ", ext[1], ".")) fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout) } } else { @@ -1359,9 +1455,9 @@ } args[["tag"]] <- NULL - message(paste0(tag, paste(strwrap( - args, indent = indent, exdent = exdent - ), collapse = collapse)), appendLF = appendLF, domain = domain) + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) } .warning <- function(...) { @@ -1430,15 +1526,15 @@ } args[["tag"]] <- NULL - warning(paste0(tag, paste(strwrap( - args, indent = indent, exdent = exdent - ), collapse = collapse)), call. = call, immediate. = immediate, - noBreaks. = noBreaks, domain = domain) + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. 
= noBreaks, domain = domain) } .IsColor <- function(x) { res <- try(col2rgb(x), silent = TRUE) - return(!"try-error" %in% class(res)) + return(!is(res, "try-error")) } # This function switches to a specified figure at position (row, col) in a layout. @@ -1502,7 +1598,7 @@ if (is.numeric(x)) { x <- aperm(x, new_order) } else { - y <- array(1:length(x), dim = dim(x)) + y <- array(seq_along(x), dim = dim(x)) y <- aperm(y, new_order) x <- x[as.vector(y)] } @@ -1521,8 +1617,8 @@ # the same name are found in the two inputs, and they have a different # length, the maximum is taken. .MergeArrayDims <- function(dims1, dims2) { - new_dims1 <- c() - new_dims2 <- c() + new_dims1 <- NULL + new_dims2 <- NULL while (length(dims1) > 0) { if (names(dims1)[1] %in% names(dims2)) { pos <- which(names(dims2) == names(dims1)[1]) @@ -1533,7 +1629,7 @@ new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) new_dims2 <- c(new_dims2, dims2[1:pos]) dims1 <- dims1[-1] - dims2 <- dims2[-c(1:pos)] + dims2 <- dims2[-(1:pos)] } else { new_dims1 <- c(new_dims1, dims1[1]) new_dims2 <- c(new_dims2, 1) @@ -1569,7 +1665,7 @@ new_dims <- .MergeArrayDims(dim(array1), dim(array2)) dim(array1) <- new_dims[[1]] dim(array2) <- new_dims[[2]] - for (j in 1:length(dim(array1))) { + for (j in seq_along(dim(array1))) { if (names(dim(array1))[j] != along) { if (dim(array1)[j] != dim(array2)[j]) { if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { @@ -1641,7 +1737,7 @@ data <- Season(data = data, time_dim = fmonth_dim, monini = monini, moninf = 1, monsup = 12, method = mean, na.rm = na.rm) - names(dim(data))[which(names(dim(data))==fmonth_dim)] <- fyear_dim + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim if (identical(indices_for_clim, FALSE)) { ## data is already anomalies @@ -1657,33 +1753,42 @@ last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) } else { ## indices_for_clim specified as a numeric vector first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) - last_years_for_clim <- seq(from = indices_for_clim[length(indices_for_clim)], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + by = -1, length.out = n_fyears) } data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) anom <- array(data = NA, dim = dim(data)) for (i in 1:n_fyears) { - clim <- mean(data[i,first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) - anom[i,] <- data[i,] - clim + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim } } - } else if (type %in% c('obs','hist')) { + } else if (type %in% c('obs', 'hist')) { - data <- multiApply::Apply(data = data, target_dims = month_dim, fun = mean, na.rm = na.rm)$output1 + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 if (identical(indices_for_clim, FALSE)) { ## data is already anomalies clim <- 0 - } else if (is.null(indices_for_clim)) { ## climatology over the whole period - clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 - } else { ## indices_for_clim specified as a numeric vector - clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, indices = indices_for_clim), + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim 
specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 } anom <- data - clim - } else {stop('type must be dcpp, hist or obs')} + } else { + stop('type must be dcpp, hist or obs') + } return(anom) } @@ -1698,7 +1803,8 @@ GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE bar_limits, var_limits = NULL, triangle_ends = NULL, plot = TRUE, draw_separators = FALSE, - bar_titles = NULL, title_scale = 1, label_scale = 1, extra_margin = rep(0, 4), + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), ...) { # bar_limits if (!is.numeric(bar_limits) || length(bar_limits) != 2) { @@ -1727,7 +1833,7 @@ GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE chosen_sets <- 1:nmap chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) } else { - chosen_sets <- array(1:length(col_sets), nmap) + chosen_sets <- array(seq_along(col_sets), nmap) } cols <- col_sets[chosen_sets] } else { @@ -1742,7 +1848,7 @@ GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE "maps in 'maps'.") } } - for (i in 1:length(cols)) { + for (i in seq_along(cols)) { if (length(cols[[i]]) != (length(brks) - 1)) { cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) } diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000000000000000000000000000000000000..8f040ce145c0c6ffcaf283d10f7d56f9563daa80 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,22 @@ +citHeader("To cite package 's2dv' in publications use:") + +yr <- sub('.*(2[[:digit:]]{3})-.*', '\\1', meta$Date, perl = TRUE) +if (length(yr) == 0) yr <- format(Sys.Date(), '%Y') + +bibentry( + bibtype = 'Manual', + title = paste0(meta$Package, ': ', meta$Title), + author = Filter(function(p) 'aut' %in% p$role, as.person(meta$Author)), + year = yr, + note = paste('R package version', meta$Version), + url = meta$URL +) + +bibentry( + bibtype = "Misc", + author = c(person("Nicolau", "Manubens"), person("", "et al.")), + title = "An R package for climate forecast verification", + doi = "10.1016/j.envsoft.2018.01.018", + publisher = "Elsevier", + year = "2018" +) diff --git a/man/ACC.Rd b/man/ACC.Rd index e1a8fb2e0d566e388c3b2afe210b9f95ea6c2f8f..f733f74181780f00938449d2a44f573dcb767e93 100644 --- a/man/ACC.Rd +++ b/man/ACC.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/ACC.R \name{ACC} \alias{ACC} -\title{Compute the spatial anomaly correlation coefficient between the forecast and corresponding observation} +\title{Compute the spatial anomaly correlation coefficient between the forecast and +corresponding observation} \usage{ ACC( exp, diff --git a/man/Bias.Rd b/man/Bias.Rd index 2a02f2d52b9d39b8df0c7b8fa06ef02d702b1b11..e94beb54298a98aa07d3aadc878808a5ed2a9ca1 100644 --- a/man/Bias.Rd +++ b/man/Bias.Rd @@ -13,6 +13,7 @@ Bias( na.rm = FALSE, absolute = FALSE, time_mean = TRUE, + alpha = 0.05, ncores = NULL ) } @@ -44,6 +45,9 @@ bias. The default value is FALSE.} \item{time_mean}{A logical value indicating whether to compute the temporal mean of the bias. The default value is TRUE.} +\item{alpha}{A numeric or NULL to indicate the significance level using +Welch's t-test. The default value is 0.05. Only available when absolute is +FALSE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation.
The default value is NULL.} } \value{ A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of 'exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation -(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. +(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If +alpha is specified and absolute is FALSE, the result is a list with two +elements: the bias as described above and the significance as a logical array +with the same dimensions. } \description{ The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference @@ -66,6 +73,8 @@ pair of exp and obs data. exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +bias2 <- Bias(exp = exp, obs = obs, memb_dim = 'member', alpha = 0.01) +abs_bias <- Bias(exp = exp, obs = obs, memb_dim = 'member', absolute = TRUE, alpha = NULL) } \references{ diff --git a/man/CDORemap.Rd b/man/CDORemap.Rd index d7eee21405d3ac4822e88d67a19266b040a2f464..9bfeaa557bfce79400e4b1959e1e0c555157c0b3 100644 --- a/man/CDORemap.Rd +++ b/man/CDORemap.Rd @@ -14,6 +14,7 @@ CDORemap( crop = TRUE, force_remap = FALSE, write_dir = tempdir(), + print_sys_msg = FALSE, ncores = NULL ) } @@ -87,6 +88,9 @@ is already on the target grid.} files for CDO to work. By default, the R session temporary directory is used (\code{tempdir()}).} +\item{print_sys_msg}{A logical value indicating whether to print the messages +from the system CDO commands. The default is FALSE to keep the function output +clean.} + \item{ncores}{An integer indicating the number of theads used for interpolation (i.e., \code{-P} in cdo command.) The default value is NULL and \code{-P} is not used.} diff --git a/man/CRPS.Rd b/man/CRPS.Rd index 453c1994608459bdb95eeb36149915b69599f19d..97e6a4838cee93f11a8ebb908ca51d27dd719d1f 100644 --- a/man/CRPS.Rd +++ b/man/CRPS.Rd @@ -11,6 +11,7 @@ CRPS( memb_dim = "member", dat_dim = NULL, Fair = FALSE, + return_mean = TRUE, ncores = NULL ) } @@ -36,6 +37,10 @@ default value is NULL.} potential CRPS that the forecast would have with an infinite ensemble size). The default value is FALSE.} +\item{return_mean}{A logical indicating whether to return the temporal mean +of the CRPS or not. If TRUE, the temporal mean is calculated along time_dim; +if FALSE, the time dimension is not aggregated. The default is TRUE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/CRPSS.Rd b/man/CRPSS.Rd index b6091880d4d632f724aa71bbbb1e2f6d459c69c1..ff99f85867b42faf96bd49f94d731eb736c756b8 100644 --- a/man/CRPSS.Rd +++ b/man/CRPSS.Rd @@ -15,6 +15,7 @@ CRPSS( clim.cross.val = TRUE, sig_method.type = "two.sided.approx", alpha = 0.05, + N.eff = NA, ncores = NULL ) } @@ -65,6 +66,13 @@ the default of \code{RandomWalkTest()}.} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. The default value is 0.05.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test.
It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +(and it will use the length of "obs" along "time_dim", so the +autocorrelation is not taken into account), a numeric (which is used for +all cases), or an array with the same dimensions as "obs" except "time_dim" +(for a particular N.eff to be used for each case). The default value is NA.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/Clim.Rd b/man/Clim.Rd index a5a6f19608a1aef12c82bced943418d74bc9eee3..c3bd96fa0165213f4399ff94156912321ed21f2f 100644 --- a/man/Clim.Rd +++ b/man/Clim.Rd @@ -74,7 +74,8 @@ A list of 2: This function computes per-pair climatologies for the experimental and observational data using one of the following methods: \enumerate{ - \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 https://doi.org/10.1007/s00382-012-1413-1)} + \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012 + https://doi.org/10.1007/s00382-012-1413-1)} \item{Kharin method (Kharin et al, GRL, 2012 https://doi.org/10.1029/2012GL052647)} \item{Fuckar method (Fuckar et al, GRL, 2014 https://doi.org/10.1002/2014GL060815)} } diff --git a/man/Corr.Rd b/man/Corr.Rd index 9fc2d3117e122fc7acbd91ff37570cac53dc0d01..38d5c6469bebd1d668741ee450b8cd5a23e5353a 100644 --- a/man/Corr.Rd +++ b/man/Corr.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/Corr.R \name{Corr} \alias{Corr} -\title{Compute the correlation coefficient between an array of forecast and their corresponding observation} +\title{Compute the correlation coefficient between an array of forecast and their +corresponding observation} \usage{ Corr( exp, diff --git a/man/GetProbs.Rd b/man/GetProbs.Rd index fd84d2f878ecb208baafdf9176e67dfc145070d5..06ad046a1764d59c76f61028281a864e116f2aad 100644 --- a/man/GetProbs.Rd +++ b/man/GetProbs.Rd @@ -10,6 +10,8 @@ GetProbs( memb_dim = "member", indices_for_quantiles = NULL, prob_thresholds = c(1/3, 2/3), + abs_thresholds = NULL, + bin_dim_abs = "bin", weights = NULL, cross.val = FALSE, ncores = NULL @@ -29,12 +31,26 @@ member). The default value is 'member'.} \item{indices_for_quantiles}{A vector of the indices to be taken along 'time_dim' for computing the absolute thresholds between the probabilistic -categories. If NULL, the whole period is used. The default value is NULL.} +categories. If NULL (default), the whole period is used. It is only used +when 'prob_thresholds' is provided.} \item{prob_thresholds}{A numeric vector of the relative thresholds (from 0 to 1) between the categories. The default value is c(1/3, 2/3), which corresponds to tercile equiprobable categories.} +\item{abs_thresholds}{A numeric array or vector of the absolute thresholds in +the same units as \code{data}. If an array is provided, it should have at +least 'bin_dim_abs' dimension. If it has more dimensions (e.g. different +thresholds for different locations, i.e. lon and lat dimensions), they +should match the dimensions of \code{data}, except the member dimension +which should not be included. The default value is NULL and, in this case, +'prob_thresholds' is used for calculating the probabilities.} + +\item{bin_dim_abs}{A character string of the dimension name of +'abs_thresholds' array in which category limits are stored. It will also be +the probabilistic category dimension name in the output. 
The default value +is 'bin'.} + \item{weights}{A named numerical array of the weights for 'data' with dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value is NULL. The ensemble should have at least 70 members or span at least 10 @@ -49,9 +65,9 @@ is FALSE.} computation. The default value is NULL.} } \value{ -A numerical array of probabilities with dimensions c(bin, the rest dimensions -of 'data' except 'memb_dim'). 'bin' dimension has the length of probabilistic -categories, i.e., \code{length(prob_thresholds) + 1}. +A numerical array of probabilities with dimensions c(bin_dim_abs, the rest +dimensions of 'data' except 'memb_dim'). 'bin' dimension has the length of +probabilistic categories, i.e., \code{length(prob_thresholds) + 1}. } \description{ Compute probabilistic forecasts from an ensemble based on the relative @@ -63,11 +79,20 @@ the probabilities are calculated as the percentage of members that fall into each category. For observations (or forecast without member dimension), 1 means that the event happened, while 0 indicates that the event did not happen. Weighted probabilities can be computed if the weights are provided for -each ensemble member and time step. +each ensemble member and time step. The absolute thresholds can also be +provided directly for probabilities calculation. } \examples{ data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', indices_for_quantiles = 4:17) +# abs_thresholds is provided +abs_thr1 <- c(-0.2, 0.3) +abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', + prob_thresholds = NULL, abs_thresholds = abs_thr1) +res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', + prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') + } diff --git a/man/MSE.Rd b/man/MSE.Rd index cd58402766e777dddce5b27027b6cdf2e34fc798..ded3c248a76bc82f3835c6f8fcf1e332c772f96d 100644 --- a/man/MSE.Rd +++ b/man/MSE.Rd @@ -18,7 +18,8 @@ MSE( ) } \arguments{ -\item{exp}{A named numeric array of experimental data, with at least #' 'time_dim' dimension. It can also be a vector with the same length as 'obs'.} +\item{exp}{A named numeric array of experimental data, with at least +'time_dim' dimension. It can also be a vector with the same length as 'obs'.} \item{obs}{A named numeric array of observational data, same dimensions as parameter 'exp' except along 'dat_dim' and 'memb_dim'. It can also be a diff --git a/man/MSSS.Rd b/man/MSSS.Rd index 33df4501aac5e5137446830419d51860471fa797..c3c5662eba72eb242c483961206112dde625badc 100644 --- a/man/MSSS.Rd +++ b/man/MSSS.Rd @@ -14,6 +14,7 @@ MSSS( pval = TRUE, sign = FALSE, alpha = 0.05, + N.eff = NA, sig_method = "one-sided Fisher", sig_method.type = NULL, ncores = NULL @@ -60,6 +61,13 @@ FALSE.} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. The default value is 0.05.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test with the Random Walk. It can be NA (and it will be computed with the +s2dv:::.Eno), FALSE (and it will use the length of "obs" along "time_dim", so the +autocorrelation is not taken into account), a numeric (which is used for +all cases), or an array with the same dimensions as "obs" except "time_dim" +(for a particular N.eff to be used for each case). 
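As an illustration of the accepted forms of 'N.eff' listed above, a hypothetical sketch follows (the object names and values are made up, and the calls are left commented out since they need a matching 'exp' array):

obs <- array(rnorm(40), dim = c(sdate = 10, lat = 2, lon = 2))
neff_map <- array(8, dim = c(lat = 2, lon = 2))  # one effective sample size per grid point
# MSSS(exp, obs, N.eff = NA, sig_method = "Random Walk")        # estimate N.eff internally
# MSSS(exp, obs, N.eff = FALSE, sig_method = "Random Walk")     # length of 'obs' along 'time_dim'
# MSSS(exp, obs, N.eff = 10, sig_method = "Random Walk")        # one numeric used for all cases
# MSSS(exp, obs, N.eff = neff_map, sig_method = "Random Walk")  # one value per case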
The default value is NA.} + \item{sig_method}{A character string indicating the significance method. The options are "one-sided Fisher" (default) and "Random Walk".} diff --git a/man/NAO.Rd b/man/NAO.Rd index 999fd75f6f45177e353006e6bd061e094f5678c2..8115fa22ff25b1a8793827ca5c9203bebb7499e1 100644 --- a/man/NAO.Rd +++ b/man/NAO.Rd @@ -7,6 +7,7 @@ NAO( exp = NULL, obs = NULL, + exp_cor = NULL, lat, lon, time_dim = "sdate", @@ -20,7 +21,7 @@ NAO( } \arguments{ \item{exp}{A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) -forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. If only NAO of observational data needs to be computed, this parameter can be left to NULL. The default value is NULL.} @@ -31,6 +32,13 @@ dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. If only NAO of experimental data needs to be computed, this parameter can be left to NULL. The default value is NULL.} +\item{exp_cor}{A named numeric array of the North Atlantic SLP (20N-80N, 80W-40E) +forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +dimension 'time_dim' of length 1 (as in the case of an operational +forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least. +If only the NAO of the reference period needs to be computed, this parameter can +be left to NULL. The default value is NULL.} + \item{lat}{A vector of the latitudes of 'exp' and 'obs'.} \item{lon}{A vector of the longitudes of 'exp' and 'obs'.} @@ -55,26 +63,33 @@ value is 2:4, i.e., from 2nd to 4th forecast time steps.} \item{obsproj}{A logical value indicating whether to compute the NAO index by projecting the forecast anomalies onto the leading EOF of observational -reference (TRUE) or compute the NAO by first computing the leading -EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the -year you are evaluating out), and then projecting forecast anomalies onto -this EOF (FALSE). The default value is TRUE.} +reference (TRUE, default) or compute the NAO by first computing the leading +EOF of the forecast anomalies (in cross-validation mode, i.e. leaving the +evaluated year out), then projecting forecast anomalies onto this EOF +(FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE +and 'exp' will be used when obsproj is FALSE, and no cross-validation is +applied.} \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } \value{ -A list which contains: +A list which contains some of the following items depending on the data inputs: \item{exp}{ - A numeric array of forecast NAO index in verification format with the same + A numeric array of hindcast NAO index in verification format with the same dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL, ftime_dim remains. } \item{obs}{ - A numeric array of observed NAO index in verification format with the same + A numeric array of observation NAO index in verification format with the same dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL, ftime_dim remains. } +\item{exp_cor}{ + A numeric array of forecast NAO index in verification format with the same + dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL, + ftime_dim remains.
+ } } \description{ Compute the North Atlantic Oscillation (NAO) index based on the leading EOF @@ -84,8 +99,9 @@ observed anomalies onto the observed EOF pattern or the forecast anomalies onto the EOF pattern of the other years of the forecast. By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns -cross-validated PCs of the NAO index for forecast (exp) and observations -(obs) based on the leading EOF pattern. +cross-validated PCs of the NAO index for hindcast (exp) and observations +(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +the NAO index for forecast and the corresponding data (exp and obs). } \examples{ # Make up synthetic data @@ -97,6 +113,8 @@ lat <- seq(20, 80, length.out = 6) lon <- seq(-80, 40, length.out = 9) nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) # plot the NAO index \dontrun{ nao$exp <- Reorder(nao$exp, c(2, 1)) diff --git a/man/Persistence.Rd b/man/Persistence.Rd index 9b09ac3546a880d605f0b48c6772c3a1c38ceaf8..5ce2b5e5eb49d1c483f56b957245db9c1a3ad526 100644 --- a/man/Persistence.Rd +++ b/man/Persistence.Rd @@ -7,11 +7,11 @@ Persistence( data, dates, - time_dim = "time", start, end, ft_start, ft_end = ft_start, + time_dim = "time", max_ft = 10, nmemb = 1, na.action = 10, @@ -27,9 +27,6 @@ The data should start at least 40 time steps (years or days) before \item{dates}{A sequence of 4-digit integers (YYYY) or string (YYYY-MM-DD) in class 'Date' indicating the dates available in the observations.} -\item{time_dim}{A character string indicating the dimension along which to -compute the autoregression. The default value is 'time'.} - \item{start}{A 4-digit integer (YYYY) or a string (YYYY-MM-DD) in class 'Date' indicating the first start date of the persistence forecast. It must be between 1850 and 2020.} @@ -47,6 +44,9 @@ average forecast times for which persistence should be calculated in the case of a multi-timestep average persistence. The default value is 'ft_start'.} +\item{time_dim}{A character string indicating the dimension along which to +compute the autoregression. The default value is 'time'.} + \item{max_ft}{An integer indicating the maximum forecast time possible for 'data'. For example, for decadal prediction 'max_ft' would correspond to 10 (years). The default value is 10.} diff --git a/man/PlotBoxWhisker.Rd b/man/PlotBoxWhisker.Rd index 9c5a3f48ab5ae30097f36dbc78bf141b0f648c9c..e8681607b4b639b9ccfe9b29651f618c2680b699 100644 --- a/man/PlotBoxWhisker.Rd +++ b/man/PlotBoxWhisker.Rd @@ -123,7 +123,7 @@ sampleData$lat[] <- c(20, 80) ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) ano_exp <- array(ano$exp, dim = dim(ano$exp)[-2]) ano_obs <- array(ano$obs, dim = dim(ano$obs)[-2]) -nao <- NAO(ano_exp, ano_obs, sampleData$lat, sampleData$lon) +nao <- NAO(exp = ano_exp, obs = ano_obs, lat = sampleData$lat, lon = sampleData$lon) # Finally plot the nao index \dontrun{ nao$exp <- Reorder(nao$exp, c(2, 1)) diff --git a/man/PlotStereoMap.Rd b/man/PlotStereoMap.Rd index 1b7f166a161c3fff921d517bd4569fa54bd734bf..10e7503e3332b49deb233530a8a670118e313ddd 100644 --- a/man/PlotStereoMap.Rd +++ b/man/PlotStereoMap.Rd @@ -283,6 +283,8 @@ compatible with figure layouts if colour bar is disabled. 
data <- matrix(rnorm(100 * 50), 100, 50) x <- seq(from = 0, to = 360, length.out = 100) y <- seq(from = -90, to = 90, length.out = 50) + \dontrun{ PlotStereoMap(data, x, y, latlims = c(60, 90), brks = 50, toptitle = "This is the title") } +} diff --git a/man/RMSSS.Rd b/man/RMSSS.Rd index 7b31e26f2810fbab6ad23e9d4df0b1302e02d28e..61cc11c4c741f309a7e5cfac393191f164b68742 100644 --- a/man/RMSSS.Rd +++ b/man/RMSSS.Rd @@ -14,6 +14,7 @@ RMSSS( pval = TRUE, sign = FALSE, alpha = 0.05, + N.eff = NA, sig_method = "one-sided Fisher", sig_method.type = NULL, ncores = NULL @@ -60,6 +61,13 @@ FALSE.} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. The default value is 0.05.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test with the Random Walk. It can be NA (and it will be computed with the +s2dv:::.Eno), FALSE (and it will use the length of 'obs' along 'time_dim', so the +autocorrelation is not taken into account), a numeric (which is used for +all cases), or an array with the same dimensions as 'obs' except 'time_dim' +(for a particular N.eff to be used for each case). The default value is NA.} + \item{sig_method}{A character string indicating the significance method. The options are "one-sided Fisher" (default) and "Random Walk".} diff --git a/man/RPS.Rd b/man/RPS.Rd index 041ca0779961570b72cb3349dc8669f3aff0b525..b1374db10226d970d1a4c089f5999a30b011d4da 100644 --- a/man/RPS.Rd +++ b/man/RPS.Rd @@ -16,6 +16,7 @@ RPS( Fair = FALSE, weights = NULL, cross.val = FALSE, + return_mean = TRUE, na.rm = FALSE, ncores = NULL ) @@ -68,6 +69,10 @@ the weighted and unweighted methodologies is desired.} between probabilistic categories in cross-validation. The default value is FALSE.} +\item{return_mean}{A logical indicating whether to return the temporal mean +of the RPS or not. If TRUE, the temporal mean is calculated along time_dim, +if FALSE the time dimension is not aggregated. The default is TRUE.} + \item{na.rm}{A logical or numeric value between 0 and 1. If it is numeric, it means the lower limit for the fraction of the non-NA values. 1 is equal to FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). diff --git a/man/RPSS.Rd b/man/RPSS.Rd index 4b5b52250ab32f830a289d8a3c47122cfffecd2a..a8cd2bce291227eea874946e6ba0f0ab1f897451 100644 --- a/man/RPSS.Rd +++ b/man/RPSS.Rd @@ -21,6 +21,7 @@ RPSS( na.rm = FALSE, sig_method.type = "two.sided.approx", alpha = 0.05, + N.eff = NA, ncores = NULL ) } @@ -99,6 +100,13 @@ the default of \code{RandomWalkTest()}.} \item{alpha}{A numeric of the significance level to be used in the statistical significance test. The default value is 0.05.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be NA (and it will be computed with the s2dv:::.Eno), FALSE +(and it will use the length of 'obs' along 'time_dim', so the +autocorrelation is not taken into account), a numeric (which is used for +all cases), or an array with the same dimensions as 'obs' except 'time_dim' +(for a particular N.eff to be used for each case). The default value is NA.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. 
The default value is NULL.} } @@ -158,7 +166,7 @@ exp_probs <- GetProbs(exp, memb_dim = 'member') obs_probs <- GetProbs(obs, memb_dim = NULL) ref_probs <- GetProbs(ref, memb_dim = 'member') res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, - cat_dim = 'bin') + N.eff = FALSE, cat_dim = 'bin') } \references{ diff --git a/man/RandomWalkTest.Rd b/man/RandomWalkTest.Rd index e123669ca902eef4c1ee581f7c9dbe3226d2d4d3..1440f7f318c7e7ff1486ec338d4ae3d3f1afe325 100644 --- a/man/RandomWalkTest.Rd +++ b/man/RandomWalkTest.Rd @@ -12,6 +12,7 @@ RandomWalkTest( alpha = 0.05, pval = TRUE, sign = FALSE, + N.eff = FALSE, ncores = NULL ) } @@ -47,6 +48,12 @@ significance test. The default value is TRUE.} \item{sign}{A logical value indicating whether to return the statistical significance of the test based on 'alpha'. The default value is FALSE.} +\item{N.eff}{Effective sample size to be used in the statistical significance +test. It can be FALSE (and the length of the time series will be used), a +numeric (which is used for all cases), or an array with the same dimensions +as "skill_A" except "time_dim" (for a particular N.eff to be used for each +case). The default value is FALSE.} + \item{ncores}{An integer indicating the number of cores to use for parallel computation. The default value is NULL.} } diff --git a/man/RatioRMS.Rd b/man/RatioRMS.Rd index 3330eb5a31aec9507a6836451f38ef829fcc108d..4e834ad3d9cf58f6eecd97438e3c62a285911dfa 100644 --- a/man/RatioRMS.Rd +++ b/man/RatioRMS.Rd @@ -69,8 +69,10 @@ ano_obs <- Ano(sampleData$obs, clim$clim_obs) # time step. ano_exp_1 <- ClimProjDiags::Subset(ano_exp, 'member', c(1, 2)) ano_exp_2 <- ClimProjDiags::Subset(ano_exp, 'member', c(3)) -ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), list(1, 1), drop = 'selected') -ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), list(1, 1), drop = 'selected') +ano_exp_1 <- ClimProjDiags::Subset(ano_exp_1, c('dataset', 'ftime'), + list(1, 1), drop = 'selected') +ano_exp_2 <- ClimProjDiags::Subset(ano_exp_2, c('dataset', 'ftime'), + list(1, 1), drop = 'selected') ano_obs <- ClimProjDiags::Subset(ano_obs, c('dataset', 'ftime'), list(1, 1), drop = 'selected') # Compute ensemble mean and provide as inputs to RatioRMS. rrms <- RatioRMS(MeanDims(ano_exp_1, 'member'), diff --git a/man/Season.Rd b/man/Season.Rd index fccd9ffdfd24479c261488f6eeeed2eb863d8eb1..60fbbbc6f12b1b633f90c70be9eff905cf542fec 100644 --- a/man/Season.Rd +++ b/man/Season.Rd @@ -6,10 +6,10 @@ \usage{ Season( data, - time_dim = "ftime", monini, moninf, monsup, + time_dim = "ftime", method = mean, na.rm = TRUE, ncores = NULL @@ -18,10 +18,6 @@ Season( \arguments{ \item{data}{A named numeric array with at least one dimension 'time_dim'.} -\item{time_dim}{A character string indicating the name of dimension along -which the seasonal mean or other calculations are computed. The default -value is 'ftime'.} - \item{monini}{An integer indicating what the first month of the time series is. It can be from 1 to 12.} @@ -31,6 +27,10 @@ calculation. It can be from 1 to 12.} \item{monsup}{An integer indicating the end month of the seasonal calculation. It can be from 1 to 12.} +\item{time_dim}{A character string indicating the name of dimension along +which the seasonal mean or other calculations are computed. The default +value is 'ftime'.} + \item{method}{An R function to be applied for seasonal calculation. For example, 'sum' can be used for total precipitation. 
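To make the 'sum' example above concrete, here is a minimal sketch with made-up monthly precipitation (the dimension names and values are hypothetical):

prec <- array(abs(rnorm(24)), dim = c(ftime = 12, lat = 2))  # one year of monthly totals
jja_total <- Season(prec, monini = 1, moninf = 6, monsup = 8, method = sum)  # JJA accumulated precipitation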
The default value is mean.} diff --git a/man/SprErr.Rd b/man/SprErr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..b9a02140f3fcadb93d144dda37cbbeb6f14686ba --- /dev/null +++ b/man/SprErr.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SprErr.R +\name{SprErr} +\alias{SprErr} +\title{Compute the ratio between the ensemble spread and RMSE} +\usage{ +SprErr( + exp, + obs, + dat_dim = NULL, + memb_dim = "member", + time_dim = "sdate", + pval = TRUE, + sign = FALSE, + alpha = 0.05, + na.rm = FALSE, + ncores = NULL +) +} +\arguments{ +\item{exp}{A named numeric array of experimental data with at least two +dimensions 'memb_dim' and 'time_dim'.} + +\item{obs}{A named numeric array of observational data with at least two +dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +parameter 'exp' except along 'dat_dim' and 'memb_dim'.} + +\item{dat_dim}{A character string indicating the name of dataset (nobs/nexp) +dimension. The default value is NULL (no dataset).} + +\item{memb_dim}{A character string indicating the name of the member +dimension. It must be one dimension in 'exp' and 'obs'. The default value +is 'member'.} + +\item{time_dim}{A character string indicating the name of dimension along +which the ratio is computed. The default value is 'sdate'.} + +\item{pval}{A logical value indicating whether to compute the p-value +of the test Ho: SD/RMSE = 1 or not. The default value is TRUE.} + +\item{sign}{A logical value indicating whether to retrieve the statistical +significance of the test Ho: SD/RMSE = 1 based on 'alpha'. The default value is +FALSE.} + +\item{alpha}{A numeric indicating the significance level for the statistical +significance test. The default value is 0.05.} + +\item{na.rm}{A logical value indicating whether to remove NA values. The +default value is FALSE.} + +\item{ncores}{An integer indicating the number of cores to use for parallel +computation. The default value is NULL.} +} +\value{ +A list of two arrays with dimensions c(nexp, nobs, the rest of + dimensions of 'exp' and 'obs' except memb_dim and time_dim), where nexp is + the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. + If dat_dim is NULL, nexp and nobs are omitted. \cr +\item{$ratio}{ + The ratio of the ensemble spread and RMSE. +} +\item{$p.val}{ + The p-value of the two-sided Fisher's test with Ho: Spread/RMSE = 1. Only + present if \code{pval = TRUE}. +} +} +\description{ +Compute the ratio between the spread of the members around the +ensemble mean in experimental data and the RMSE between the ensemble mean of +experimental and observational data. The p-value and/or the statistical +significance is provided by a two-sided Fisher's test.
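For intuition, the quantities described above can be computed by hand for a single point as in the sketch below; the degrees of freedom assumed for the two-sided F-test are illustrative and may differ from the function's internal choice:

set.seed(1)
exp <- array(rnorm(30), dim = c(member = 5, sdate = 6))
obs <- rnorm(6)
ens_mean <- apply(exp, 2, mean)           # ensemble mean per start date
spread <- sqrt(mean(apply(exp, 2, var)))  # spread of the members around the ensemble mean
rmse <- sqrt(mean((ens_mean - obs)^2))    # error of the ensemble mean
ratio <- spread / rmse
f_stat <- spread^2 / rmse^2
p_val <- 2 * min(pf(f_stat, 5, 5), pf(f_stat, 5, 5, lower.tail = FALSE))  # assumed 5 and 5 dof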
+} +\examples{ +exp <- array(rnorm(30), dim = c(lat = 2, sdate = 3, member = 5)) +obs <- array(rnorm(30), dim = c(lat = 2, sdate = 3)) +sprerr1 <- SprErr(exp, obs) +sprerr2 <- SprErr(exp, obs, pval = FALSE, sign = TRUE) +sprerr3 <- SprErr(exp, obs, pval = TRUE, sign = TRUE) + +} diff --git a/man/s2dv-package.Rd b/man/s2dv-package.Rd index f0c407d5652edc0bdff367888e72101da43d7f7f..24ed0bf802f81c1248d504ca058d81385b3235d9 100644 --- a/man/s2dv-package.Rd +++ b/man/s2dv-package.Rd @@ -20,11 +20,12 @@ Useful links: } \author{ -\strong{Maintainer}: An-Chi Ho \email{an.ho@bsc.es} +\strong{Maintainer}: Ariadna Batalla \email{ariadna.batalla@bsc.es} [contributor] Authors: \itemize{ \item BSC-CNS [copyright holder] + \item An-Chi Ho \email{an.ho@bsc.es} \item Nuria Perez-Zanon \email{nuria.perez@bsc.es} } @@ -37,6 +38,8 @@ Other contributors: \item Andrea Manrique \email{andrea.manrique@bsc.es} [contributor] \item Deborah Verfaillie \email{deborah.verfaillie@bsc.es} [contributor] \item Eva Rifà \email{eva.rifarovira@bsc.es} [contributor] + \item Victòria Agudetse \email{victoria.agudetse@bsc.es} [contributor] + \item Nadia Milders \email{nadia.milders@bsc.es} [contributor] } } diff --git a/tests/testthat.R b/tests/testthat.R index a7dec961977515915d829f763b67f71ad23ccd14..9a6a709beff01f82ef0893df479b9b6859b1f927 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,2 @@ library(testthat) -library(s2dv) - test_check("s2dv") diff --git a/tests/testthat/test-Bias.R b/tests/testthat/test-Bias.R index 4c6cc99f39cf5e981bad69ada0c44c599d97c5ee..3dee73a95b6d4dd8911b2e9e4ae9021bc3ed40e3 100644 --- a/tests/testthat/test-Bias.R +++ b/tests/testthat/test-Bias.R @@ -67,7 +67,7 @@ test_that("1. Input checks", { ) # dat_dim expect_error( - Bias(exp1, obs1, dat_dim = TRUE, ), + Bias(exp1, obs1, dat_dim = TRUE), "Parameter 'dat_dim' must be a character string." ) expect_error( @@ -94,6 +94,11 @@ test_that("1. Input checks", { Bias(exp2, obs2, memb_dim = 'member', time_mean = 1.5), "Parameter 'time_mean' must be one logical value." ) + # alpha + expect_error( + Bias(exp1, obs1, alpha = TRUE), + "Parameter 'alpha' must be null or a numeric value." + ) # ncores expect_error( Bias(exp2, obs2, memb_dim = 'member', ncores = 1.5), @@ -106,25 +111,33 @@ test_that("1. Input checks", { test_that("2. Output checks: dat1", { expect_equal( - dim(Bias(exp1, obs1)), + dim(Bias(exp1, obs1)$bias), + c(lat = 2) + ) + expect_equal( + dim(Bias(exp1, obs1)$sign), c(lat = 2) ) expect_equal( - dim(Bias(exp1, obs1, time_mean = FALSE)), + dim(Bias(exp1, obs1, time_mean = FALSE)$bias), c(sdate = 10, lat = 2) ) expect_equal( - as.vector(Bias(exp1, obs1)), + as.vector(Bias(exp1, obs1)$bias), c(-0.07894886, 0.06907455), tolerance = 0.0001 ) expect_equal( - as.vector(Bias(exp1, obs1, absolute = TRUE)), + as.vector(Bias(exp1, obs1)$sign), + c(FALSE, FALSE), + ) + expect_equal( + as.vector(Bias(exp1, obs1, absolute = TRUE, alpha = NULL)), c(0.9557288, 0.8169118), tolerance = 0.0001 ) expect_equal( - as.vector(Bias(exp1, obs1, time_mean = FALSE, na.rm = TRUE))[1:5], + as.vector(Bias(exp1, obs1, time_mean = FALSE, na.rm = TRUE))$bias[1:5], c(0.27046074, -0.00120586, -2.42347394, 2.72565648, 0.40975953), tolerance = 0.0001 ) @@ -135,20 +148,24 @@ test_that("2. Output checks: dat1", { test_that("3. 
Output checks: dat2", { expect_equal( - dim(Bias(exp2, obs2, memb_dim = 'member')), + dim(Bias(exp2, obs2, memb_dim = 'member')$bias), c(lat = 2) ) expect_equal( - dim(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)), + dim(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)$bias), c(sdate = 10, lat = 2) ) expect_equal( - as.vector(Bias(exp2, obs2, memb_dim = 'member')), + dim(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)$sign), + c(lat = 2) + ) + expect_equal( + as.vector(Bias(exp2, obs2, memb_dim = 'member')$bias), c(-0.02062777, -0.18624194), tolerance = 0.0001 ) expect_equal( - as.vector(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)[1:2,1:2]), + as.vector(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)$bias[1:2,1:2]), c(0.6755093, 0.1949769, 0.4329061, -1.9391461), tolerance = 0.0001 ) @@ -159,30 +176,39 @@ test_that("3. Output checks: dat2", { test_that("4. Output checks: dat3", { expect_equal( - dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')), + dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$bias), + c(nexp = 2, nobs = 3, lat = 2) + ) + expect_equal( + dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$sign), c(nexp = 2, nobs = 3, lat = 2) ) expect_equal( - dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', time_mean = FALSE)), + dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', time_mean = FALSE)$bias), c(sdate = 10, nexp = 2, nobs = 3, lat = 2) ) expect_equal( - as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset'))[5:10], + dim(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', time_mean = FALSE)$sign), + c(nexp = 2, nobs = 3, lat = 2) + ) + expect_equal( + as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$bias)[5:10], c(0.23519286, 0.18346575, -0.18624194, -0.07803352, 0.28918537, 0.39739379), tolerance = 0.0001 ) expect_equal( - as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', absolute = TRUE, time_mean = FALSE))[5:10], + as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', + absolute = TRUE, alpha = NULL, time_mean = FALSE))[5:10], c(0.2154482, 0.8183919, 2.1259250, 0.7796967, 1.5206510, 0.8463483), tolerance = 0.0001 ) expect_equal( - as.vector(Bias(exp2, obs2, memb_dim = 'member')), - as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')[1,1,]) + as.vector(Bias(exp2, obs2, memb_dim = 'member')$bias), + as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset')$bias[1,1,]) ) expect_equal( - as.vector(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)), - as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', time_mean = FALSE)[ ,1,1,]) + as.vector(Bias(exp2, obs2, memb_dim = 'member', time_mean = FALSE)$bias), + as.vector(Bias(exp3, obs3, memb_dim = 'member', dat_dim = 'dataset', time_mean = FALSE)$bias[ ,1,1,]) ) }) @@ -190,49 +216,63 @@ test_that("4. Output checks: dat3", { ############################################## test_that("5. 
Output checks: dat4", { expect_equal( - dim(Bias(exp4, obs4)), + dim(Bias(exp4, obs4)$bias), NULL ) expect_equal( - dim(Bias(exp4, obs4, time_mean = F)), + dim(Bias(exp4, obs4, time_mean = F)$bias), c(sdate = 10) ) expect_equal( - as.vector(Bias(exp4, obs4, time_mean = F)), + dim(Bias(exp4, obs4, time_mean = F)$sign), + dim(Bias(exp4, obs4)$sign), + NULL + ) + expect_equal( + as.vector(Bias(exp4, obs4, time_mean = F)$bias), as.vector(exp4 - obs4) ) expect_equal( - as.vector(Bias(exp4, obs4, time_mean = F, absolute = T)), + as.vector(Bias(exp4, obs4, time_mean = F, absolute = T, alpha = NULL)), abs(as.vector(exp4 - obs4)) ) expect_equal( - as.vector(Bias(exp4, obs4, absolute = T)), + as.vector(Bias(exp4, obs4, absolute = T, alpha = NULL)), mean(abs(as.vector(exp4 - obs4))) ) - + expect_equal( - dim(Bias(exp4_1, obs4_1, dat_dim = 'dataset')), + dim(Bias(exp4_1, obs4_1, dat_dim = 'dataset')$bias), + dim(Bias(exp4_1, obs4_1, dat_dim = 'dataset')$sign), c(nexp = 1, nobs = 1) ) expect_equal( - dim(Bias(exp4_1, obs4_1, dat_dim = 'dataset', time_mean = F)), + dim(Bias(exp4_1, obs4_1, dat_dim = 'dataset', time_mean = F)$bias), c(sdate = 10, nexp = 1, nobs = 1) ) expect_equal( - as.vector(Bias(exp4_1, obs4_1, dat_dim = 'dataset', time_mean = F)), - as.vector(Bias(exp4, obs4, time_mean = F)) + as.vector(Bias(exp4_1, obs4_1, dat_dim = 'dataset', time_mean = F)$bias), + as.vector(Bias(exp4, obs4, time_mean = F)$bias) ) # 4_2: NA expect_equal( - as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset')), + as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset')$bias), as.numeric(NA) ) expect_equal( - as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset', time_mean = F))[c(1, 3)], + as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset')$sign), + as.logical(NA) + ) + expect_equal( + as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset', time_mean = F)$bias)[c(1, 3)], as.numeric(c(NA, NA)) ) expect_equal( - as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset', time_mean = F))[c(2, 4:10)], - as.vector(Bias(exp4, obs4, time_mean = F))[c(2, 4:10)] + as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset', time_mean = F)$sign), + FALSE + ) + expect_equal( + as.vector(Bias(exp4_1, obs4_2, dat_dim = 'dataset', time_mean = F)$bias)[c(2, 4:10)], + as.vector(Bias(exp4, obs4, time_mean = F)$bias)[c(2, 4:10)] ) }) diff --git a/tests/testthat/test-CDORemap.R b/tests/testthat/test-CDORemap.R index 5492d51f6b3611e04b293b2922750636aa23a502..81d4964293765164539759a49b155d79057cb6dd 100644 --- a/tests/testthat/test-CDORemap.R +++ b/tests/testthat/test-CDORemap.R @@ -37,9 +37,21 @@ data4_1 <- drop(data4) data4_2 <- ClimProjDiags::Subset(data4, c(1,2,3,4,8), list(1,1,1,1,1), drop = 'selected') data4_3 <- .aperm2(data4_2, c(1, 3, 2)) - # data5: regular grid, more dimensions - data5 <- array(1:(4*10*8*3*2), dim = c(dat = 1, var = 1, memb = 4, lon = 10, lat = 8, sdate = 3, sweek = 2)) - data5_1 <- aperm(data5, c(1,2,3,6,4,5,7)) +# data5: regular grid, more dimensions +data5 <- array(1:(4*10*8*3*2), dim = c(dat = 1, var = 1, memb = 4, lon = 10, lat = 8, sdate = 3, sweek = 2)) +data5_1 <- aperm(data5, c(1,2,3,6,4,5,7)) + +# data6: regular grid, latitudes and longitudes in descent order +data6 <- rnorm(3*180*360) +dim(data6) <- c(time = 3, latitude = 180, longitude = 360) +lats6 <- seq(89.5, -89.5, -1) +lons6 <- seq(359.5,0.5, -1) + +# data7: regular grid, crop = T +data7 <- array(1:50, dim = c(25, 50)) +names(dim(data7)) <- c('lat', 'lon') +lons7 <- seq(0, 360 - 360/50, length.out = 50) +lats7 <- seq(-90, 90, length.out = 25) 
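As a side note, the new 'print_sys_msg' argument documented above is not exercised by these tests; a hypothetical call is sketched below and kept commented out because it requires CDO to be available on the system:

# res6_verbose <- CDORemap(data6, lats = lats6, lons = lons6, grid = 'r360x181',
#                          method = 'bil', print_sys_msg = TRUE)  # echoes the underlying CDO messages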
############################################## @@ -311,3 +323,65 @@ as.vector(res5_1$lats), ) }) +############################################################ + +test_that("7. data6: regular regrid, descent order", { + suppressWarnings( + res6 <- CDORemap(data6, lats = lats6, lons = lons6, grid = 'r360x181', method = 'bil') + ) + expect_equal( + length(res6$lons), + 360 + ) + expect_equal( + length(res6$lats), + 181 + ) + expect_equal( + min(res6$lons), + 0 + ) + expect_equal( + max(res6$lons), + 359 + ) + expect_equal( + min(res6$lats), + -90 + ) + expect_equal( + max(res6$lats), + 90 + ) +}) + +############################################################ + +test_that("8. data7: regular grid, crop = T, global", { + suppressWarnings( + res7 <- CDORemap(data7, lons7, lats7, 't170grid', 'bil', TRUE) + ) + res_lon7 <- res7$lons + res_lat7 <- res7$lats + expect_equal( + dim(res7$data_array), + c(lat = 256, lon = 512) + ) + expect_equal( + as.vector(res7$lats)[1:10], + c(89.46282, 88.76695, 88.06697, 87.36606, 86.66480, 85.96337, 85.26185, + 84.56026, 83.85864, 83.15699), + tolerance = 0.000001 + ) + expect_equal( + as.vector(res7$lons)[1:10], + c(0.000000, 0.703125, 1.406250, 2.109375, 2.812500, 3.515625, 4.218750, + 4.921875, 5.625000, 6.328125), + tolerance = 0.000001 + ) + expect_equal( + c(min(res_lon7), max(res_lon7), min(res_lat7), max(res_lat7)), + c(0.00000, 359.29688, -89.46282, 89.46282), + tolerance = 0.000001 + ) +}) diff --git a/tests/testthat/test-CRPSS.R b/tests/testthat/test-CRPSS.R index f06919140c68b691ba45dec492d536ece65c0e9c..2c5a392a8925822b6e9bf20c6ef52e55ef94e3b4 100644 --- a/tests/testthat/test-CRPSS.R +++ b/tests/testthat/test-CRPSS.R @@ -332,9 +332,13 @@ test_that("4. Output checks: dat3", { # sig_method.type expect_equal( - as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', sig_method.type = "two.sided", alpha = 0.5)$sign), + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', N.eff = FALSE, sig_method.type = "two.sided", alpha = 0.5)$sign), rep(F, 6) ) + expect_equal( + as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', N.eff = NA, sig_method.type = "two.sided", alpha = 0.5)$sign), + rep(T, 6) + ) expect_equal( as.vector(CRPSS(exp3, obs3, dat_dim = 'dataset', sig_method.type = "less", alpha = 0.5)$sign), rep(T, 6) diff --git a/tests/testthat/test-GetProbs.R b/tests/testthat/test-GetProbs.R index f1958dc22c21bf35c227df8a5159040147bdf8db..47413c1111507ba263d6e3ce1625f378938196d9 100644 --- a/tests/testthat/test-GetProbs.R +++ b/tests/testthat/test-GetProbs.R @@ -6,11 +6,22 @@ data1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, time = 2)) set.seed(2) weights1 <- array(abs(rnorm(30)), dim = c(member = 3, sdate = 10)) +abs_thr1_1 <- c(-0.2, 0.4) +abs_thr1_2 <- array(abs_thr1_1, dim = c(bin = 2)) +set.seed(4) +abs_thr1_3 <- array(abs_thr1_1 + rnorm(20)*0.1, dim = c(bin = 2, sdate = 10)) +abs_thr1_4 <- array(abs_thr1_3, dim = c(dim(abs_thr1_3), time = 2)) + # dat2 set.seed(1) data2 <- array(rnorm(20), dim = c(sdate = 10, time = 2)) set.seed(2) weights2 <- array(abs(rnorm(10)), dim = c(sdate = 10)) +abs_thr2_1 <- c(-0.2, 0.4) +abs_thr2_2 <- array(abs_thr2_1, dim = c(bin = 2)) +set.seed(4) +abs_thr2_3 <- array(abs_thr2_1 + rnorm(20)*0.3, dim = c(bin = 2, sdate = 10)) +abs_thr2_4 <- array(abs_thr2_3, dim = c(dim(abs_thr2_3), time = 2)) ############################################## @@ -47,6 +58,23 @@ test_that("1. Input checks", { GetProbs(data1, prob_thresholds = 1), "Parameter 'prob_thresholds' must be a numeric vector between 0 and 1." 
) + # abs_thresholds + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = abs_thr1_2, bin_dim_abs = 'cat'), + "Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(dim(abs_thr1_3), member = 3))), + "Parameter abs_thresholds' cannot have member dimension." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(dim(abs_thr1_3), extra = 3))), + "Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well." + ) + expect_error( + GetProbs(data1, prob_thresholds = NULL, abs_thresholds = array(abs_thr1_3, dim = c(bin = 2, sdate = 5, time = 2))), + "Parameter 'abs_thresholds' dimensions must have the same length as 'data'." + ) # indices_for_clim expect_error( GetProbs(data1, indices_for_quantiles = array(1:6, dim = c(2, 3))), @@ -94,6 +122,10 @@ dim(GetProbs(data1)), c(bin = 3, sdate = 10, time = 2) ) expect_equal( +dim(GetProbs(data1, bin_dim_abs = "cat")), +c(cat = 3, sdate = 10, time = 2) +) +expect_equal( c(GetProbs(data1)[, 10, 2]), c(0.3333333, 0.3333333, 0.3333333), tolerance = 0.0001 @@ -173,6 +205,30 @@ c(0.3335612, 0.5277459, 0.1386929), tolerance = 0.0001 ) +# abs_threshold +expect_equal( +dim(GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL)), +c(bin = 3, sdate = 10, time = 2) +) +expect_equal( +c(GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 3, 2]), +c(0.3333333, 0.3333333, 0.3333333), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data1, abs_thresholds = abs_thr1_1, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data1, abs_thresholds = abs_thr1_2, prob_thresholds = NULL, indices_for_quantiles = NULL) +) +expect_equal( +c(GetProbs(data1, abs_thresholds = abs_thr1_3, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 3, 2]), +c(0.6666667, 0, 0.3333333), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data1, abs_thresholds = abs_thr1_3, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data1, abs_thresholds = abs_thr1_4, prob_thresholds = NULL, indices_for_quantiles = NULL) +) + }) @@ -254,4 +310,28 @@ c(GetProbs(data2, memb_dim = NULL, cross.val = T, weights = weights2)[, 10, 2]), c(0, 1, 0) ) +# abs_threshold +expect_equal( +dim(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL)), +c(bin = 3, sdate = 10, time = 2) +) +expect_equal( +c(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 5, 1]), +c(0, 1, 0), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_1, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_2, prob_thresholds = NULL, indices_for_quantiles = NULL) +) +expect_equal( +c(GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_3, prob_thresholds = NULL, indices_for_quantiles = NULL)[, 5, 1]), +c(1, 0, 0), +tolerance = 0.0001 +) +expect_equal( +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_3, prob_thresholds = NULL, indices_for_quantiles = NULL), +GetProbs(data2, memb_dim = NULL, abs_thresholds = abs_thr2_4, prob_thresholds = NULL, indices_for_quantiles = NULL) +) + }) diff --git a/tests/testthat/test-NAO.R b/tests/testthat/test-NAO.R index 
91b994329ed8f3051dc2ce9a2150562069bfe640..c3f0f75c9d1bebf4aff8cea366591877a4a39dea 100644 --- a/tests/testthat/test-NAO.R +++ b/tests/testthat/test-NAO.R @@ -6,7 +6,9 @@ obs1 <- array(rnorm(72), dim = c(dataset = 1, member = 1, sdate = 3, ftime = 4, lat = 2, lon = 3)) lat1 <- c(20, 80) lon1 <- c(40, 280, 350) - + set.seed(3) + exp1_cor <- array(rnorm(72), dim = c(sdate = 1, ftime = 4, member = 3, lat = 2, lon = 3)) + # dat2 set.seed(1) exp2 <- array(rnorm(216), dim = c(sdate = 3, ftime = 4, member = 2, lat = 3, lon = 3)) @@ -14,7 +16,8 @@ obs2 <- array(rnorm(108), dim = c(sdate = 3, ftime = 4, lat = 3, lon = 3)) lat2 <- c(80, 50, 20) lon2 <- c(-80, 0, 40) - + set.seed(3) + exp2_cor <- array(rnorm(72), dim = c(sdate = 1, ftime = 4, member = 2, lat = 3, lon = 3)) ############################################## test_that("1. Input checks", { @@ -57,7 +60,7 @@ test_that("1. Input checks", { ) expect_error( NAO(exp1, obs1, time_dim = 'a'), - "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + "Parameter 'time_dim' is not found in 'exp' dimension." ) # memb_dim expect_error( @@ -153,7 +156,7 @@ test_that("1. Input checks", { ) # ncores expect_error( - NAO(exp1, obs1, lat1, lon1, ncore = 3.5), + NAO(exp1, obs1, lat = lat1, lon = lon1, ncore = 3.5), "Parameter 'ncores' must be a positive integer." ) @@ -220,6 +223,38 @@ test_that("2. dat1", { c(NAO(exp1, obs1, lat = lat1, lon = lon1, ftime_avg = 1)$exp) ) + # exp_cor + expect_equal( + names(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)), + c("exp", "obs", "exp_cor") + ) + expect_equal( + dim(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$exp_cor), + c(sdate = 1, member = 3, dataset = 1) + ) + expect_equal( + dim(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$exp), + c(sdate = 3, member = 2, dataset = 1) + ) + expect_equal( + dim(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$obs), + c(sdate = 3, member = 1, dataset = 1) + ) + expect_equal( + c(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$exp_cor), + c(0.3896168, 0.4384543, -0.1302738), + tolerance = 0.0001 + ) + expect_equal( + c(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$exp), + c(-0.1688756, -0.2658420, -0.8049575, -0.3022108, -0.3655258, 0.1237722), + tolerance = 0.0001 + ) + expect_equal( + c(NAO(exp = exp1, obs = obs1, exp_cor = exp1_cor, lat = lat1, lon = lon1)$obs), + c(-0.1762489, 0.1364694, -1.4581406), + tolerance = 0.0001 + ) }) ############################################## @@ -253,6 +288,39 @@ test_that("3. 
dat2", { tolerance = 0.00001 ) + # exp_cor + expect_equal( + names(NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)), + c("exp", "obs", "exp_cor") + ) + expect_equal( + dim(NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$exp_cor), + c(sdate = 1, member = 2) + ) + expect_equal( + dim(NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$exp), + c(sdate = 3, member = 2) + ) + expect_equal( + dim(NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$obs), + c(sdate = 3) + ) + expect_equal( + c((NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$exp_cor)), + c(0.2121340, 0.1634516), + tolerance = 0.0001 + ) + expect_equal( + c((NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$exp)), + c(0.01457391, 0.06668166, 0.20193275, -0.20154315, -0.49487925, -0.04181974), + tolerance = 0.0001 + ) + expect_equal( + c((NAO(exp = exp2, obs = obs2, exp_cor = exp2_cor, lat = lat2, lon = lon2)$obs)), + c(0.3511294, -0.7196260, -1.5123894), + tolerance = 0.0001 + ) + }) ############################################## diff --git a/tests/testthat/test-RPSS.R b/tests/testthat/test-RPSS.R index 5e90e954ff48770d4f1fd848ae27b082e196093c..20acce4f6c41706abdd4fb1d974d867793ce8e74 100644 --- a/tests/testthat/test-RPSS.R +++ b/tests/testthat/test-RPSS.R @@ -272,11 +272,11 @@ test_that("2. Output checks: dat1", { # dat1_2 expect_equal( RPSS(exp1, obs1), - RPSS(exp1_2, obs1_2, memb_dim = NULL, cat_dim = 'bin') + RPSS(exp1_2, obs1_2, memb_dim = NULL, N.eff = FALSE, cat_dim = 'bin') ) expect_equal( RPSS(exp1, obs1, ref1), - RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, cat_dim = 'bin') + RPSS(exp1_2, obs1_2, ref1_2, memb_dim = NULL, N.eff = FALSE, cat_dim = 'bin') ) # dat1_3 @@ -309,10 +309,14 @@ test_that("2. 
Output checks: dat1", { c(F, F) ) expect_equal( - c(RPSS(exp1 + 1:60, obs = obs1 + 1:20, alpha = 0.025, sig_method.type = "two.sided")$sign), + c(RPSS(exp1 + 1:60, obs = obs1 + 1:20, alpha = 0.025, N.eff = FALSE, sig_method.type = "two.sided")$sign), c(T, T) ) expect_equal( + c(RPSS(exp1 + 1:60, obs = obs1 + 1:20, alpha = 0.025, N.eff = NA, sig_method.type = "two.sided")$sign), + c(F, F) + ) + expect_equal( c(RPSS(exp1 + 1:60, obs = obs1 + 1:20, alpha = 0.01, sig_method.type = "two.sided")$sign), c(F, F) ) diff --git a/tests/testthat/test-SprErr.R b/tests/testthat/test-SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..65ff728388ff671e2230fa0876396fc01da8de4c --- /dev/null +++ b/tests/testthat/test-SprErr.R @@ -0,0 +1,597 @@ +library(s2dv) +library(testthat) +library(multiApply) +library(ClimProjDiags) + + +############################################## +# data +############################################## + +# dat1 +set.seed(1) +exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2)) +set.seed(2) +obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2)) + +# dat1_2 +exp1_2 <- exp1 + 1 +obs1_2 <- obs1 + 1 + +# dat1_3: NAs +exp1_3 <- exp1; exp1_3[1, 2, 1] <- NA +obs1_3 <- obs1; obs1_3[2, 1] <- NA + +# dat2 +set.seed(1) +exp2 <- array(rnorm(20), dim = c(member = 2, sdate = 10)) +set.seed(2) +obs2 <- array(rnorm(10), dim = c(sdate = 10)) +set.seed(2) +obs2_1 <- array(rnorm(10), dim = c(member = 1, sdate = 10)) + +# dat3 +set.seed(1) +exp3 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3)) +set.seed(2) +obs3 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2)) + +# dat3_2 +set.seed(1) +exp3_2 <- array(rnorm(80), dim = c(member = 4, sdate = 5, dataset = 4)) +set.seed(2) +obs3_2 <- array(rnorm(30), dim = c(member = 2, sdate = 5, dataset = 3)) + +# dat4 +set.seed(1) +exp4 <- array(rnorm(60), dim = c(member = 2, sdate = 10, dataset = 3, lat = 2)) +set.seed(2) +obs4 <- array(rnorm(20), dim = c(member = 1, sdate = 10, dataset = 2, lat = 2)) + +# dat4_2: NAs +exp4_2 <- exp4; exp4_2[1, 2, 1, 1] <- NA +obs4_2 <- obs4; obs4_2[1, 1:4, 1, 1] <- NA + + +############################################## +# tests +############################################## + +test_that("1. Input checks", { + + # exp and obs (1) + expect_error( + SprErr(c(), obs1), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + SprErr(obs1, c()), + "Parameter 'exp' and 'obs' cannot be NULL." + ) + expect_error( + SprErr("", obs1), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + SprErr(exp1, ""), + "Parameter 'exp' and 'obs' must be a numeric array." + ) + expect_error( + SprErr(1, obs1), + "Parameter 'exp' and 'obs' must be array with as least two dimensions memb_dim and time_dim." + ) + expect_error( + SprErr(exp1, 1), + "Parameter 'exp' and 'obs' must be array with as least two dimensions memb_dim and time_dim." + ) + expect_error( + SprErr(array(1), obs1), + "Parameter 'exp' and 'obs' must have dimension names." + ) + expect_error( + SprErr(exp1, array(1)), + "Parameter 'exp' and 'obs' must have dimension names." + ) + # dat_dim + expect_error( + SprErr(exp1, obs1, dat_dim = 1), + "Parameter 'dat_dim' must be a character string." + ) + expect_error( + SprErr(exp1, obs1, dat_dim = 'dat'), + "Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension." + ) + # memb_dim + expect_error( + SprErr(exp1, obs1, memb_dim = 1), + "Parameter 'memb_dim' must be a character string." 
+ ) + expect_error( + SprErr(exp1, obs1, memb_dim = 'check'), + "Parameter 'memb_dim' is not found in 'exp' dimensions. 'exp' must have the member dimension to compute the spread." + ) + # time_dim + expect_error( + SprErr(exp1, obs1, time_dim = 1), + "Parameter 'time_dim' must be a character string." + ) + expect_error( + SprErr(exp1, obs1, time_dim = 'time'), + "Parameter 'time_dim' is not found in 'exp' or 'obs' dimension." + ) + # exp and obs (2) + expect_error( + SprErr(exp1, array(1:9, dim = c(sdate = 9))), + "Parameter 'exp' and 'obs' must have same length of all the dimensions except 'dat_dim' and 'memb_dim'." + ) + # pval + expect_error( + SprErr(exp1, obs1, pval = 1), + "Parameter 'pval' must be one logical value." + ) + # sign + expect_error( + SprErr(exp1, obs1, sign = 1), + "Parameter 'sign' must be one logical value." + ) + # alpha + expect_error( + SprErr(exp1, obs1, alpha = -0.05), + "Parameter 'alpha' must be a numeric number between 0 and 1." + ) + # na.rm + expect_error( + SprErr(exp1, obs1, na.rm = ""), + "Parameter 'na.rm' must be TRUE or FALSE" + ) + # ncores + expect_error( + SprErr(exp2, obs2, ncores = 1.5), + "Parameter 'ncores' must be a positive integer." + ) +}) + +############################################## + + +test_that("2. Output checks: dat1", { + + # element names + expect_equal( + names(SprErr(exp1, obs1)), + c("ratio", "p.val") + ) + expect_equal( + names(SprErr(exp1, obs1, pval = FALSE, sign = TRUE)), + c("ratio", "sign") + ) + expect_equal( + names(SprErr(exp1, obs1, pval = TRUE, sign = TRUE)), + c("ratio", "p.val", "sign") + ) + # dimensions + expect_equal( + dim(SprErr(exp1, obs1)$ratio), + c(lat = 2) + ) + expect_equal( + dim(SprErr(exp1, obs1)$p.val), + c(lat = 2) + ) + expect_equal( + dim(SprErr(exp1, obs1, pval = FALSE, sign = TRUE)$ratio), + c(lat = 2) + ) + expect_equal( + dim(SprErr(exp1, obs1, pval = FALSE, sign = TRUE)$sign), + c(lat = 2) + ) + # values + expect_equal( + as.vector(SprErr(exp1, obs1)$ratio), + c(1.0646692, 0.6657522), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1, obs1)$p.val), + c(0.8549593, 0.2412730), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1, obs1, sign = TRUE)$sign), + c(FALSE, FALSE) + ) + # pval = FALSE + expect_equal( + as.vector(SprErr(exp1, obs1, pval = FALSE, sign = TRUE)$ratio), + c(01.0646692, 0.6657522), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1, obs1, pval = FALSE, sign = TRUE)$sign), + c(FALSE, FALSE) + ) + # na.rm = TRUE + expect_equal( + as.vector(SprErr(exp1, obs1, na.rm = TRUE)$ratio), + c(1.0646692, 0.6657522), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1, obs1, na.rm = TRUE)$p.val), + c(0.8549593, 0.2412730), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1, obs1, sign = TRUE, na.rm = TRUE)$sign), + c(FALSE, FALSE) + ) + # alpha + expect_equal( + as.vector(SprErr(exp1, obs1, sign = TRUE, alpha = 0.5)$sign), + c(FALSE, TRUE) + ) + expect_equal( + as.vector(SprErr(exp1, obs1, sign = TRUE, alpha = 0.99)$sign), + c(TRUE, TRUE) + ) + + # dat1_2 + expect_equal( + SprErr(exp1, obs1), + SprErr(exp1_2, obs1_2) + ) + expect_equal( + SprErr(exp1, obs1, sign = TRUE)$ratio, + SprErr(exp1_2, obs1_2)$ratio + ) + expect_equal( + SprErr(exp1, obs1, sign = TRUE)$p.val, + SprErr(exp1_2, obs1_2)$p.val + ) + + # dat1_3 + expect_equal( + as.vector(SprErr(exp1_3, obs1_3)$ratio), + c(NA, 0.6657522), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1_3, obs1_3)$p.val), + c(NA, 0.241273), + tolerance = 0.0001 + ) 
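# Note: the NA in the first latitude above comes from the NA values inserted in
# exp1_3/obs1_3 together with the default na.rm = FALSE; the na.rm = TRUE
# expectations below recover finite values for that latitude.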
+ expect_equal( + as.vector(SprErr(exp1_3, obs1_3, sign = TRUE)$sign), + c(NA, FALSE) + ) + expect_equal( + as.vector(SprErr(exp1_3, obs1_3, na.rm = TRUE)$ratio), + c(0.9656329, 0.6657522), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1_3, obs1_3, na.rm = TRUE)$p.val), + c(0.896455, 0.241273), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp1_3, obs1_3, sign = TRUE, na.rm = TRUE)$sign), + c(FALSE, FALSE) + ) + +}) + +############################################## + + +test_that("3. Output checks: dat2", { + + expect_equal( + names(SprErr(exp2, obs2)), + c("ratio", "p.val") + ) + expect_equal( + names(SprErr(exp2, obs2, sign = TRUE)), + c("ratio", "p.val","sign") + ) + expect_equal( + dim(SprErr(exp2, obs2)$ratio), + NULL + ) + expect_equal( + dim(SprErr(exp2, obs2)$p.val), + NULL + ) + expect_equal( + dim(SprErr(exp2, obs2, sign = TRUE)$sign), + NULL + ) + # values + expect_equal( + as.vector(SprErr(exp2, obs2)$ratio), + c(0.6866402), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp2, obs2)$p.val), + c(0.2779936), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp2, obs2, sign = TRUE)$sign), + FALSE + ) + # sign = TRUE + expect_equal( + as.vector(SprErr(exp2, obs2, sign = TRUE)$ratio), + c(0.6866402), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp2, obs2, sign = TRUE)$p.val), + c(0.2779936), + tolerance = 0.0001 + ) + # alpha + expect_equal( + as.vector(SprErr(exp2, obs2, sign = TRUE, alpha = 0.99)$sign), + TRUE + ) + # na.rm = TRUE + expect_equal( + as.vector(SprErr(exp2, obs2, na.rm = TRUE)$ratio), + c(0.6866402), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp2, obs2, na.rm = TRUE)$p.val), + c(0.2779936), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp2, obs2, na.rm = TRUE, sign = TRUE)$sign), + FALSE + ) + # other + expect_equal( + SprErr(exp2, obs2), + SprErr(exp2, obs2_1) + ) + +}) + +############################################## + + +test_that("4. 
Output checks: dat3", { + + expect_equal( + dim(SprErr(exp3, obs3, dat_dim = 'dataset')$ratio), + c('nexp' = 3, 'nobs' = 2) + ) + expect_equal( + dim(SprErr(exp3, obs3, dat_dim = 'dataset')$p.val), + c('nexp' = 3, 'nobs' = 2) + ) + # values + expect_equal( + mean(SprErr(exp3, obs3, dat_dim = 'dataset')$ratio), + c(0.5831841), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset')$ratio), + c(0.7006396, 0.6277856, 0.4211269, 0.7006396, 0.6277856, 0.4211269), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset')$p.val)[1:3], + c(0.30405979, 0.18162950, 0.01675207), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', sign = TRUE)$sign)[1:3], + c(FALSE, FALSE, TRUE) + ) + expect_equal( + mean(SprErr(exp3, obs3, dat_dim = 'dataset', sign = TRUE)$ratio), + c(0.5831841), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', sign = TRUE)$p.val)[1:3], + c(0.30405979, 0.18162950, 0.01675207), + tolerance = 0.0001 + ) + # alpha + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', sign = TRUE, alpha = 0.99)$sign)[1:3], + c(TRUE, TRUE, TRUE) + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', sign = TRUE, alpha = 0.20)$sign)[1:3], + c(FALSE, TRUE, TRUE) + ) + # na.rm = TRUE + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', na.rm = TRUE)$ratio), + c(0.7006396, 0.6277856, 0.4211269, 0.7006396, 0.6277856, 0.4211269), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', na.rm = TRUE)$p.val)[1:3], + c(0.30405979, 0.18162950, 0.01675207), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3, obs3, dat_dim = 'dataset', na.rm = TRUE, sign = TRUE)$sign)[1:3], + c(FALSE, FALSE, TRUE) + ) + + # dat3_2 + expect_equal( + dim(SprErr(exp3_2, obs3_2, dat_dim = 'dataset')$ratio), + c('nexp' = 4, 'nobs' = 3) + ) + expect_equal( + dim(SprErr(exp3_2, obs3_2, dat_dim = 'dataset')$p.val), + c('nexp' = 4, 'nobs' = 3) + ) + # values + expect_equal( + mean(SprErr(exp3_2, obs3_2, dat_dim = 'dataset')$ratio), + c(1.25586), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset')$p.val)[1:4], + c(0.6927309, 0.7390035, 0.8834023, 0.4421531), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', sign = TRUE)$sign)[1:4], + c(FALSE, FALSE, FALSE, FALSE) + ) + expect_equal( + mean(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', sign = TRUE)$ratio), + c(1.25586), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', sign = TRUE)$p.val)[1:4], + c(0.6927309, 0.7390035, 0.8834023, 0.4421531), + tolerance = 0.0001 + ) + # alpha + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', sign = TRUE, alpha = 0.99)$sign)[1:3], + c(TRUE, TRUE, TRUE) + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', sign = TRUE, alpha = 0.70)$sign)[1:4], + c(TRUE, FALSE, FALSE, TRUE) + ) + # na.rm = TRUE + expect_equal( + mean(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', na.rm = TRUE)$ratio), + c(1.25586), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', na.rm = TRUE)$p.val)[1:4], + c(0.6927309, 0.7390035, 0.8834023, 0.4421531), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp3_2, obs3_2, dat_dim = 'dataset', na.rm = TRUE, + alpha = 0.70, sign = TRUE)$sign)[1:4], + c(TRUE, FALSE, 
FALSE, TRUE) + ) + +}) + +############################################## + + +test_that("5. Output checks: dat4", { + + expect_equal( + dim(SprErr(exp4, obs4, dat_dim = 'dataset')$ratio), + c('nexp' = 3, 'nobs' = 2, 'lat' = 2) + ) + expect_equal( + dim(SprErr(exp4, obs4, dat_dim = 'dataset', sign = TRUE)$p.val), + c('nexp' = 3, 'nobs' = 2, 'lat' = 2) + ) + # values + expect_equal( + mean(SprErr(exp4, obs4, dat_dim = 'dataset')$ratio), + c(0.5831841), + tolerance = 0.0001 + ) + expect_equal( + mean(SprErr(exp4, obs4, dat_dim = 'dataset')$p.val), + c(0.1674805), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4, obs4, dat_dim = 'dataset')$ratio[, , 2]), + c(0.7006396, 0.6277856, 0.4211269, 0.7006396, 0.6277856, 0.4211269), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4, obs4, dat_dim = 'dataset')$p.val[, , 2]), + c(0.30405979, 0.18162950, 0.01675207, 0.30405979, 0.18162950, 0.01675207), + tolerance = 0.0001 + ) + expect_equal( + SprErr(exp4, obs4, dat_dim = 'dataset')$ratio[, , 1], + SprErr(exp3, obs3, dat_dim = 'dataset')$ratio + ) + expect_equal( + mean(SprErr(exp4, obs4, dat_dim = 'dataset', sign = TRUE)$ratio), + c(0.5831841), + tolerance = 0.0001 + ) + expect_equal( + mean(SprErr(exp4, obs4, dat_dim = 'dataset', sign = TRUE)$p.val), + c(0.1674805), + tolerance = 0.0001 + ) + + # dat4_2: NAs + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$ratio[, , 1]), + c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$p.val[, , 1]), + c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', sign = TRUE)$sign[, , 1]), + c(NA, NA, NA, NA, NA, NA), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$ratio[, , 2]), + c(0.7006396, 0.6277856, 0.4211269, 0.7006396, 0.6277856, 0.4211269), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$p.val[, , 2]), + c(0.30405979, 0.18162950, 0.01675207, 0.30405979, 0.18162950, 0.01675207), + tolerance = 0.0001 + ) + expect_equal( + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$ratio)), + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset')$p.val)) + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$ratio[, , 1]), + c(0.4648097, 0.6571888, 0.3975804, 0.4648097, 0.6571888, 0.3975804), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$p.val[, , 1]), + c(0.04674814, 0.21983481, 0.01350139, 0.04351013, 0.22700305, 0.01124875), + tolerance = 0.0001 + ) + expect_equal( + as.vector(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm =TRUE, sign = TRUE)$sign[, , 1]), + c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE), + tolerance = 0.0001 + ) + expect_equal( + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$ratio)), + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$p.val)) + ) + expect_equal( + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$ratio)), + which(is.na(SprErr(exp4_2, obs4_2, dat_dim = 'dataset', na.rm = TRUE)$p.val)) + ) + +})
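For quick manual verification outside testthat, the first dat1 expectation above can be reproduced interactively (assuming a build of the package that includes the new SprErr()):

library(s2dv)
set.seed(1)
exp1 <- array(rnorm(60), dim = c(member = 3, sdate = 10, lat = 2))
set.seed(2)
obs1 <- array(rnorm(20), dim = c(sdate = 10, lat = 2))
round(SprErr(exp1, obs1)$ratio, 7)  # expected: c(1.0646692, 0.6657522), as in the test above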