diff --git a/DESCRIPTION b/DESCRIPTION index 31a5e21886951aa67918788e51d4e13ee61ce3d5..7f5ee51271bfd44ddf337f8db7e69bf745b4eb46 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,7 @@ Imports: s2dverification, rainfarmr, multiApply, + qmap, ClimProjDiags, ncdf4, plyr, diff --git a/NAMESPACE b/NAMESPACE index 22fc489a331107c86b096ad9192a93c5ea3d2003..cd4603c89f8fea52daba8bedc58b4bfd3240f859 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,10 +14,12 @@ export(CST_Load) export(CST_MultiEOF) export(CST_MultiMetric) export(CST_MultivarRMSE) +export(CST_QuantileMapping) export(CST_RFSlope) export(CST_RFWeights) export(CST_RainFARM) export(CST_SaveExp) +export(CST_SplitDim) export(EnsClustering) export(MultiEOF) export(PlotCombinedMap) @@ -25,12 +27,14 @@ export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(RFSlope) export(RainFARM) +export(SplitDim) export(as.s2dv_cube) export(s2dv_cube) import(abind) import(ggplot2) import(multiApply) import(ncdf4) +import(qmap) import(rainfarmr) import(s2dverification) import(stats) diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R new file mode 100644 index 0000000000000000000000000000000000000000..6682e7aa65b84c0a60cbe149f00b9ec995d25a4c --- /dev/null +++ b/R/CST_QuantileMapping.R @@ -0,0 +1,331 @@ +#'Quantiles Mapping for seasonal or decadal forecast data +#' +#'@description This function is a wrapper from fitQmap and doQmap from package 'qmap'to be applied in CSTools objects of class 's2dv_cube'. The quantile mapping adjustment between an experiment, tipically a hindcast, and observations is applied to the experiment itself or to a provided forecast. +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#'@param exp an object of class \code{s2dv_cube} +#'@param obs an object of class \code{s2dv_cube} +#'@param exp_cor an object of class \code{s2dv_cube} in which the quantile mapping correction will be applied. If it is not specified, the correction is applied in object \code{exp}. +#'@param sample_dims a character vector indicating the dimensions that can be used as sample for the same distribution +#'@param sample_length a numeric value indicating the length of the timeseries window to be used as sample for the sample distribution and correction. By default, NULL, the total length of the timeseries will be used. +#'@param method a character string indicating the method to be used: 'PTF','DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used. +#'@param ncores an integer indicating the number of parallel processes to spawn for the use for parallel computation in multiple cores. +#'@param ... additional arguments passed to the method specified by \code{method}. +#' +#'@details The different methods are: +#'\itemize{ +#'\item{'PTF'} {fits a parametric transformations to the quantile-quantile relation of observed and modelled values. See \code{\link[qmap]{fitQmapPTF}}.} +#' \item{'DIST'} {fits a theoretical distribution to observed and to modelled time series. See \code{\link[qmap]{fitQmapDIST}}.} +#'\item{'RQUANT'} {estimates the values of the quantile-quantile relation of observed and modelled time series for regularly spaced quantiles using local linear least square regression. See \code{\link[qmap]{fitQmapRQUANT}}.} +#'\item{'QUANT'} {estimates values of the empirical cumulative distribution function of observed and modelled time series for regularly spaced quantiles. See \code{\link[qmap]{fitQmapQUANT}}.} +#'\item{'SSPLIN'} {fits a smoothing spline to the quantile-quantile plot of observed and modelled time series. See \code{\link[qmap]{fitQmapSSPLIN}}}.} +#'All methods accepts some common arguments: +#'\itemize{ +#'\item{wet.day} {logical indicating whether to perform wet day correction or not.(Not available in 'DIS' method)} +#'\item{qstep} {NULL or a numeric value between 0 and 1.}} +#'@return an oject of class \code{s2dv_cube} containing the experimental data after applyingthe quantile mapping correction. +#') <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +#'@import qmap +#'@import multiApply +#'@import abind +#' +#'@seealso \code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +#'@examples +#'library(qmap) +#'exp <- 1 : (1 * 5 * 10 * 6 * 2 * 3) +#'dim(exp) <- c(dataset = 1, member = 10, sdate = 5, ftime = 6 , +#' lat = 2, lon = 3) +#'exp <- list(data = exp) +#'class(exp) <- 's2dv_cube' +#'obs <- 101 : (100 + 1 * 1 * 5 * 6 * 2 * 3) +#'dim(obs) <- c(dataset = 1, member = 1, sdate = 5, ftime = 6 , +#' lat = 2, lon = 3) +#'obs <- list(data = obs) +#'class(obs) <- 's2dv_cube' +#'res <- CST_QuantileMapping(exp, obs, method = 'RQUANT') +#' +#'exp <- lonlat_data$exp +#'obs <- lonlat_data$obs +#'res <- CST_QuantileMapping(exp, obs) +#' +#'\donttest{ +#'data(obsprecip) +#'data(modprecip) +#'exp <- modprecip$MOSS[1:10000] +#'dim(exp) <- c(time = length(exp)) +#'exp <- list(data = exp) +#'class(exp) <- 's2dv_cube' +#'obs <- obsprecip$MOSS[1:10000] +#'dim(obs) <- c(time = length(obs)) +#'obs <- list(data = obs) +#'class(obs) <- 's2dv_cube' +#'res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', +#' method = 'DIST') +#'} +#'@export +CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, + sample_dims = c('sdate', 'ftime', 'member'), + sample_length = NULL, method = 'QUANT', ncores = NULL, ...) { + if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { + stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (!is.null(exp_cor)) { + if (!inherits(exp_cor, 's2dv_cube')) { + stop("Parameter 'exp_cor' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + } + if (!(method %in% c('PTF','DIST','RQUANT','QUANT','SSPLIN'))) { + stop("Parameter 'method' must be one of the following methods: ", + "'PTF','DIST','RQUANT','QUANT','SSPLIN'.") + } + dimnames <- names(dim(exp$data)) + QMapped <- QuantileMapping(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, + sample_dims = sample_dims, sample_length = sample_length, + method = method, ncores = ncores, ...) + pos <- match(dimnames, names(dim(QMapped))) + QMapped <- aperm(QMapped, pos) + names(dim(QMapped)) <- dimnames + exp$data <- QMapped + exp$Datasets <- c(exp$Datasets, obs$Datasets) + exp$source_files <- c(exp$source_files, obs$source_files) + return(exp) +} +QuantileMapping <- function(exp, obs, exp_cor = NULL, sample_dims = 'ftime', + sample_length = NULL, method = 'QUANT', ncores = NULL, ...) { + obsdims <- names(dim(obs)) + expdims <- names(dim(exp)) + if (is.null(expdims)) { + stop("Parameter 'exp' must have dimension names.") + } + if (is.null(obsdims)) { + stop("Parameter 'obs' must have dimension names.") + } + if (any(is.na(exp))) { + stop("Parameter 'exp' contains NA values.") + } + if (any(is.na(obs))) { + stop("Parameter 'obs' contains NA values.") + } + if (!is.null(exp_cor)) { + exp_cordims <- names(dim(exp_cor)) + if (is.null(exp_cordims)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + } + if (!all(sample_dims %in% expdims)) { + stop("Parameter 'sample_dims' must be a vector of string character ", + "indicating names of exiting dimension in parameter 'exp'.") + } + ## The sample_dims could be of any length (even different between exp and obs) + ## but the repeated dims that aren't in sample_dims should be drop: + commondims <- obsdims[obsdims %in% expdims] + commondims <- names(which(unlist(lapply(commondims, function(x) { + dim(obs)[obsdims == x] != dim(exp)[expdims == x]})))) + if (any(!(commondims %in% sample_dims))) { + todrop <- commondims[!(commondims %in% sample_dims)] + todrop <- match(todrop, obsdims) + if (all(dim(obs)[todrop] != 1)) { + stop("Review parameter 'sample_dims' or the data dimensions", + "since multiple dimensions with different length have ", + "being found in the data inputs that don't match with ", + "'sample_dims' parameter.") + } else { + obs <- adrop(obs, drop = todrop) + } + } + + if (!all(sample_dims %in% obsdims)) { + newobsdims <- sample_dims[!sample_dims %in% obsdims] + dim(obs) <- c(dim(obs), 1 : length(newobsdims)) + names(dim(obs))[-c(1:length(obsdims))] <- newobsdims + } + if (!is.null(sample_length) & !is.numeric(sample_length)) { + warning("Parameter 'sample_length' has not been correctly defined and ", + "the whole length of the timeseries will be used.") + sample_length <- NULL + } + if (length(sample_length) > 1) { + warning("Parameter 'sample_length' has length > 1 and only the first ", + "element will be used.") + sample_length <- sample_length[1] + } + if (!is.character(method)) { + warning("Parameter 'method' must be a character string indicating ", + "one of the following methods: 'PTF', 'DIST', 'RQUANT', + 'QUANT', 'SSPLIN'. Method 'QUANT' is being used.") + method = 'QUANT' + } + if (length(method) > 1) { + warning("Parameter 'method' has length > 1 and only the first element", + " is used.") + method <- method[1] + } + +# qmaped <- Apply(list(exp, obs), target_dims = sample_dims, fun = qmapcor, ..., +# exp_cor = exp_cor, sample_length = sample_length, + if (is.null(exp_cor)) { + qmaped <- Apply(list(exp, obs), target_dims = sample_dims, + fun = qmapcor, ..., sample_length = sample_length, + method = method, ncores = ncores)$output1 + } else { + qmaped <- Apply(list(exp, obs, exp_cor), target_dims = sample_dims, + fun = qmapcor, ..., sample_length = sample_length, + method = method, ncores = ncores)$output1 + } + pos <- match(expdims, names(dim(qmaped))) + qmaped <- aperm(qmaped, pos) + dim(qmaped) <- dim(exp) + return(qmaped) +} +qmapcor <- function(exp, obs, exp_cor = NULL, sample_length = NULL, method = 'QUANT', + ...) { + dimnames_exp <- names(dim(exp)) + dimnames_obs <- names(dim(obs)) + if (length(dimnames_exp) != length(dimnames_obs)) { + stop("Parameters 'exp' and 'obs' must have the same number of dimensions.") + } + if (!all(dimnames_exp %in% dimnames_obs)) { + stop("Parameters 'exp' and 'obs' must have the same dimension names.") + } + if (!(any(names(dim(exp)) %in% 'ftime') | any(names(dim(exp)) %in% 'time') | + any(names(dim(exp)) %in% 'sdate'))) { + stop("Parameters 'exp' and 'obs' must have a temporal dimension named ", + "'time', 'ftime' or 'sdate'.") + } + + dimensions <- dim(exp) + if (!is.null(exp_cor)) { + dimensions <- dim(exp_cor) + } + if (any(names(dim(exp)) %in% 'ftime') | any(names(dim(exp)) %in% 'time')) { + time_dim <- which(names(dim(exp)) == 'ftime' | names(dim(exp)) %in% 'time') + } else { + time_dim <- which(names(dim(exp)) == 'sdate') + } + if (any(names(dim(obs)) %in% 'ftime') | any(names(dim(obs)) %in% 'time')) { + time_dim_obs <- which(names(dim(obs)) == 'ftime' | names(dim(obs)) %in% 'time') + } else { + time_dim_obs <- which(names(dim(obs)) == 'sdate') + } + if (is.null(sample_length)) { + sample_length <- dim(exp)[time_dim] + } + nsamples <- dim(exp)[time_dim]/sample_length + if (nsamples %% 1 != 0) { + # add NA to complete the last sample + nsamples <- ceiling(nsamples) + fillsample1D <- rep(NA, nsamples * sample_length - dim(exp)[time_dim]) + if (length(dim(exp)) > 1) { + fillsample <- rep(fillsample1D, prod(dim(exp)[-time_dim])) + dims <- dim(exp) + exp <- c(exp, fillsample) + dim(exp) <- c(dims[-time_dim], sample = sample_length, + ceiling(dims[time_dim]/sample_length)) + fillsample <- rep(fillsample1D, prod(dim(obs)[-time_dim_obs])) + dims <- dim(obs) + obs <- c(obs, fillsample) + dim(obs) <- c(dims[-time_dim_obs], sample = sample_length, + ceiling(dims[time_dim_obs]/sample_length)) + } else { + exp <- abind(exp, fillsample1D, along = time_dim) + names(dim(exp)) <- dimnames_exp + obs <- abind(obs, fillsample1D, along = time_dim_obs) + names(dim(obs)) <- dimnames_obs + dim(exp) <- c(dim(exp)[-time_dim], sample = sample_length, + dim(exp)[time_dim]/sample_length) + dim(obs) <- c(dim(obs)[-time_dim_obs], sample = sample_length, + dim(obs)[time_dim_obs]/sample_length) + } + } else { + dim(exp) <- c(dim(exp)[-time_dim], sample = sample_length, + dim(exp)[time_dim]/sample_length) + dim(obs) <- c(dim(obs)[-time_dim_obs], sample = sample_length, + dim(obs)[time_dim_obs]/sample_length) + } + if (any(names(dim(exp)) %in% 'ftime') | any(names(dim(exp)) %in% 'time')) { + new_time_dim_exp <- which(names(dim(exp)) == 'ftime' | names(dim(exp)) %in% 'time') + } else { + new_time_dim_exp <- which(names(dim(exp)) == 'sdate') + } + if (any(names(dim(obs)) %in% 'ftime') | any(names(dim(obs)) %in% 'time')) { + new_time_dim_obs <- which(names(dim(obs)) == 'ftime' | names(dim(obs)) %in% 'time') + } else { + new_time_dim_obs <- which(names(dim(obs)) == 'sdate') + } + + if (!is.null(exp_cor)) { + if (any(names(dim(exp_cor)) %in% 'ftime') | any(names(dim(exp_cor)) %in% 'time')) { + time_dim_cor <- which(names(dim(exp_cor)) == 'ftime' | names(dim(exp_cor)) %in% 'time') + } else { + time_dim_cor <- which(names(dim(exp_cor)) == 'sdate') + } + + nsamples <- dimensions[time_dim_cor]/sample_length + if (nsamples %% 1 != 0) { + nsamples <- ceiling(nsamples) + fillsample1D <- rep(NA, nsamples * sample_length - dimensions[time_dim_cor]) + if (length(dimensions) > 1) { + fillsample <- rep(fillsample1D, prod(dimensions[-time_dim_cor])) + exp_cor <- c(exp_cor, fillsample) + dim(exp_cor) <- c(dim(exp_cor)[-time_dim_cor], sample = sample_length, + ceiling(dim(exp_cor)[time_dim_cor]/sample_length)) + } else { + exp_cor <- abind(exp_cor, fillsample1D, along = time_dim_cor) + names(dim(exp_cor)) <- names(dimensions) + } + } + dim(exp_cor) <- c(dim(exp_cor)[-time_dim_cor], sample = sample_length, + dim(exp_cor)[time_dim_cor]/sample_length) + if (any(names(dim(exp_cor)) %in% 'ftime') | any(names(dim(exp_cor)) %in% 'time')) { + new_time_dim_cor <- which(names(dim(exp_cor)) == 'ftime' | + names(dim(exp_cor)) %in% 'time') + } else { + new_time_dim_cor <- which(names(dim(exp_cor)) == 'sdate') + } + + } else { + time_dim_cor <- time_dim + exp_cor <- exp + new_time_dim_cor <- new_time_dim_exp + } + applied <- NULL + for (i in 1 : nsamples) { + if (i <= dim(obs)[new_time_dim_obs]) { + sample_obs <- as.vector(asub(obs, idx = i, dims = new_time_dim_obs)) + sample_exp <- as.vector(asub(exp, idx = i, dims = new_time_dim_exp)) + } else { + sample_obs <- as.vector(asub(obs, idx = dim(obs)[new_time_dim_obs], + dims = new_time_dim_obs)) + sample_exp <- as.vector(asub(exp, idx = dim(exp)[new_time_dim_exp], + dims = new_time_dim_exp)) + } + if (i >= dim(obs)[new_time_dim_obs]) { + sample_obs <- sample_obs[!is.na(sample_obs)] + sample_exp <- sample_exp[!is.na(sample_exp)] + } + if (sum(sample_obs) == 0) { + warning("The total sum of observed data sample in the sample number ", + i, ", is zero and the function may crash.") + } + if (sum(sample_exp) == 0) { + warning("The total sum of experimental data sample in the sample number ", + i, ", is zero and the function may crash.") + } + if (length(sample_exp) < sample_length) { + warning("The length of the sample used, ", length(sample_exp), + ", in the sample number ", i, + ", is smaller than the defined in parameter 'sample_length'.") + } + adjust <- fitQmap(sample_obs, sample_exp, method = method, + ...) + sample_cor <- as.vector(asub(exp_cor, idx = i, dims = new_time_dim_cor)) + if (i == nsamples) { + sample_cor <- sample_cor[!is.na(sample_cor)] + } + applied <- c(applied, doQmap(x = sample_cor, fobj = adjust, ...)) + } + dim(applied) <- dimensions + return(applied) +} diff --git a/R/CST_SplitDim.R b/R/CST_SplitDim.R new file mode 100644 index 0000000000000000000000000000000000000000..ce65e07f5c293920462dbb0e393346644cb0c481 --- /dev/null +++ b/R/CST_SplitDim.R @@ -0,0 +1,202 @@ +#'Function to Split Dimension +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division. +#' +#'@param data a 's2dv_cube' object +#'@param split_dim a character string indicating the name of the dimension to split +#'@param indices a vector of numeric indices or dates. If left at NULL, the dates provided in the s2dv_cube object (element Dates) will be used. +#'@param freq a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independently of the year they belong to, while 'monthly' differenciates months from different years. +#' +#'@import abind +#'@import s2dverification +#'@examples +#' +#'data <- 1 : 20 +#'dim(data) <- c(time = 10, lat = 2) +#'data <-list(data = data) +#'class(data) <- 's2dv_cube' +#'indices <- c(rep(1,5), rep(2,5)) +#'new_data <- CST_SplitDim(data, indices = indices) +#'time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), +#' seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 4), "days"), +#' seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 2), "days")) +#'data <- list(data = data$data, Dates = time) +#'class(data) <- 's2dv_cube' +#'new_data <- CST_SplitDim(data, indices = time) +#'dim(new_data$data) +#'new_data <- CST_SplitDim(data, indices = time, freq = 'day') +#'dim(new_data$data) +#'new_data <- CST_SplitDim(data, indices = time, freq = 'month') +#'dim(new_data$data) +#'new_data <- CST_SplitDim(data, indices = time, freq = 'year') +#'dim(new_data$data) +#'@export +CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, + freq = 'monthly') { + if (!inherits(data, 's2dv_cube')) { + stop("Parameter 'data' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (is.null(indices)) { + if (is.list(data$Dates)) { + indices <- data$Dates[[1]] + } else { + indices <- data$Dates + } + if (any(names(dim(data$data)) %in% 'sdate')) { + if (!any(names(dim(data$data)) %in% split_dim)) { + stop("Parameter 'split_dims' must be one of the dimension ", + "names in parameter 'data'.") + } + indices <- indices[1 : dim(data$data)[which(names(dim(data$data)) == + split_dim)]] + } + } + data$data <- SplitDim(data$data, split_dim = split_dim, indices = indices, + freq = freq) + return(data) +} +#'Function to Split Dimension +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#' +#'@description This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division. +#' +#'@param data an n-dimensional array with named dimensions +#'@param split_dim a character string indicating the name of the dimension to split +#'@param indices a vector of numeric indices or dates +#'@param freq a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independetly of the year they belong to, while 'monthly' differenciates months from different years. Parameter 'freq' can also be numeric indicating the length in which to subset the dimension +#' +#'@import abind +#'@import s2dverification +#'@examples +#' +#'data <- 1 : 20 +#'dim(data) <- c(time = 10, lat = 2) +#'indices <- c(rep(1,5), rep(2,5)) +#'new_data <- SplitDim(data, indices = indices) +#'time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), +#' seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 4), "days"), +#' seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 2), "days")) +#'new_data <- SplitDim(data, indices = time) +#'new_data <- SplitDim(data, indices = time, freq = 'day') +#'new_data <- SplitDim(data, indices = time, freq = 'month') +#'new_data <- SplitDim(data, indices = time, freq = 'year') +#'@export +SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { + # check data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (is.null(dim(data))) { + dim(data) = c(time = length(data)) + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + dims <- dim(data) + # check split_dim + if (!is.character(split_dim)) { + stop("Parameter 'split_dim' must be a character.") + } + if (length(split_dim) > 1) { + split_dim <- split_dim[1] + warning("Parameter 'split_dim' has length greater than ", + "one and only the first element will be used.") + } + if (!any(names(dims) %in% split_dim)) { + stop("Parameter 'split_dims' must be one of the dimension ", + "names in parameter 'data'.") + } + pos_split <- which(names(dims) == split_dim) + # check indices and freq + if (is.null(indices)) { + if (!is.numeric(freq)) { + stop("Parameter 'freq' must be a integer number indicating ", + " the length of each chunk.") + } else { + if (!((dims[pos_split] / freq) %% 1 == 0)) { + stop("Parameter 'freq' must be proportional to the ", + "length of the 'split_dim' in parameter 'data'.") + } + indices <- rep(1 : (dims[pos_split] / freq), freq) + indices <- sort(indices) + } + } else if (is.numeric(indices)) { + if (!is.null(freq)) { + if (freq != 'monthly') { + warning("Parameter 'freq' is not being used since ", + "parameter 'indices' is numeric.") + } + } + } else { + # Indices should be Dates and freq character + if (!is.character(freq)) { + stop("Parameter 'freq' must be a character indicating ", + "how to divide the dates provided in parameter 'indices'", + ", 'monthly', 'anually' or 'daily'.") + } + if (!(any(class(indices) %in% c('POSIXct')))) { + indices <- try( { + if (is.character(indices)) { + as.POSIXct(indices) + } else { + as.POSIXct(indices) + } + }) + if ('try-error' %in% class(indices) | + sum(is.na(indices)) == length(indices)) { + stop("Dates provided in parameter 'indices' must be of class", + " 'POSIXct' or convertable to 'POSIXct'.") + } + } + } + if (length(indices) != dims[pos_split]) { + stop("Parameter 'indices' has different length of parameter ", + "data in the dimension supplied in 'split_dim'.") + } + # check indices as dates: + if (!is.numeric(indices)) { + if (freq == 'day') { + indices <- as.numeric(strftime(indices, format = "%d")) + } else if (freq == 'month') { + indices <- as.numeric(strftime(indices, format = "%m")) + } else if (freq == 'year') { + indices <- as.numeric(strftime(indices, format = "%Y")) + } else if (freq == 'monthly' ) { + indices <- as.numeric(strftime(indices, format = "%m%Y")) + } else { + stop("Parameter 'freq' must be numeric or a character: ", + "by 'day', 'month', 'year' or 'monthly' (for ", + "distinguishable month).") + } + } + repited <- unique(indices) + max_times <- max(unlist(lapply(repited, + function(x){sum(indices == x)}))) + data <- lapply(repited, function(x) {rebuild(x, data, along = split_dim, + indices = indices, max_times)}) + data <- abind(data, along = length(dims) + 1) + if (is.character(freq)) { + names(dim(data)) <- c(names(dims), freq) + } else { + names(dim(data)) <- c(names(dims), 'index') + } +return(data) +} + +rebuild <- function(x, data, along, indices, max_times) { + a <- Subset(data, along = along, indices = which(indices == x)) + pos_dim <- which(names(dim(a)) == along) + if (dim(a)[pos_dim] != max_times) { + adding <- max_times - dim(a)[pos_dim] + new_dims <- dim(a) + new_dims[pos_dim] <- adding + extra <- array(NA, dim = new_dims) + a <- abind(a, extra, along = pos_dim) + names(dim(a)) <- names(dim(data)) + } + return(a) +} diff --git a/man/CST_QuantileMapping.Rd b/man/CST_QuantileMapping.Rd new file mode 100644 index 0000000000000000000000000000000000000000..577ff5421eba15a1e2721bf29b8523d44da9ba59 --- /dev/null +++ b/man/CST_QuantileMapping.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_QuantileMapping.R +\name{CST_QuantileMapping} +\alias{CST_QuantileMapping} +\title{Quantiles Mapping for seasonal or decadal forecast data} +\usage{ +CST_QuantileMapping(exp, obs, exp_cor = NULL, sample_dims = c("sdate", + "ftime", "member"), sample_length = NULL, method = "QUANT", + ncores = NULL, ...) +} +\arguments{ +\item{exp}{an object of class \code{s2dv_cube}} + +\item{obs}{an object of class \code{s2dv_cube}} + +\item{exp_cor}{an object of class \code{s2dv_cube} in which the quantile mapping correction will be applied. If it is not specified, the correction is applied in object \code{exp}.} + +\item{sample_dims}{a character vector indicating the dimensions that can be used as sample for the same distribution} + +\item{sample_length}{a numeric value indicating the length of the timeseries window to be used as sample for the sample distribution and correction. By default, NULL, the total length of the timeseries will be used.} + +\item{method}{a character string indicating the method to be used: 'PTF','DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used.} + +\item{ncores}{an integer indicating the number of parallel processes to spawn for the use for parallel computation in multiple cores.} + +\item{...}{additional arguments passed to the method specified by \code{method}.} +} +\value{ +an oject of class \code{s2dv_cube} containing the experimental data after applyingthe quantile mapping correction. +) <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , +} +\description{ +This function is a wrapper from fitQmap and doQmap from package 'qmap'to be applied in CSTools objects of class 's2dv_cube'. The quantile mapping adjustment between an experiment, tipically a hindcast, and observations is applied to the experiment itself or to a provided forecast. +} +\details{ +The different methods are: +\itemize{ +\item{'PTF'} {fits a parametric transformations to the quantile-quantile relation of observed and modelled values. See \code{\link[qmap]{fitQmapPTF}}.} +\item{'DIST'} {fits a theoretical distribution to observed and to modelled time series. See \code{\link[qmap]{fitQmapDIST}}.} +\item{'RQUANT'} {estimates the values of the quantile-quantile relation of observed and modelled time series for regularly spaced quantiles using local linear least square regression. See \code{\link[qmap]{fitQmapRQUANT}}.} +\item{'QUANT'} {estimates values of the empirical cumulative distribution function of observed and modelled time series for regularly spaced quantiles. See \code{\link[qmap]{fitQmapQUANT}}.} +\item{'SSPLIN'} {fits a smoothing spline to the quantile-quantile plot of observed and modelled time series. See \code{\link[qmap]{fitQmapSSPLIN}}}.} +All methods accepts some common arguments: +\itemize{ +\item{wet.day} {logical indicating whether to perform wet day correction or not.(Not available in 'DIS' method)} +\item{qstep} {NULL or a numeric value between 0 and 1.}} +} +\examples{ +library(qmap) +exp <- 1 : (1 * 5 * 10 * 6 * 2 * 3) +dim(exp) <- c(dataset = 1, member = 10, sdate = 5, ftime = 6 , + lat = 2, lon = 3) +exp <- list(data = exp) +class(exp) <- 's2dv_cube' +obs <- 101 : (100 + 1 * 1 * 5 * 6 * 2 * 3) +dim(obs) <- c(dataset = 1, member = 1, sdate = 5, ftime = 6 , + lat = 2, lon = 3) +obs <- list(data = obs) +class(obs) <- 's2dv_cube' +res <- CST_QuantileMapping(exp, obs, method = 'RQUANT') + +exp <- lonlat_data$exp +obs <- lonlat_data$obs +res <- CST_QuantileMapping(exp, obs) + +\donttest{ +data(obsprecip) +data(modprecip) +exp <- modprecip$MOSS[1:10000] +dim(exp) <- c(time = length(exp)) +exp <- list(data = exp) +class(exp) <- 's2dv_cube' +obs <- obsprecip$MOSS[1:10000] +dim(obs) <- c(time = length(obs)) +obs <- list(data = obs) +class(obs) <- 's2dv_cube' +res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', + method = 'DIST') +} +} +\author{ +Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +} +\seealso{ +\code{\link[qmap]{fitQmap}} and \code{\link[qmap]{doQmap}} +} + diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd new file mode 100644 index 0000000000000000000000000000000000000000..2019ea7bb537173efec25d1dc7cb2e9e3fa5d952 --- /dev/null +++ b/man/CST_SplitDim.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_SplitDim.R +\name{CST_SplitDim} +\alias{CST_SplitDim} +\title{Function to Split Dimension} +\usage{ +CST_SplitDim(data, split_dim = "time", indices = NULL, freq = "monthly") +} +\arguments{ +\item{data}{a 's2dv_cube' object} + +\item{split_dim}{a character string indicating the name of the dimension to split} + +\item{indices}{a vector of numeric indices or dates. If left at NULL, the dates provided in the s2dv_cube object (element Dates) will be used.} + +\item{freq}{a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independently of the year they belong to, while 'monthly' differenciates months from different years.} +} +\description{ +This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division. +} +\examples{ + +data <- 1 : 20 +dim(data) <- c(time = 10, lat = 2) +data <-list(data = data) +class(data) <- 's2dv_cube' +indices <- c(rep(1,5), rep(2,5)) +new_data <- CST_SplitDim(data, indices = indices) +time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), + seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 4), "days"), + seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 2), "days")) +data <- list(data = data$data, Dates = time) +class(data) <- 's2dv_cube' +new_data <- CST_SplitDim(data, indices = time) +dim(new_data$data) +new_data <- CST_SplitDim(data, indices = time, freq = 'day') +dim(new_data$data) +new_data <- CST_SplitDim(data, indices = time, freq = 'month') +dim(new_data$data) +new_data <- CST_SplitDim(data, indices = time, freq = 'year') +dim(new_data$data) +} +\author{ +Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +} + diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd new file mode 100644 index 0000000000000000000000000000000000000000..e36aa8a5d12ceb1cb360150be81d01f110d57e02 --- /dev/null +++ b/man/SplitDim.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_SplitDim.R +\name{SplitDim} +\alias{SplitDim} +\title{Function to Split Dimension} +\usage{ +SplitDim(data, split_dim = "time", indices, freq = "monthly") +} +\arguments{ +\item{data}{an n-dimensional array with named dimensions} + +\item{split_dim}{a character string indicating the name of the dimension to split} + +\item{indices}{a vector of numeric indices or dates} + +\item{freq}{a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independetly of the year they belong to, while 'monthly' differenciates months from different years. Parameter 'freq' can also be numeric indicating the length in which to subset the dimension} +} +\description{ +This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division. +} +\examples{ + +data <- 1 : 20 +dim(data) <- c(time = 10, lat = 2) +indices <- c(rep(1,5), rep(2,5)) +new_data <- SplitDim(data, indices = indices) +time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), + seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 4), "days"), + seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 2), "days")) +new_data <- SplitDim(data, indices = time) +new_data <- SplitDim(data, indices = time, freq = 'day') +new_data <- SplitDim(data, indices = time, freq = 'month') +new_data <- SplitDim(data, indices = time, freq = 'year') +} +\author{ +Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +} + diff --git a/tests/testthat/test-CST_QuantileMapping.R b/tests/testthat/test-CST_QuantileMapping.R new file mode 100644 index 0000000000000000000000000000000000000000..4a317b5a58abfee986448c10a0fcfbb215ee7224 --- /dev/null +++ b/tests/testthat/test-CST_QuantileMapping.R @@ -0,0 +1,97 @@ +context("Generic tests") +test_that("Sanity checks", { +library(qmap) + expect_error( + CST_QuantileMapping(exp = 1), + "Parameter 'exp' and 'obs' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.") + + exp <- 1 : 20 + dim(exp) <- c(time = 20) + exp <- list(data = exp) + class(exp) <- 's2dv_cube' + expect_error( + CST_QuantileMapping(exp = exp), + 'argument "obs" is missing, with no default') + expect_error( + CST_QuantileMapping(exp = exp, obs = 1), + "Parameter 'exp' and 'obs' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.") + + obs <- 1 : 20 + dim(obs) <- c(time = 20) + obs <- list(data = obs) + class(obs) <- 's2dv_cube' + expect_error( + CST_QuantileMapping(exp = exp, obs = obs, exp_cor = 1), + "Parameter 'exp_cor' must be of the class 's2dv_cube', as output ", + "by CSTools::CST_Load.") + + exp <- 1 : 20 + dim(exp) <- 20 + exp <- list(data = exp) + class(exp) <- 's2dv_cube' + expect_error( + CST_QuantileMapping(exp = exp, obs = obs), + "Parameter 'exp' must have dimension names.") + + exp <- 1 : 20 + dim(exp) <- c(time = 20) + exp <- list(data = exp) + class(exp) <- 's2dv_cube' + obs <- 1 : 20 + dim(obs) <- 20 + obs <- list(data = obs) + class(obs) <- 's2dv_cube' + expect_error( + CST_QuantileMapping(exp = exp, obs = obs), + "Parameter 'obs' must have dimension names.") + + obs <- 1 : 20 + dim(obs) <- c(time = 20) + obs <- list(data = obs) + class(obs) <- 's2dv_cube' + exp_cor <- 1 : 20 + dim(exp_cor) <- 20 + exp_cor <- list(data = exp_cor) + class(exp_cor) <- 's2dv_cube' + expect_error( + CST_QuantileMapping(exp = exp, obs = obs, exp_cor = exp_cor), + "Parameter 'exp_cor' must have dimension names.") + expect_error( + CST_QuantileMapping(exp = exp, obs = obs), + "Parameter 'sample_dims' must be a vector of string character indicating ", + "names of exiting dimension in parameter 'exp'.") + expect_error( + CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'x'), + "Parameter 'method' must be one of the following methods: 'PTF','DIST',", + "'RQUANT','QUANT','SSPLIN'.") + expect_warning( + CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', sample_length = "month"), + "Parameter 'sample_length' has not been correctly defined and the whole ", + "length of the timeseries will be used.") + + exp$data[1] <- NA + expect_error( + CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time'), + "Parameter 'exp' contains NA values.") + + exp$data[1] <- 1 + obs$data[1] <- NA + expect_error( + CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time'), + "Parameter 'obs' contains NA values.") + obs$data[1] <- 1 + expect_equal(CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time'), exp) + expect_equal(CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', + method = 'PTF'), exp) + expect_equal(CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', + method = 'RQUANT'), exp) + expect_equal(CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', + method = 'SSPLIN'), exp) + library(CSTools) + expect_equal(CST_QuantileMapping(exp = lonlat_data$exp, obs = lonlat_data$obs), + CST_QuantileMapping(exp = lonlat_data$exp, obs = lonlat_data$obs, + exp_cor = lonlat_data$exp)) + +}) diff --git a/tests/testthat/test-CST_SplitDim.R b/tests/testthat/test-CST_SplitDim.R new file mode 100644 index 0000000000000000000000000000000000000000..25d9f7e8e45288b802721ec6c1a320baaceeef7d --- /dev/null +++ b/tests/testthat/test-CST_SplitDim.R @@ -0,0 +1,76 @@ +context("Generic tests") +test_that("Sanity checks", { + expect_error( + CST_SplitDim(data = 1), + "Parameter 'data' must be of the class 's2dv_cube', as output by ", + "CSTools::CST_Load.") + + data <- 1 : 20 + dim(data) <- c(time = 20) + data <- list(data = data) + class(data) <- 's2dv_cube' + expect_error( + CST_SplitDim(data = data), + "Parameter 'freq' must be a integer number indicating ", + " the length of each chunk.") +indices <- c(rep(1,5), rep(2,5), rep (3, 5), rep(4, 5)) +output = matrix(data$data, nrow = 5, ncol = 4) +names(dim(output)) <- c('time', 'monthly') +output <- list(data = output) +class(output) <- 's2dv_cube' + expect_equal( + CST_SplitDim(data = data, indices = indices), output) +output = matrix(data$data, nrow = 5, ncol = 4) +names(dim(output)) <- c('time', 'index') +output <- list(data = output) +class(output) <- 's2dv_cube' + expect_equal( + CST_SplitDim(data = data, freq = 5), output) + +time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 4), "days"), + seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 4), "days"), + seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 2), "days")) +data <- list(data = data$data, Dates = time) +class(data) <- 's2dv_cube' + expect_error( + CST_SplitDim(data = data), + "Parameter 'indices' has different length of parameter data ", + "in the dimension supplied in 'split_dim'.") +time <- c(seq(ISOdate(1903, 1, 1), ISOdate(1903, 1, 8), "days"), + seq(ISOdate(1903, 2, 1), ISOdate(1903, 2, 8), "days"), + seq(ISOdate(1904, 1, 1), ISOdate(1904, 1, 4), "days")) +data <- list(data = data$data, Dates = time) +class(data) <- 's2dv_cube' +output <- c(data$data, rep(NA, 4)) +dim(output) <- c(time = 8, monthly = 3) +result <- data +result$data <- output + + expect_equal( + CST_SplitDim(data = data), result) + + exp_cor <- 1 : 20 + dim(exp_cor) <- 20 + exp_cor <- list(data = exp_cor) + class(exp_cor) <- 's2dv_cube' + expect_error( + CST_SplitDim(data = exp_cor, freq = 5), + "Parameter 'data' must have dimension names.") + expect_error( + CST_SplitDim(data, freq = 'x'), + "Parameter 'freq' must be numeric or a character: by 'day', ", + "'month', 'year' or 'monthly' (for distingible month).") + + library(CSTools) + expect_error( + CST_SplitDim(data = lonlat_data$exp), + "Parameter 'split_dims' must be one of the dimension names in parameter 'data'.") + output <- lonlat_data$exp$data + output <- abind(output[, , , 1, ,], output[, , , 2, ,], output[, , , 3, ,], along = 5) + dim(output) <- c(dataset = 1, member = 15, sdate = 6, ftime = 1, + lat = 22, lon = 53, monthly = 3) + result <- lonlat_data$exp + result$data <- output + expect_equal(CST_SplitDim(data = lonlat_data$exp, split_dim = 'ftime'), + result) +})