Newer
Older
#'Compute Bias Corrected Climatologies
#'
#'This function computes per-pair climatologies for the experimental
#'and observational data using one of the following methods:
#'\enumerate{
#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)}
#' \item{Kharin method (Karin et al, GRL, 2012)}
#' \item{Fuckar method (Fuckar et al, GRL, 2014)}
#'}
#'Per-pair climatology means that only the startdates covered by the
#'whole experiments/observational dataset will be used. In other words, the
#'startdates which are not all available along 'dat_dim' dimension of both
#'the 'exp' and 'obs' are excluded when computing the climatologies.
#'
#'@param exp A named numeric array of experimental data, with at least two
#' dimensions 'time_dim' and 'dat_dim'.
#'@param obs A named numeric array of observational data, same dimensions as
#' parameter 'exp' except along 'dat_dim'.
#'@param time_dim A character string indicating the name of dimension along
#' which the climatologies are computed. The default value is 'sdate'.
#'@param dat_dim A character vector indicating the name of the dataset and
#' member dimensions. If data at one startdate (i.e., 'time_dim') are not
#' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded.
#' The default value is "c('dataset', 'member')".
#'@param method A character string indicating the method to be used. The
#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.
#'@param ftime_dim A character string indicating the name of forecast time
#' dimension. Only used when method = 'NDV'. The default value is 'ftime'.
#'@param memb A logical value indicating whether to remain 'memb_dim' dimension
#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE.
#'@param memb_dim A character string indicating the name of the member
#' dimension. Only used when parameter 'memb' is FALSE. It must be one element
#' in 'dat_dim'. The default value is 'member'.
#'@param na.rm A logical value indicating whether to remove NA values along
#' 'time_dim' when calculating climatology (TRUE) or return NA if there is NA
#' along 'time_dim' (FALSE). The default value is TRUE.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'A list of 2:
#'\item{$clim_exp}{
#' A numeric array with the same dimensions as parameter 'exp' but
#' dimension 'time_dim' is moved to the first position. If parameter 'method'
#' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#' dimension 'memb_dim' is also removed.
#'}
#'\item{$clim_obs}{
#' A numeric array with the same dimensions as parameter 'exp'
#' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#' dimension 'memb_dim' is also removed.
#'}
#'
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = FALSE)
#'\donttest{
#'PlotClim(clim$clim_exp, clim$clim_obs,
#' toptitle = paste('sea surface temperature climatologies'),
#' ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'),
#' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps')
#'}
#'@importFrom abind adrop
#'@import multiApply
#'@export
Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
method = 'clim', ftime_dim = 'ftime', memb = TRUE,
memb_dim = 'member', na.rm = TRUE, ncores = NULL) {
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Check inputs
## exp and obs (1)
if (is.null(exp) | is.null(obs)) {
stop("Parameter 'exp' and 'obs' cannot be NULL.")
}
if (!is.numeric(exp) | !is.numeric(obs)) {
stop("Parameter 'exp' and 'obs' must be a numeric array.")
}
if (is.null(dim(exp)) | is.null(dim(obs))) {
stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ",
"containing time_dim and dat_dim."))
}
if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
stop("Parameter 'exp' and 'obs' must have dimension names.")
}
if(!all(names(dim(exp)) %in% names(dim(obs))) |
!all(names(dim(obs)) %in% names(dim(exp)))) {
stop("Parameter 'exp' and 'obs' must have same dimension name")
}
## time_dim
if (!is.character(time_dim) | length(time_dim) > 1) {
stop("Parameter 'time_dim' must be a character string.")
}
if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
}
## dat_dim
if (!is.character(dat_dim)) {
stop("Parameter 'dat_dim' must be a character vector.")
}
if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) {
stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
}
## method
if (!(method %in% c("clim", "kharin", "NDV"))) {
stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.")
}
## ftime_dim
if (method == "NDV") {
if (!is.character(ftime_dim) | length(ftime_dim) > 1) {
stop("Parameter 'ftime_dim' must be a character string.")
}
if (!ftime_dim %in% names(dim(exp)) | !ftime_dim %in% names(dim(obs))) {
stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.")
}
}
## memb
if (!is.logical(memb) | length(memb) > 1) {
stop("Parameter 'memb' must be one logical value.")
}
## memb_dim
if (!memb) {
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
}
if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
}
}
## na.rm
if (!is.logical(na.rm) | length(na.rm) > 1) {
stop("Parameter 'na.rm' must be one logical value.")
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
## exp and obs (2)
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
for (i in 1:length(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim[i])]
name_obs <- name_obs[-which(name_obs == dat_dim[i])]
}
if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
"all dimension expect 'dat_dim'."))
}
###############################
# Sort dimension
name_exp <- names(dim(exp))
name_obs <- names(dim(obs))
order_obs <- match(name_exp, name_obs)
###############################
# Calculate Clim
#----------------------------------
# Remove all sdate if not complete along dat_dim
pos <- rep(0, length(dat_dim))
for (i in 1:length(dat_dim)) { #[dat, sdate]
## dat_dim: [dataset, member]
pos[i] <- which(names(dim(obs)) == dat_dim[i])
}
outrows_exp <- MeanDims(exp, pos, na.rm = FALSE, ncores = ncores) +
MeanDims(obs, pos, na.rm = FALSE, ncores = ncores)
outrows_obs <- outrows_exp
for (i in 1:length(pos)) {
outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]])
outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]])
}
exp[which(is.na(outrows_exp))] <- NA
obs[which(is.na(outrows_obs))] <- NA
#-----------------------------------
if (method == 'clim') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm, ncores_input = ncores,
ncores = ncores)
# Add member dimension name back
if (memb) {
if(is.null(names(dim(clim$clim_exp))[1])) {
names(dim(clim$clim_exp))[1] <- memb_dim
names(dim(clim$clim_obs))[1] <- memb_dim
}
}
} else if (method == 'kharin') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm, ncores_input = ncores,
ncores = ncores)
} else if (method == 'NDV') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim, ftime_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm, ncores_input = ncores,
ncores = ncores)
}
return(clim)
}
.Clim <- function(exp, obs, method = 'clim',
time_dim = 'sdate', dat_dim = c('dataset', 'member'),
ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE,
na.rm = TRUE, ncores_input = NULL) {
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
if (method == 'clim') {
# exp: [sdate, dat_dim_exp]
# obs: [sdate, dat_dim_obs]
clim_exp <- apply(exp, which(names(dim(exp)) != time_dim),
mean, na.rm = na.rm) #average out time_dim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim]
## member mean
if (!memb) {
if (length(dim(clim_exp)) == 1) { #dim: [member]
clim_exp <- mean(clim_exp, na.rm = TRUE)
clim_obs <- mean(clim_obs, na.rm = TRUE)
} else {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
}
} else if (method == 'kharin') {
# exp: [sdate, dat_dim_exp]
# obs: [sdate, dat_dim_obs]
# obs clim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim]
# exp clim
##--- NEW trend ---##
tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1,
polydeg = 1, conf = FALSE, ncores = ncores_input)$trend
tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1,
polydeg = 1, conf = FALSE, ncores = ncores_input)$trend
# tmp_exp: [stats, dat_dim)]
tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member)
#tmp_obs_mean: [stats = 2]
intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim]
slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim]
intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim]
slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim]
trend_exp <- list()
trend_obs <- list()
for (jdate in 1:dim(exp)[time_dim]) {
trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp
trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs
}
# turn list into array
trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
len <- length(dim(exp))
trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1)))
trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1)))
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number
clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line
clim_exp <- trend_exp - trend_obs + clim_obs_mean
## member mean
if (!memb) {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
} else if (method == 'NDV') {
# exp: [sdate, dat_dim, ftime]
# obs: [sdate, dat_dim, ftime]
# obs clim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim, ftime]
# exp clim
pos_ftime <- length(dim(exp)) #a number
dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4)
pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime
dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3)
# Create initial data set (i.e., only first ftime)
tmp <- Subset(exp, ftime_dim, 1, drop = 'selected')
ini_exp <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
tmp <- Subset(obs, ftime_dim, 1, drop = 'selected')
ini_obs <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
#ini_: [sdate, dat_dim, ftime]
tmp_exp <- Regression(datay = exp, datax = ini_exp, reg_dim = time_dim,
na.action = na.omit,
pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
tmp_obs <- Regression(datay = obs, datax = ini_obs, reg_dim = time_dim,
na.action = na.omit,
pval = FALSE, conf = FALSE, ncores = ncores_input)$regression
#tmp_: [stats = 2, dat_dim, ftime]
tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #average out dat_dim (dat and member)
#tmp_obs_mean: [stats = 2, ftime]
ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #average out dat_dim
#ini_obs_mean: [sdate, ftime]
# Find intercept and slope
intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime]
slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime]
intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat))
#[ftime, dat_dim] exp
intercept_obs <- Reorder(intercept_obs, c(2:length(dim(intercept_obs)), 1))
#[dat_dim, ftime] exp
slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat))
#[ftime, dat_dim] exp
slope_obs <- Reorder(slope_obs, c(2:length(dim(slope_obs)), 1))
#[dat_dim, ftime] exp
trend_exp <- list()
trend_obs <- list()
for (jdate in 1:dim(exp)[time_dim]) {
tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected') #[dat_dim, ftime]
trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp #[dat_dim, ftime]
tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim]
tmp <- Reorder(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime]
trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs
}
# turn list into array
trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
#trend_: [dat_dim, ftime, sdate]
len <- length(dim(exp))
trend_exp <- Reorder(trend_exp, c(len, 1:(len - 1)))
trend_obs <- Reorder(trend_obs, c(len, 1:(len - 1)))
#trend_: [sdate, dat_dim, ftime]
clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean) #average out dat_dim, [ftime]
clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat))
#[ftime, sdate, dat_dim]
len <- length(dim(clim_obs_mean))
clim_obs_mean <- Reorder(clim_obs_mean, c(2:len, 1))
#[sdate, dat_dim, ftime]
clim_exp <- trend_exp - trend_obs + clim_obs_mean
## member mean
if (!memb) {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
}
return(list(clim_exp = clim_exp, clim_obs = clim_obs))
}