Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#'Compute Bias Corrected Climatologies
#'
#'This function computes per-pair climatologies for the experimental
#'and observational data using one of the following methods:
#'\enumerate{
#' \item{per-pair method (Garcia-Serrano and Doblas-Reyes, CD, 2012)}
#' \item{Kharin method (Karin et al, GRL, 2012)}
#' \item{Fuckar method (Fuckar et al, GRL, 2014)}
#'}
#'Per-pair climatology means that only the startdates covered by the
#'whole experiments/observational dataset will be used. In other words, the
#'startdates which are not all available along 'dat_dim' dimension of both
#'the 'exp' and 'obs' are excluded when computing the climatologies.
#'
#'@param exp A named numeric array of experimental data, with at least two
#' dimensions 'time_dim' and 'dat_dim'.
#'@param obs A named numeric array of observational data, same dimensions as
#' parameter 'exp' except along 'dat_dim'.
#'@param time_dim A character string indicating the name of dimension along
#' which the climatologies are computed. The default value is 'sdate'.
#'@param dat_dim A character vector indicating the name of the dataset and
#' member dimensions. If data at one startdate (i.e., 'time_dim') are not
#' complete along 'dat_dim', this startdate along 'dat_dim' will be discarded.
#' The default value is "c('dataset', 'member')".
#'@param method A character string indicating the method to be used. The
#' options include 'clim', 'kharin', and 'NDV'. The default value is 'clim'.
#'@param ftime_dim A character string indicating the name of forecast time
#' dimension. Only used when method = 'NDV'. The default value is 'ftime'.
#'@param memb_dim A character string indicating the name of the member
#' dimension. Only used when parameter 'memb' is FALSE. It must be one element
#' in 'dat_dim'. The default value is 'member'.
#'@param memb A logical value indicating whether to remain 'memb_dim' dimension
#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). The default value is TRUE.
#'@param na.rm A logical value indicating whether to remove NA values along
#' 'time_dim' when calculating climatology (TRUE) or return NA if there is NA
#' along 'time_dim' (FALSE). The default value is TRUE.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'A list of 2:
#'\item{$clim_exp}{
#' A numeric array with the same dimensions as parameter 'exp' but
#' dimension 'time_dim' is moved to the first position. If parameter 'method'
#' is 'clim', dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#' dimension 'memb_dim' is also removed.
#'}
#'\item{$clim_obs}{
#' A numeric array with the same dimensions as parameter 'exp'
#' except dimension 'time_dim' is removed. If parameter 'memb' is FALSE,
#' dimension 'memb_dim' is also removed.
#'}
#'
#'@keywords datagen
#'@author History:\cr
#' 0.9 - 2011-03 (V. Guemas, \email{virginie.guemas@ic3.cat}) - Original code\cr
#' 1.0 - 2013-09 (N. Manubens, \email{nicolau.manubens@ic3.cat}) - Formatting to R CRAN
#' 3.0 - 2020-01 (A. Ho, \email{an.ho@bsc.es}) - Adopt multiApply feature
#'@examples
#'# Load sample data as in Load() example:
#'example(Load)
#'clim <- Clim(sampleData$mod, sampleData$obs)
#'clim2 <- Clim(sampleData$mod, sampleData$obs, method = 'kharin', memb = F)
#'\donttest{
#'PlotClim(clim$clim_exp, clim$clim_obs,
#' toptitle = paste('sea surface temperature climatologies'),
#' ytitle = 'K', monini = 11, listexp = c('CMIP5 IC3'),
#' listobs = c('ERSST'), biglab = FALSE, fileout = 'tos_clim.eps')
#'}
#'@importFrom abind adrop
#'@import multiApply
#'@export
Clim <- function(exp, obs, time_dim = 'sdate', dat_dim = c('dataset', 'member'),
method = 'clim', ftime_dim = 'ftime', memb_dim = 'member',
memb = TRUE, na.rm = TRUE, ncores = NULL) {
# Check inputs
## exp and obs (1)
if (is.null(exp) | is.null(obs)) {
stop("Parameter 'exp' and 'obs' cannot be NULL.")
}
if (!is.numeric(exp) | !is.numeric(obs)) {
stop("Parameter 'exp' and 'obs' must be a numeric array.")
}
if (is.null(dim(exp)) | is.null(dim(obs))) {
stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ",
"containing time_dim and dat_dim."))
}
if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) |
any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) {
stop("Parameter 'exp' and 'obs' must have dimension names.")
}
if(!all(names(dim(exp)) %in% names(dim(obs))) |
!all(names(dim(obs)) %in% names(dim(exp)))) {
stop("Parameter 'exp' and 'obs' must have same dimension name")
}
## time_dim
if (!is.character(time_dim) | length(time_dim) > 1) {
stop("Parameter 'time_dim' must be a character string.")
}
if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
}
## dat_dim
if (!is.character(dat_dim)) {
stop("Parameter 'dat_dim' must be a character vector.")
}
if (!all(dat_dim %in% names(dim(exp))) | !all(dat_dim %in% names(dim(obs)))) {
stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.")
}
## method
if (!(method %in% c("clim", "kharin", "NDV"))) {
stop("Parameter 'method' must be one of 'clim', 'kharin' or 'NDV'.")
}
## ftime_dim
if (!is.character(ftime_dim) | length(ftime_dim) > 1) {
stop("Parameter 'ftime_dim' must be a character string.")
}
if (!ftime_dim %in% names(dim(exp)) | !ftime_dim %in% names(dim(obs))) {
stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.")
}
## memb_dim
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
}
if (!memb_dim %in% names(dim(exp)) | !memb_dim %in% names(dim(obs))) {
stop("Parameter 'memb_dim' is not found in 'exp' or 'obs' dimension.")
}
## memb
if (!is.logical(memb) | length(memb) > 1) {
stop("Parameter 'memb' must be one logical value.")
}
## na.rm
if (!is.logical(na.rm) | length(na.rm) > 1) {
stop("Parameter 'na.rm' must be one logical value.")
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores < 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be a positive integer.")
}
}
## exp and obs (2)
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
for (i in 1:length(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim[i])]
name_obs <- name_obs[-which(name_obs == dat_dim[i])]
}
if(!all(dim(exp)[name_exp] == dim(obs)[name_obs])) {
stop(paste0("Parameter 'exp' and 'obs' must have same length of ",
"all dimension expect 'dat_dim'."))
}
###############################
# Sort dimension
name_exp <- names(dim(exp))
name_obs <- names(dim(obs))
order_obs <- match(name_exp, name_obs)
obs <- s2dverification:::.aperm2(obs, order_obs)
###############################
# Calculate Clim
#----------------------------------
# Remove all sdate if not complete along dat_dim
pos <- rep(0, length(dat_dim))
for (i in 1:length(dat_dim)) { #[dat, sdate]
## dat_dim: [dataset, member]
pos[i] <- which(names(dim(obs)) == dat_dim[i])
}
outrows_exp <- MeanDims(exp, pos, na.rm = FALSE) +
MeanDims(obs, pos, na.rm = FALSE)
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
outrows_obs <- outrows_exp
for (i in 1:length(pos)) {
outrows_exp <- InsertDim(outrows_exp, pos[i], dim(exp)[pos[i]])
outrows_obs <- InsertDim(outrows_obs, pos[i], dim(obs)[pos[i]])
}
exp[which(is.na(outrows_exp))] <- NA
obs[which(is.na(outrows_obs))] <- NA
#-----------------------------------
if (method == 'clim') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm,
ncores = ncores)
# Add member dimension name back
if (memb) {
if(is.null(names(dim(clim$clim_exp))[1])) {
names(dim(clim$clim_exp))[1] <- memb_dim
names(dim(clim$clim_obs))[1] <- memb_dim
}
}
} else if (method == 'kharin') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm,
ncores = ncores)
} else if (method == 'NDV') {
clim <- Apply(list(exp, obs),
target_dims = c(time_dim, dat_dim, ftime_dim),
fun = .Clim,
method = method, time_dim = time_dim,
dat_dim = dat_dim, ftime_dim = ftime_dim, memb_dim = memb_dim,
memb = memb, na.rm = na.rm,
ncores = ncores)
}
return(clim)
}
.Clim <- function(exp, obs, method = 'clim',
time_dim = 'sdate', dat_dim = c('dataset', 'member'),
ftime_dim = 'ftime', memb_dim = 'member', memb = TRUE,
na.rm = TRUE) {
if (method == 'clim') {
# exp: [sdate, dat_dim_exp]
# obs: [sdate, dat_dim_obs]
clim_exp <- apply(exp, which(names(dim(exp)) != time_dim),
mean, na.rm = na.rm) #average out time_dim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim]
## member mean
if (!memb) {
if (length(dim(clim_exp)) == 1) { #dim: [member]
clim_exp <- mean(clim_exp, na.rm = TRUE)
clim_obs <- mean(clim_obs, na.rm = TRUE)
} else {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
}
} else if (method == 'kharin') {
# exp: [sdate, dat_dim_exp]
# obs: [sdate, dat_dim_obs]
# obs clim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim]
# exp clim
##--- NEW trend ---##
tmp_obs <- Trend(data = obs, time_dim = time_dim, interval = 1,
polydeg = 1, conf = FALSE)$trend
tmp_exp <- Trend(data = exp, time_dim = time_dim, interval = 1,
polydeg = 1, conf = FALSE)$trend
# tmp_exp: [stats, dat_dim)]
tmp_obs_mean <- apply(tmp_obs, 1, mean) #average out dat_dim (dat and member)
#tmp_obs_mean: [stats = 2]
intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim]
slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim]
intercept_obs <- array(tmp_obs_mean[1], dim = dim(exp)[-1]) #[dat_dim]
slope_obs <- array(tmp_obs_mean[2], dim = dim(exp)[-1]) #[dat_dim]
trend_exp <- list()
trend_obs <- list()
for (jdate in 1:dim(exp)[time_dim]) {
trend_exp[[jdate]] <- intercept_exp + jdate * slope_exp
trend_obs[[jdate]] <- intercept_obs + jdate * slope_obs
}
# turn list into array
trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
len <- length(dim(exp))
trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1)))
trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1)))
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
clim_obs_mean <- mean(apply(clim_obs, 1, mean)) #average out dat_dim, get a number
clim_obs_mean <- array(clim_obs_mean, dim = dim(exp)) #enlarge it for the next line
clim_exp <- trend_exp - trend_obs + clim_obs_mean
## member mean
if (!memb) {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
} else if (method == 'NDV') {
# exp: [sdate, dat_dim, ftime]
# obs: [sdate, dat_dim, ftime]
# obs clim
clim_obs <- apply(obs, which(names(dim(obs)) != time_dim),
mean, na.rm = na.rm) #[dat_dim, ftime]
# exp clim
pos_ftime <- length(dim(exp)) #a number
dim_ftime <- dim(exp)[pos_ftime] #c(ftime = 4)
pos_dat <- 2:(length(dim(exp)) - 1) #1 is sdate, last is ftime
dim_dat <- dim(exp)[pos_dat] #c(dataset = 1, member = 3)
# Create initial data set (i.e., only first ftime)
tmp <- Subset(exp, ftime_dim, 1, drop = 'selected')
ini_exp <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
tmp <- Subset(obs, ftime_dim, 1, drop = 'selected')
ini_obs <- InsertDim(tmp, pos_ftime, dim_ftime) #only first ftime
#ini_: [sdate, dat_dim, ftime]
tmp_exp <- Regression(datay = exp, datax = ini_exp, time_dim = time_dim,
na.action = na.omit,
pval = FALSE, conf = FALSE)$regression
tmp_obs <- Regression(datay = obs, datax = ini_obs, time_dim = time_dim,
na.action = na.omit,
pval = FALSE, conf = FALSE)$regression
#tmp_: [stats = 2, dat_dim, ftime]
tmp_obs_mean <- apply(tmp_obs, c(1, length(dim(tmp_obs))), mean) #average out dat_dim (dat and member)
#tmp_obs_mean: [stats = 2, ftime]
ini_obs_mean <- apply(ini_obs, c(1, length(dim(ini_obs))), mean) #average out dat_dim
#ini_obs_mean: [sdate, ftime]
# Find intercept and slope
intercept_exp <- Subset(tmp_exp, 1, 1, drop = 'selected') #[dat_dim, ftime]
slope_exp <- Subset(tmp_exp, 1, 2, drop = 'selected') #[dat_dim, ftime]
intercept_obs <- array(tmp_obs_mean[1, ], dim = c(dim_ftime, dim_dat))
#[ftime, dat_dim] exp
intercept_obs <- s2dverification:::.aperm2(intercept_obs, c(2:length(dim(intercept_obs)), 1))
#[dat_dim, ftime] exp
slope_obs <- array(tmp_obs_mean[2, ], dim = c(dim_ftime, dim_dat))
#[ftime, dat_dim] exp
slope_obs <- s2dverification:::.aperm2(slope_obs, c(2:length(dim(slope_obs)), 1))
#[dat_dim, ftime] exp
trend_exp <- list()
trend_obs <- list()
for (jdate in 1:dim(exp)[time_dim]) {
tmp <- Subset(ini_exp, time_dim, jdate, drop = 'selected') #[dat_dim, ftime]
trend_exp[[jdate]] <- intercept_exp + tmp * slope_exp #[dat_dim, ftime]
tmp <- array(ini_obs_mean[jdate, ], dim = c(dim_ftime, dim_dat)) #[ftime, dat_dim]
tmp <- s2dverification:::.aperm2(tmp, c(2:length(dim(tmp)), 1)) #[dat_dim, ftime]
trend_obs[[jdate]] <- intercept_obs + tmp * slope_obs
}
# turn list into array
trend_exp <- array(unlist(trend_exp), dim = c(dim(exp)[-1], dim(exp)[1]))
trend_obs <- array(unlist(trend_obs), dim = c(dim(exp)[-1], dim(exp)[1]))
#trend_: [dat_dim, ftime, sdate]
len <- length(dim(exp))
trend_exp <- s2dverification:::.aperm2(trend_exp, c(len, 1:(len - 1)))
trend_obs <- s2dverification:::.aperm2(trend_obs, c(len, 1:(len - 1)))
#trend_: [sdate, dat_dim, ftime]
clim_obs_mean <- apply(clim_obs, length(dim(clim_obs)), mean) #average out dat_dim, [ftime]
clim_obs_mean <- array(clim_obs_mean, dim = c(dim_ftime, dim(exp)[1], dim_dat))
#[ftime, sdate, dat_dim]
len <- length(dim(clim_obs_mean))
clim_obs_mean <- s2dverification:::.aperm2(clim_obs_mean, c(2:len, 1))
#[sdate, dat_dim, ftime]
clim_exp <- trend_exp - trend_obs + clim_obs_mean
## member mean
if (!memb) {
pos <- which(names(dim(clim_exp)) == memb_dim)
pos <- c(1:length(dim(clim_exp)))[-pos]
dim_name <- names(dim(clim_exp))
clim_exp <- apply(clim_exp, pos, mean, na.rm = TRUE)
clim_obs <- apply(clim_obs, pos, mean, na.rm = TRUE)
if (is.null(names(dim(as.array(clim_exp))))) {
clim_exp <- as.array(clim_exp)
clim_obs <- as.array(clim_obs)
names(dim(clim_exp)) <- dim_name[pos]
names(dim(clim_obs)) <- dim_name[pos]
}
}
}
return(list(clim_exp = clim_exp, clim_obs = clim_obs))
}