Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
#'Compute the Ranked Probability Skill Score
#'
#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score
#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to
#'assess whether a forecast presents an improvement or worsening with respect to
#'a reference forecast. The RPSS ranges between minus infinite and 1. If the
#'RPSS is positive, it indicates that the forecast has higher skill than the
#'reference forecast, while a negative value means that it has a lower skill.\cr
#'Examples of reference forecasts are the climatological forecast (same
#'probabilities for all categories for all time steps), persistence, a previous
#'model version, and another model. It is computed as
#'\code{RPSS = 1 - RPS_exp / RPS_ref}. The statistical significance is obtained
#'based on a Random Walk test at the specified confidence level (DelSole and
#'Tippett, 2016).\cr
#'The function accepts either the ensemble members or the probabilities of
#'each data as inputs. If there is more than one dataset, RPSS will be
#'computed for each pair of exp and obs data. The NA ratio of data will be
#'examined before the calculation. If the ratio is higher than the threshold
#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are
#'counted by per-pair method, which means that only the time steps that all the
#'datasets have values count as non-NA values.
#'
#'@param exp A named numerical array of either the forecast with at least time
#' and member dimensions, or the probabilities with at least time and category
#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}.
#'@param obs A named numerical array of either the observation with at least
#' time dimension, or the probabilities with at least time and category
#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The
#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'.
#'@param ref A named numerical array of either the reference forecast with at
#' least time and member dimensions, or the probabilities with at least time and
#' category dimensions. The probabilities can be generated by
#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except
#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should
#' not have dataset dimension. If there is corresponding reference for each
#' experiment, the dataset dimension must have the same length as in 'exp'. If
#' 'ref' is NULL, the climatological forecast is used as reference forecast.
#' The default value is NULL.
#'@param time_dim A character string indicating the name of the time dimension.
#' The default value is 'sdate'.
#'@param memb_dim A character string indicating the name of the member dimension
#' to compute the probabilities of the forecast and the reference forecast. The
#' default value is 'member'. If the data are probabilities, set memb_dim as
#' NULL.
#'@param cat_dim A character string indicating the name of the category
#' dimension that is needed when exp, obs, and ref are probabilities. The
#' default value is NULL, which means that the data are not probabilities.
#'@param dat_dim A character string indicating the name of dataset dimension.
#' The length of this dimension can be different between 'exp' and 'obs'.
#' The default value is NULL.
#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
#' 1) between the categories. The default value is c(1/3, 2/3), which
#' corresponds to tercile equiprobable categories.
#'@param indices_for_clim A vector of the indices to be taken along 'time_dim'
#' for computing the thresholds between the probabilistic categories. If NULL,
#' the whole period is used. The default value is NULL.
#'@param Fair A logical indicating whether to compute the FairRPSS (the
#' potential RPSS that the forecast would have with an infinite ensemble size).
#' The default value is FALSE.
#'@param weights_exp A named numerical array of the forecast ensemble weights
#' for probability calculation. The dimension should include 'memb_dim',
#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension
#' lengths must be equal to 'exp' dimension lengths. The default value is NULL,
#' which means no weighting is applied. The ensemble should have at least 70
#' members or span at least 10 time steps and have more than 45 members if
#' consistency between the weighted and unweighted methodologies is desired.
#'@param weights_ref Same as 'weights_exp' but for the reference forecast.
#'@param cross.val A logical indicating whether to compute the thresholds
#' between probabilistics categories in cross-validation. The default value is
#' FALSE.
#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it
#' means the lower limit for the fraction of the non-NA values. 1 is equal to
#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable).
# The function returns NA if the fraction of non-NA values in the data is less
#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE.
#'@param sig_method.type A character string indicating the test type of the
#' significance method. Check \code{RandomWalkTest()} parameter
#' \code{test.type} for details. The default is 'two.sided.approx', which is
#' the default of \code{RandomWalkTest()}.
#'@param alpha A numeric of the significance level to be used in the statistical
#' significance test. The default value is 0.05.
#'@param ncores An integer indicating the number of cores to use for parallel
#' computation. The default value is NULL.
#'
#'@return
#'\item{$rpss}{
#' A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions
#' of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of
#' experiment (i.e., dat_dim in exp), and nobs is the number of observation
#' i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted.
#'}
#'\item{$sign}{
#' A logical array of the statistical significance of the RPSS with the same
#' dimensions as $rpss.
#'}
#'
#'@references
#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7
#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1
#'
#'@examples
#'set.seed(1)
#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
#'set.seed(2)
#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50))
#'set.seed(3)
#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50))
#'weights <- sapply(1:dim(exp)['sdate'], function(i) {
#' n <- abs(rnorm(10))
#' n/sum(n)
#' })
#'dim(weights) <- c(member = 10, sdate = 50)
#'# Use data as input
#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast
#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast
#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights)
#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided')
#'
#'# Use probs as input
#'exp_probs <- GetProbs(exp, memb_dim = 'member')
#'obs_probs <- GetProbs(obs, memb_dim = NULL)
#'ref_probs <- GetProbs(ref, memb_dim = 'member')
#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL,
#' cat_dim = 'bin')
#'
#'@import multiApply
#'@export
RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
Fair = FALSE, nmemb = NULL, nmemb_ref = NULL,
weights_exp = NULL, weights_ref = NULL,
cross.val = FALSE, na.rm = FALSE,
sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) {
# Check inputs
## exp, obs, and ref (1)
if (!is.array(exp) | !is.numeric(exp)) {
stop("Parameter 'exp' must be a numeric array.")
}
if (!is.array(obs) | !is.numeric(obs)) {
stop("Parameter 'obs' must be a numeric array.")
}
if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) |
any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
stop("Parameter 'exp' and 'obs' must have dimension names.")
}
if (!is.null(ref)) {
if (!is.array(ref) | !is.numeric(ref))
stop("Parameter 'ref' must be a numeric array.")
if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) {
stop("Parameter 'ref' must have dimension names.")
}
}
## time_dim
if (!is.character(time_dim) | length(time_dim) != 1) {
stop("Parameter 'time_dim' must be a character string.")
}
if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) {
stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.")
}
if (!is.null(ref) & !time_dim %in% names(dim(ref))) {
stop("Parameter 'time_dim' is not found in 'ref' dimension.")
}
## memb_dim & cat_dim
if (is.null(memb_dim) + is.null(cat_dim) != 1) {
stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.")
}
## memb_dim
if (!is.null(memb_dim)) {
if (!is.character(memb_dim) | length(memb_dim) > 1) {
stop("Parameter 'memb_dim' must be a character string.")
}
if (!memb_dim %in% names(dim(exp))) {
stop("Parameter 'memb_dim' is not found in 'exp' dimension.")
}
if (!is.null(ref) & !memb_dim %in% names(dim(ref))) {
stop("Parameter 'memb_dim' is not found in 'ref' dimension.")
}
}
## cat_dim
if (!is.null(cat_dim)) {
if (!is.character(cat_dim) | length(cat_dim) > 1) {
stop("Parameter 'cat_dim' must be a character string.")
}
if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) |
(!is.null(ref) & !cat_dim %in% names(dim(ref)))) {
stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.")
}
}
## dat_dim
if (!is.null(dat_dim)) {
if (!is.character(dat_dim) | length(dat_dim) > 1) {
stop("Parameter 'dat_dim' must be a character string.")
}
if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) {
stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.",
" Set it as NULL if there is no dataset dimension.")
}
}
## exp, obs, and ref (2)
name_exp <- sort(names(dim(exp)))
name_obs <- sort(names(dim(obs)))
if (!is.null(memb_dim)) {
name_exp <- name_exp[-which(name_exp == memb_dim)]
if (memb_dim %in% name_obs) {
name_obs <- name_obs[-which(name_obs == memb_dim)]
}
}
if (!is.null(dat_dim)) {
name_exp <- name_exp[-which(name_exp == dat_dim)]
name_obs <- name_obs[-which(name_obs == dat_dim)]
}
if (!identical(length(name_exp), length(name_obs)) |
!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
stop("Parameter 'exp' and 'obs' must have same length of ",
"all dimensions except 'memb_dim' and 'dat_dim'.")
}
if (!is.null(ref)) {
name_ref <- sort(names(dim(ref)))
if (!is.null(memb_dim)) {
name_ref <- name_ref[-which(name_ref == memb_dim)]
}
if (!is.null(dat_dim)) {
if (dat_dim %in% name_ref) {
if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) {
stop("If parameter 'ref' has dataset dimension, it must be",
" equal to dataset dimension of 'exp'.")
}
name_ref <- name_ref[-which(name_ref == dat_dim)]
}
}
if (!identical(length(name_exp), length(name_ref)) |
!identical(dim(exp)[name_exp], dim(ref)[name_ref])) {
stop("Parameter 'exp' and 'ref' must have the same length of ",
"all dimensions except 'memb_dim' and 'dat_dim' if there is ",
"only one reference dataset.")
}
}
## prob_thresholds
if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) |
any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) {
stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.")
}
## indices_for_clim
if (is.null(indices_for_clim)) {
indices_for_clim <- seq_len(dim(obs)[time_dim])
} else {
if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) {
stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.")
} else if (length(indices_for_clim) > dim(obs)[time_dim] |
max(indices_for_clim) > dim(obs)[time_dim] |
any(indices_for_clim) < 1) {
stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.")
}
}
## Fair
if (!is.logical(Fair) | length(Fair) > 1) {
stop("Parameter 'Fair' must be either TRUE or FALSE.")
}
## cross.val
if (!is.logical(cross.val) | length(cross.val) > 1) {
stop("Parameter 'cross.val' must be either TRUE or FALSE.")
}
## weights_exp
if (!is.null(weights_exp) & is.null(cat_dim)) {
if (!is.array(weights_exp) | !is.numeric(weights_exp))
stop("Parameter 'weights_exp' must be a named numeric array.")
if (is.null(dat_dim)) {
if (length(dim(weights_exp)) != 2 |
!all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) {
stop("Parameter 'weights_exp' must have two dimensions with the names of ",
"'memb_dim' and 'time_dim'.")
}
if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
dim(weights_exp)[time_dim] != dim(exp)[time_dim]) {
stop("Parameter 'weights_exp' must have the same dimension lengths as ",
"'memb_dim' and 'time_dim' in 'exp'.")
}
weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim))
} else {
if (length(dim(weights_exp)) != 3 |
!all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) {
stop("Parameter 'weights_exp' must have three dimensions with the names of ",
"'memb_dim', 'time_dim' and 'dat_dim'.")
}
if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] |
dim(weights_exp)[time_dim] != dim(exp)[time_dim] |
dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) {
stop("Parameter 'weights_exp' must have the same dimension lengths ",
"as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")
}
weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim))
}
} else if (!is.null(weights_exp) & !is.null(cat_dim)) {
.warning(paste0("Parameter 'exp' is probability already, so parameter ",
"'weights_exp' is not used. Change 'weights_exp' to NULL."))
weights_exp <- NULL
}
## weights_ref
if (!is.null(weights_ref) & is.null(cat_dim)) {
if (!is.array(weights_ref) | !is.numeric(weights_ref))
stop("Parameter 'weights_ref' must be a named numeric array.")
if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) {
if (length(dim(weights_ref)) != 2 |
!all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) {
stop("Parameter 'weights_ref' must have two dimensions with the names of ",
"'memb_dim' and 'time_dim'.")
}
if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] |
dim(weights_ref)[time_dim] != dim(exp)[time_dim]) {
stop("Parameter 'weights_ref' must have the same dimension lengths as ",
"'memb_dim' and 'time_dim' in 'ref'.")
}
weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim))
} else {
if (length(dim(weights_ref)) != 3 |
!all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) {
stop("Parameter 'weights_ref' must have three dimensions with the names of ",
"'memb_dim', 'time_dim' and 'dat_dim'.")
}
if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] |
dim(weights_ref)[time_dim] != dim(ref)[time_dim] |
dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) {
stop("Parameter 'weights_ref' must have the same dimension lengths ",
"as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.")
}
weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim))
}
} else if (!is.null(weights_ref) & !is.null(cat_dim)) {
.warning(paste0("Parameter 'ref' is probability already, so parameter ",
"'weights_ref' is not used. Change 'weights_ref' to NULL."))
weights_ref <- NULL
}
## na.rm
if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) {
stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1')
}
## alpha
if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) {
stop("Parameter 'alpha' must be a number between 0 and 1.")
}
## sig_method.type
#NOTE: These are the types of RandomWalkTest()
if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) {
stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ",
"'greater', or 'less'.")
}
if (sig_method.type == 'two.sided.approx' && alpha != 0.05) {
.warning("DelSole and Tippett (2016) aproximation is valid for alpha ",
"= 0.05 only. Returning the significance at the 0.05 significance level.")
}
## ncores
if (!is.null(ncores)) {
if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
length(ncores) > 1) {
stop("Parameter 'ncores' must be either NULL or a positive integer.")
}
}
###############################
# Compute RPSS
## Decide target_dims
if (!is.null(memb_dim)) {
target_dims_exp <- c(time_dim, memb_dim, dat_dim)
if (!memb_dim %in% names(dim(obs))) {
target_dims_obs <- c(time_dim, dat_dim)
} else {
target_dims_obs <- c(time_dim, memb_dim, dat_dim)
}
} else { # cat_dim
target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim)
}
if (!is.null(ref)) { # use "ref" as reference forecast
if (!is.null(memb_dim)) {
if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) {
target_dims_ref <- c(time_dim, memb_dim, dat_dim)
} else {
target_dims_ref <- c(time_dim, memb_dim)
}
} else {
target_dims_ref <- c(time_dim, cat_dim, dat_dim)
}
data <- list(exp = exp, obs = obs, ref = ref)
target_dims = list(exp = target_dims_exp,
obs = target_dims_obs,
ref = target_dims_ref)
} else {
data <- list(exp = exp, obs = obs)
target_dims = list(exp = target_dims_exp,
obs = target_dims_obs)
}
output <- Apply(data,
target_dims = target_dims,
fun = .RPSS,
time_dim = time_dim, memb_dim = memb_dim,
cat_dim = cat_dim, dat_dim = dat_dim,
prob_thresholds = prob_thresholds,
indices_for_clim = indices_for_clim, Fair = Fair,
nmemb = nmemb, nmemb_ref = nmemb_ref,
weights_exp = weights_exp,
weights_ref = weights_ref,
cross.val = cross.val,
na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha,
ncores = ncores)
return(output)
}
.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL,
dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL,
Fair = FALSE, nmemb = NULL, nmemb_ref = NULL,
weights_exp = NULL, weights_ref = NULL, cross.val = FALSE,
na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) {
#--- if memb_dim:
# exp: [sdate, memb, (dat)]
# obs: [sdate, (memb), (dat)]
# ref: [sdate, memb, (dat)] or NULL
#--- if cat_dim:
# exp: [sdate, bin, (dat)]
# obs: [sdate, bin, (dat)]
# ref: [sdate, bin, (dat)] or NULL
if (isTRUE(na.rm)) {
f_NAs <- 0
} else if (isFALSE(na.rm)) {
f_NAs <- 1
} else {
f_NAs <- na.rm
}
if (is.null(dat_dim)) {
nexp <- 1
nobs <- 1
} else {
nexp <- as.numeric(dim(exp)[dat_dim])
nobs <- as.numeric(dim(obs)[dat_dim])
}
# Calculate RPS
if (!is.null(ref)) {
# Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref
## Insert memb_dim in obs
if (!is.null(memb_dim)) {
if (!memb_dim %in% names(dim(obs))) {
obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
}
}
## Insert dat_dim
if (is.null(dat_dim)) {
dim(obs) <- c(dim(obs), dat = nobs)
dim(exp) <- c(dim(exp), dat = nexp)
if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp)
}
if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) {
nref <- 1
dim(ref) <- c(dim(ref), dat = nref)
if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref)
} else {
nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp
}
# Find good values then calculate RPS
rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
for (i in 1:nexp) {
for (j in 1:nobs) {
for (k in 1:nref) {
if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps
next
}
exp_data <- exp[, , i, drop = F]
obs_data <- obs[, , j, drop = F]
ref_data <- ref[, , k, drop = F]
exp_mean <- rowMeans(exp_data)
obs_mean <- rowMeans(obs_data)
ref_mean <- rowMeans(ref_data)
good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean)
dum <- match(indices_for_clim, which(good_values))
good_indices_for_clim <- dum[!is.na(dum)]
if (f_NAs <= sum(good_values) / length(good_values)) {
rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i],
obs = obs[good_values, , j],
time_dim = time_dim, memb_dim = memb_dim,
cat_dim = cat_dim, dat_dim = NULL,
prob_thresholds = prob_thresholds,
indices_for_clim = good_indices_for_clim,
Fair = Fair, nmemb = nmemb,
weights = weights_exp[good_values, , i],
cross.val = cross.val, na.rm = na.rm)
rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k],
obs = obs[good_values, , j],
time_dim = time_dim, memb_dim = memb_dim,
cat_dim = cat_dim, dat_dim = NULL,
prob_thresholds = prob_thresholds,
indices_for_clim = good_indices_for_clim,
Fair = Fair, nmemb = nmemb_ref,
weights = weights_ref[good_values, , k],
na.rm = na.rm, cross.val = cross.val)
}
}
}
}
} else { # ref is NULL
rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim,
cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds,
indices_for_clim = indices_for_clim, Fair = Fair,
nmemb = nmemb, weights = weights_exp,
cross.val = cross.val, na.rm = na.rm)
# RPS of the reference forecast
if (!is.null(memb_dim)) {
if (!memb_dim %in% names(dim(obs))) {
obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim)
}
}
rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs))
if (is.null(dat_dim)) {
dim(obs) <- c(dim(obs), nobs = nobs)
dim(exp) <- c(dim(exp), nexp = nexp)
dim(rps_exp) <- dim(rps_ref)
}
for (i in 1:nexp) {
for (j in 1:nobs) {
# Use good values only
good_values <- !is.na(rps_exp[, i, j])
if (f_NAs <= sum(good_values) / length(good_values)) {
obs_data <- obs[good_values, , j]
if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1)
if (is.null(cat_dim)) { # calculate probs
# Subset indices_for_clim
dum <- match(indices_for_clim, which(good_values))
good_indices_for_clim <- dum[!is.na(dum)]
obs_probs <- .GetProbs(data = obs_data,
indices_for_quantiles = good_indices_for_clim,
prob_thresholds = prob_thresholds,
weights = NULL, cross.val = cross.val)
} else {
obs_probs <- t(obs_data)
}
# obs_probs: [bin, sdate]
clim_probs <- c(prob_thresholds[1], diff(prob_thresholds),
1 - prob_thresholds[length(prob_thresholds)])
clim_probs <- array(clim_probs, dim = dim(obs_probs))
# clim_probs: [bin, sdate]
# Calculate RPS for each time step
probs_clim_cumsum <- apply(clim_probs, 2, cumsum)
probs_obs_cumsum <- apply(obs_probs, 2, cumsum)
rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2)
}
if (Fair) { # FairRPS
if (!is.null(memb_dim)) {
if (memb_dim %in% names(dim(exp))) {
## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
## [formula taken from SpecsVerification::EnsRps]
R <- dim(obs)[1] #number of years
}
} else {
R <- nmemb_ref
}
adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum)
adjustment <- colSums(adjustment)
rps_ref[, i, j] <- rps_ref[, i, j] + adjustment
}
}
}
}
if (is.null(dat_dim)) {
dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim]
}
#----------------------------------------------
# Calculate RPSS
if (!is.null(dat_dim)) {
# rps_exp and rps_ref: [sdate, nexp, nobs]
rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE)
rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE)
rpss <- array(dim = c(nexp = nexp, nobs = nobs))
sign <- array(dim = c(nexp = nexp, nobs = nobs))
if (!all(is.na(rps_exp_mean))) {
for (i in 1:nexp) {
for (j in 1:nobs) {
rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j]
ind_nonNA <- !is.na(rps_exp[, i, j])
if (!any(ind_nonNA)) {
sign[i, j] <- NA
} else {
sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j],
skill_B = rps_ref[ind_nonNA, i, j],
test.type = sig_method.type, alpha = alpha,
sign = T, pval = F)$sign
}
}
}
}
# Turn NaN into NA
if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA
} else { # dat_dim is NULL
ind_nonNA <- !is.na(rps_exp)
if (!any(ind_nonNA)) {
rpss <- NA
sign <- NA
} else {
# rps_exp and rps_ref: [sdate]
rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE)
sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA],
skill_B = rps_ref[ind_nonNA],
test.type = sig_method.type, alpha = alpha,
sign = T, pval = F)$sign
}
}
return(list(rpss = rpss, sign = sign))
}