Newer
Older
#'@rdname CST_Analogs
#'@title Downscaling using Analogs based on large scale fields.
#'
#'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
#'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#'@description This function perform a downscaling using Analogs. To compute
#'the analogs, the function search for days with similar large scale conditions
#'to downscaled fields in the local scale.
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. The local scale will be usually given by
#' precipitation or Temperature, but might be another variable.
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal
#' distance in the local scale pattern (i.e. SLP).
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the
#' local variable to downscale (i.e Precipitation).
#' The search of analogs must be done in the longest dataset posible. This is
#' important since it is necessary to have a good representation of the
#' possible states of the field in the past, and therefore, to get better
#' analogs. Once the search of the analogs is complete, and in order to used the
#' three criterias the user can select a number of analogs nAnalogs to restrict
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to downscale,
#' or data to be used (seasonal forecast data, climate projections data,
#' reanalyses data).
#' The regrid into a finner scale is done interpolating with CST_Load.
#' Then, this interpolation is corrected selecting the analogs in the large
#' and local scale in based of the observations.
#' The function is an adapted version of the method of Yiou et al 2013.
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column
#' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437.
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL variable for the Large scale in the model (same variable
#'might be used in the local scale for criteria 2)
#'@param obsL variable for the large scale in the observations
#'@param expVar variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar variable for the local scale in the observations usually
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region region for the local scale
#'@param nAnalogs number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get)
#'@import ClimProjDiags
#'@return Dowscaled values of the best analogs in the criteria selected
CST_Analogs <- function(expL, obsL, expVar, obsVar, criteria = 'Large_dist') {
#checks
timevector <- obsL$Dates$start
region <- c(min(expVar$lon), max(expVar$lon), min(expVar$lat),
max(expVar$lon))
result <- Analogs(expL$data, obsL$data, time_obsL = timevector,
expVar = expVar$data, obsVar = obsVar$data,
criteria = criteria,
lon_local = expVar$lon, lat_local = expVar$lat,
region = region, nAnalogs = 1, return_list = FALSE)
obsVar$data <- result$AnalogsFields
result(obsVar)
}
#'@rdname Analogs
#'@title Search for analogs based on large scale fields.
#'
#'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
#'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
#'
#'@description This function search for days with similar large scale
#'conditions or similar large and local scale conditions.
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. For the local scale the user can select
#' any variable.
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal
#' distance in the local scale pattern (i.e. SLP).
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the
#' local variable to find the analog (i.e Precipitation).
#' Once the search of the analogs is complete, and in order to used the
#' three criterias the user can select a number of analogs nAnalogs to restrict
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to find the
#' analogs, or data to be used (seasonal forecast data, climate projections
#' data, reanalyses data).
#' The input data might be interpolated or not.
#' The function is an adapted version of the method of Yiou et al 2013.
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column
#' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437.
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL variable for the Large scale in the model (same variable
#'might be used in the local scale for criteria 2)
#'@param obsL variable for the large scale in the observations
#'@param expVar variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar variable for the local scale in the observations usually
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region region for the local scale
#'@param return_list TRUE if you want to get a list with the best analogs FALSE
#'#'if not.
#'@param nAnalogs number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get, but the number
#'of events with minimal distance in which perform the search of the best Analog.
#' The default value for the Large_dist criteria is 1, the default value for
#' the Local_dist criteria is 10 and same for Local_cor. If return_list is
#' False you will get just the first one for downscaling purposes. If return_list
#' is True you will get the list of the best analogs that were searched in nAnalogs
#' under the selected criterias.
#'@import ClimProjDiags
#'@return list list with the best analogs (time, distance)
#'@return values values of a certain variable
Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL,
criteria = "Large_dist",
lonVar = NULL, latVar = NULL, region = NULL,
nAnalogs = 1, return_list = FALSE) {
# checks
if (!all(c('lon', 'lat') %in% names(dim(expL)))) {
stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.")
}
if (!all(c('lat', 'lon') %in% names(dim(obsL)))) {
stop("Parameter 'obsL' must have the dimension 'lat' and 'lon'.")
}
if (any(is.na(expL))) {
warning("Parameter 'exp' contains NA values.")
}
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
if (any(is.na(obsL))) {
warning("Parameter 'obs' contains NA values.")
}
if (is.null(expVar) & !is.null(obsVar)) {
obsVar <- NULL
warning("Parameter 'obsVar' is set to NULL as parameter 'expVar'.")
}
if (!is.null(expVar) & is.null(obsVar)) {
expVar <- NULL
warning("Parameter 'expVar' is set to NULL as parameter 'obsVar'.")
}
if (any(names(dim(obsL)) %in% 'ftime')) {
if (any(names(dim(obsL)) %in% 'time')) {
stop("Multiple temporal dimensions ('ftime' and 'time') found",
"in parameter 'obsL'.")
} else {
time_pos_obsL <- which(names(dim(obsL)) == 'ftime')
names(dim(obsL))[time_pos_obsL] <- 'time'
if (any(names(dim(expL)) %in% 'ftime')) {
time_pos_expL <- which(names(dim(expL)) == 'ftime')
names(dim(expL))[time_pos_expL] <- 'time'
}
}
}
if (any(names(dim(obsL)) %in% 'sdate')) {
if (any(names(dim(obsL)) %in% 'time')) {
dims_obsL <- dim(obsL)
pos_sdate <- which(names(dim(obsL)) == 'sdate')
pos_time <- which(names(dim(obsL)) == 'time')
pos <- 1 : length(dim(obsL))
pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)])
obsL <- aperm(obsL, pos)
dim(obsL) <- c(time = prod(dims_obsL[c(pos_time, pos_sdate)]),
dims_obsL[-c(pos_time, pos_sdate)])
} else {
stop("Parameter 'obsL' must have a temporal dimension.")
}
}
if (is.null(region)) {
if (!is.null(lonVar) & !is.null(latVar)) {
region <- c(min(lonVar), max(lonVar), min(latVar), max(latVar))
}
}
position <- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar,
criteria = criteria, lonVar = lonVar, latVar = latVar,
region = region)$position
best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog,
criteria = criteria,
return_list = return_list, nAnalogs = nAnalogs)$output1
Analogs_dates <- time_obsL[best]
dim(Analogs_dates) <- dim(best)
if (all(!is.null(region), !is.null(lonVar), !is.null(latVar))) {
if (is.null(obsVar)) {
obsLocal <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)
Analogs_fields <- Subset(obsL, along = which(names(dim(obsLocal)) == 'time'),
indices = best)
} else {
obsVar <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)
Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'),
indices = best)
}
warning("One or more of the parameter 'region', 'lonVar' and 'latVar'",
" are NULL and the large scale field will be returned.")
if (is.null(obsVar)) {
Analogs_fields <- Subset(obsL, along = which(names(dim(obsL)) == 'time'),
indices = best)
} else {
Analogs_fields <- Subset(obsVar,
along = which(names(dim(obsVar)) == 'time'),
indices = best)
}
lon_dim <- which(names(dim(Analogs_fields)) == 'lon')
lat_dim <- which(names(dim(Analogs_fields)) == 'lat')
if (lon_dim < lat_dim) {
dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lon_dim, lat_dim)], dim(best))
} else if (lon_dim > lat_dim) {
dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lat_dim, lon_dim)], dim(best))
stop("Dimensions 'lat' and 'lon' not found.")
}
return(list(DatesAnalogs = Analogs_dates, AnalogsFields = Analogs_fields))
BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE,
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
nAnalogs = 10) {
pos_dim <- which(names(dim(position)) == 'pos')
if (dim(position)[pos_dim] == 1) {
pos1 <- position
if (criteria != 'Large_dist') {
warning("Dimension 'pos' in parameter 'position' has length 1,",
" criteria 'Large_dist' will be used.")
criteria <- 'Large_dist'
}
} else if (dim(position)[pos_dim] == 2) {
pos1 <- Subset(position, along = pos_dim, indices = 1)
pos2 <- Subset(position, along = pos_dim, indices = 2)
if (criteria == 'Local_cor') {
warning("Dimension 'pos' in parameter 'position' has length 2,",
" criteria 'Local_dist' will be used.")
criteria <- 'Local_dist'
}
} else if (dim(position)[pos_dim] == 3) {
pos1 <- Subset(position, along = pos_dim, indices = 1)
pos2 <- Subset(position, along = pos_dim, indices = 2)
pos3 <- Subset(position, along = pos_dim, indices = 3)
if (criteria != 'Local_cor') {
warning("Parameter 'criteria' is set to", criteria, ".")
}
} else {
stop("Parameter 'position' has dimension 'pos' of different ",
"length than expected (from 1 to 3).")
}
if (criteria == 'Large_dist') {
if (return_list == FALSE) {
pos <- pos1[1]
pos1 <- pos1[1 : nAnalogs]
pos2 <- pos2[1 : nAnalogs]
if(length(best)==1){
warning("Just 1 best analog matching Large_dist and ",
"Local_dist criteria")
}
if(length(best)==1 & is.na(best[1])==TRUE){
stop("no best analogs matching Large_dist and Local_dist criterias")
}
pos <- pos1[as.logical(best)]
pos <- pos[which(!is.na(pos))]
if (return_list == FALSE) {
pos <- pos[1]
}
pos1 <- pos1[1 : nAnalogs]
pos2 <- pos2[1 : nAnalogs]
best <- match(pos1, pos2)
pos <- pos1[as.logical(best)]
pos <- pos[which(!is.na(pos))]
best <- match(pos, pos3)
pos <- pos[order(best, decreasing = F)]
pos <- pos[which(!is.na(pos))]
if (return_list == FALSE) {
pos[1]
}
return(pos)
Select <- function(expL, obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist",
lonVar = NULL, latVar = NULL, region = NULL) {
names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL)))
metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')),
fun = .select, expL, metric = "dist")$output1
if (length(dim(metric1)) > 1) {
dim_time_obs <- which(names(dim(metric1)) == 'time' |
names(dim(metric1)) == 'ftime')
margins <- c(1 : length(dim(metric1)))[-dim_time_obs]
pos1 <- apply(metric1, margins, order)
names(dim(pos1))[1] <- 'time'
metric1 <- apply(metric1, margins, sort)
names(dim(metric1))[1] <- 'time'
} else {
pos1 <- order(metric1)
dim(pos1) <- c(time = length(pos1))
metric1 <- sort(metric1)
dim(metric1) <- c(time = length(metric1))
}
dim(metric1) <- c(dim(metric1), metric = 1)
dim(pos1) <- c(dim(pos1), pos = 1)
return(list(metric = metric1, position = pos1))
}
if (criteria == "Local_dist" | criteria == "Local_cor") {
obs <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)$data
exp <- SelBox(expL, lon = lonVar, lat = latVar, region = region)$data
metric2 <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = .select, exp, metric = "dist")$output1
dim(metric2) <- c(dim(metric2), metric=1)
margins <- c(1 : (length(dim(metric2))))[-dim_time_obs]
names(dim(pos2))[1] <- 'time'
metric2 <- apply(metric2, margins, sort)
names(dim(metric2))[1] <- 'time'
if (criteria == "Local_dist") {
metric <- abind(metric1, metric2, along = length(dim(metric1))+1)
position <- abind(pos1, pos2, along = length(dim(pos1))+1)
names(dim(metric)) <- c(names(dim(pos1)), 'metric')
names(dim(position)) <- c(names(dim(pos1)), 'pos')
return(list(metric = metric, position = position))
}
}
obs <- SelBox(obsVar, lon = lonVar, lat = latVar, region = region)$data
exp <- SelBox(expVar, lon = lonVar, lat = latVar, region = region)$data
metric3 <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = .select, exp, metric = "cor")$output1
margins <- c(1 : length(dim(metric3)))[-dim_time_obs]
pos3 <- apply(metric3, margins, order, decreasing = TRUE)
names(dim(pos3))[1] <- 'time'
metric3 <- apply(metric3, margins, sort)
names(dim(metric3))[1] <- 'time'
metric <- abind(metric1, metric2, metric3, along = length(dim(metric1)) + 1)
position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1)
names(dim(metric)) <- c(names(dim(metric1)), 'metric')
names(dim(position)) <- c(names(dim(pos1)), 'pos')
return(list(metric = metric, position = position))
}
else {
stop("Parameter 'criteria' must to be one of the: 'Large_dist', ",
"'Local_dist','Local_cor'.")
}
}
.select <- function(exp, obs, metric = "dist") {
if (metric == "dist") {
result <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = function(x) {sum((x - exp) ^ 2)})$output1
} else if (metric == "cor") {
result <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = function(x) {cor(as.vector(x), as.vector(exp))})$output1
replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat',
if (!is.character(names_exp)) {
stop("Parameter 'names_exp' must be a vector of characters.")
}
if (!is.character(names_obs)) {
stop("Parameter 'names_obs' must be a vector of characters.")
}
latlon_dim_exp <- which(names_exp == lat_name | names_exp == lon_name)
latlon_dim_obs <- which(names_obs == lat_name | names_obs == lon_name)
if (any(unlist(lapply(names_exp[-latlon_dim_exp],
function(x){x == names_obs[-latlon_dim_obs]})))) {
original_pos <- lapply(names_exp, function(x) which(x == names_obs[-latlon_dim_obs]))
original_pos <- lapply(original_pos, length) > 0
names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp")
}
return(names_exp)
## Improvements: other dimensions to avoid replacement for more flexibility.
}