Newer
Older
#'@rdname CST_Analogs
#'@title Downscaling using Analogs based on large scale fields.
#'
#'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
#'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#'@description This function perform a downscaling using Analogs. To compute
#'the analogs, the function search for days with similar large scale conditions
#'to downscaled fields in the local scale.
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. The local scale will be usually given by
#' precipitation or Temperature, but might be another variable.
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal
#' distance in the local scale pattern (i.e. SLP).
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the
#' local variable to downscale (i.e Precipitation).
#' The search of analogs must be done in the longest dataset posible. This is
#' important since it is necessary to have a good representation of the
#' possible states of the field in the past, and therefore, to get better
#' analogs. Once the search of the analogs is complete, and in order to used the
#' three criterias the user can select a number of analogs nAnalogs to restrict
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to downscale,
#' or data to be used (seasonal forecast data, climate projections data,
#' reanalyses data).
#' The regrid into a finner scale is done interpolating with CST_Load.
#' Then, this interpolation is corrected selecting the analogs in the large
#' and local scale in based of the observations.
#' The function is an adapted version of the method of Yiou et al 2013.
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column
#' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437.
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL variable for the Large scale in the model (same variable
#'might be used in the local scale for criteria 2)
#'@param obsL variable for the large scale in the observations
#'@param expVar variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar variable for the local scale in the observations usually
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region region for the local scale
#'@param nAnalogs number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get)
#'@param return_list TRUE if you want to get a list with the best analogs FALSE
#'if not.
#'@param mAnalogs months for searching the analogs
#'@import multiapply
#'@import ClimProjDiags
#'@return list list with the best analogs (time, distance)
#'@return values values of a certain variable
#'@example
#'expL <- lonlat_data$exp
#'obsL <- lonlat_data$obs
#'dim(obsL$data) <- c(dataset = 1, member = 1, time = 18, lat = 22, lon = 53)
#'expVar <- lonlat_prec$exp
#'obsVar <- lonlat_prec$obs
#'downscaledVar <- CST_Analogs(....)
CST_Analogs <- function(expL, obsL, expVar, obsVar, criteria = 'Large_dist')
#checks
timevector <- obsL$Dates$start
region <- c(min(expVar$lon), max(expVar$lon), min(expVar$lat), max(expVar$lon))
result <- Analogs(expL$data, obsL$data, time_obsL = timevector,
expVar = expVar$data, obsVar = obsVar$data,
criteria = criteria,
lon_local = expVar$lon, lat_local = expVar$lat,
region = region, nAnalogs = 1, return_list = FALSE)
obsVar$data <- result$AnalogsFields
result(obsVar)
}
#'@rdname Analogs
#'@title Search for analogs based on large scale fields.
#'
#'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
#'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
#'
#'@description This function search for days with similar large scale
#'conditions or similar large and local scale conditions.
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. For the local scale the user can select
#' any variable.
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal
#' distance in the local scale pattern (i.e. SLP).
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the
#' local variable to find the analog (i.e Precipitation).
#' Once the search of the analogs is complete, and in order to used the
#' three criterias the user can select a number of analogs nAnalogs to restrict
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to find the
#' analogs, or data to be used (seasonal forecast data, climate projections
#' data, reanalyses data).
#' The input data might be interpolated or not.
#' The function is an adapted version of the method of Yiou et al 2013.
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column
#' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437.
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL variable for the Large scale in the model (same variable
#'might be used in the local scale for criteria 2)
#'@param obsL variable for the large scale in the observations
#'@param expVar variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar variable for the local scale in the observations usually
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region region for the local scale
#'@param nAnalogs number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get)
#'@param return_list TRUE if you want to get a list with the best analogs FALSE
#'if not.
#'@import multiapply
#'@import ClimProjDiags
#'@return list list with the best analogs (time, distance)
#'@return values values of a certain variable
Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL,
criteria = "Large_dist",
lon_local = NULL, lat_local = NULL, region = NULL,
nAnalogs = 1, return_list = FALSE) {
# checks
if (!all(c('lon', 'lat') %in% names(dim(expL)))) {
stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.")
}
if (!all(c('lat', 'lon') %in% names(dim(obsL)))) {
stop("Parameter 'obsL' must have the dimension 'sdate'.")
}
if (any(is.na(expL))) {
warning("Parameter 'exp' contains NA values.")
}
if (any(is.na(obsL))) {
warning("Parameter 'obs' contains NA values.")
}
position <- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar,
criteria = criteria, lon_local = lon_local, lat_local = lat_local,
region = region)$position
best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog,
criteria = criteria, return_list = return_list, nAnalogs = nAnalogs)
Analogs_dates <- time_obsL[best]
dim(Analogs_dates) <- dim(best)
if (is.null(obsVar)) {
obsLocal <- SelBox(obsL, lon = lon_local, lat = lat_local, region = region)
Analogs_fields <- Subset(obsLocal, along = which(names(dim(obsLocal)) == 'time'), indices = best)
} else {
obsVar <- SelBox(obsVar, lon = lon_local, lat = lat_local, region = region)
Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best)
}
lon_dim <- which(names(dim(Analogs_fields)) == 'lon')
lat_dim <- which(names(dim(Analogs_fields)) == 'lat')
if (lon_dim < lat_dim) {
dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lon_dim, lat_dim)], dim(best))
} else if (lat_dim > lon_dim) {
dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lat_dim, lon_dim)], dim(best))
} else {
stop("Dimensions 'lat' and 'lon' not found.")
}
return(list(DatesAnalogs = Analogs_dates, AnalogsFields = Analogs_fields))
expL <- 1 : (8 * 10 * 2 * 6 * 7)
dim(expL) <- c(lat = 8, lon = 10, time = 2, member = 6, sdate = 7)
position <- Select(expL = expL, obsL = obsL)$position
dim(position)
pos <- BestAnalog(position)
pos
dim(pos)
res <- Apply(list(position), target_dims = c('time','pos') , fun = BestAnalog, criteria = 'Large_dist')$output1
lat_local <- lat <- seq(0, 19, 2.5)
position= Select(expL, obsL, criteria = "Local_dist", lon_local = lon,
region = c(lonmin = 0, lonmax = 5, latmin = 0, latmax = 5))$position
res <- Apply(list(position), target_dims = c('time','pos') , fun = BestAnalog, criteria = 'Local_dist')$output1
res <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog, criteria = 'Large_dist')$output1
BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE,
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#ahora position es un array de 2 dimensiones: una para time_obs en la que estan ordenados los mapas de observaciones y otra de position de 1 a 3 con pos1,pos2 y pos3.
pos_dim <- which(names(dim(position)) == 'pos')
if (dim(position)[pos_dim] == 1) {
pos1 <- position
if (criteria != 'Large_dist') {
warning("Dimension 'pos' in parameter 'position' has length 1,",
" criteria 'Large_dist' will be used.")
criteria <- 'Large_dist'
}
} else if (dim(position)[pos_dim] == 2) {
pos1 <- Subset(position, along = pos_dim, indices = 1)
pos2 <- Subset(position, along = pos_dim, indices = 2)
if (criteria == 'Local_cor') {
warning("Dimension 'pos' in parameter 'position' has length 2,",
" criteria 'Local_dist' will be used.")
criteria <- 'Local_dist'
}
} else if (dim(position)[pos_dim] == 3) {
pos1 <- Subset(position, along = pos_dim, indices = 1)
pos2 <- Subset(position, along = pos_dim, indices = 2)
pos3 <- Subset(position, along = pos_dim, indices = 3)
if (criteria != 'Local_cor') {
warning("Parameter 'criteria' is set to", criteria, ".")
}
} else {
stop("Parameter 'position' has dimension 'pos' of different ",
"length than expected (from 1 to 3).")
}
if (criteria == 'Large_dist') {
if (return_list == FALSE) {
pos <- pos1[1]
} else {
pos <- pos1[1 : nAnalogs]
}
} else if (criteria== 'Local_dist') {
# pos1 <- c(7, 13, 5, 3, 6, 12, 10, 1, 8, 9, 11, 4, 2, 14)
# pos2 <- c(4, 8, 13, 6, 3, 1, 12, 5, 9, 7, 10, 2, 11, 14)
pos1 <- pos1[1 : nAnalogs]
pos2 <- pos2[1 : nAnalogs]
best <- match(pos1, pos2)
pos <- pos1[as.logical(best)]
pos <- pos[which(!is.na(pos))]
if (return_list == FALSE) {
pos <- pos[1]
}
} else if (criteria == 'Local_cor') {
pos1 <- pos1[1 : nAnalogs]
pos2 <- pos2[1 : nAnalogs]
best <- match(pos1, pos2)
pos <- pos1[as.logical(best)]
pos <- pos[which(!is.na(pos))]
# pos3 <- c(6, 11, 14, 3, 13, 7, 2, 5, 1, 12, 10, 9, 8, 4)
best <- match(pos, pos3)
pos <- pos[order(best, decreasing = F)]
pos <- pos[which(!is.na(pos))]
if (return_list == FALSE) {
pos[1]
}
}
expL <- (1 + 2): (4 * 3 * 2 + 2)
dim(expL) <- c(lat = 4, lon = 3, time = 2)
obsL <- 1 : c(4 * 3 * 5)
dim(obsL) <- c(lat = 4, lon = 3, time = 5)
res = Select(expL, obsL)
expL <- (1 + 2): (8 * 10 * 2 + 2)
dim(expL) <- c(lat = 8, lon = 10, time = 2)
obsL <- 1 : c(8 * 10 * 5)
dim(obsL) <- c(lat = 8, lon = 10, time = 5)
lat_local <- lat <- seq(0, 19, 2.5)
lon_local <- lon <- seq(0, 23, 2.5)
res = Select(expL, obsL, criteria = "Local_dist", lon_local = lon,
lat_local = lat,
region = c(lonmin = 0, lonmax = 5, latmin = 0, latmax = 5))
# probar mas ejemplos con diferentes criterios, latitudes, longitudes
Select <- function(expL, obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist",
lon_local = NULL, lat_local = NULL, region = NULL) {
#check expL
#check obsL
#check obsVar
names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL)))
metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')),
fun = .select, expL, metric = "dist")$output1
dim_time_obs <- which(names(dim(metric1)) == 'time')
margins <- c(1 : length(dim(metric1)))[-dim_time_obs]
pos1 <- apply(metric1, margins, order)
names(dim(pos1))[1] <- 'time'
metric1 <- apply(metric1, margins, sort)
names(dim(metric1))[1] <- 'time'
dim(metric1) <- c(dim(metric1), metric = 1)
dim(pos1) <- c(dim(pos1), pos = 1)
return(list(metric = metric1, position = pos1))
}
if (criteria == "Local_dist" | criteria == "Local_cor") {
obs <- SelBox(obsL, lon = lon_local, lat = lat_local, region = region)$data
exp <- SelBox(expL, lon = lon_local, lat = lat_local, region = region)$data
metric2 <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = .select, exp, metric = "dist")$output1
margins <- c(1 : length(dim(metric2)))[-dim_time_obs]
pos2 <- apply(metric2, margins, order)
names(dim(pos2))[1] <- 'time'
metric2 <- apply(metric2, margins, sort)
names(dim(metric2))[1] <- 'time'
if (criteria == "Local_dist") {
metric <- abind(metric1, metric2, along = length(dim(metric1)) + 1)
position <- abind(pos1, pos2, along = length(dim(pos1)) + 1)
names(dim(metric)) <- c(names(dim(metric1)), 'metric')
names(dim(position)) <- c(names(dim(pos1)), 'pos')
return(list(metric = metric, position = position))
}
}
if (criteria == "Local_cor") {
obs <- SelBox(obsVar, lon = lon_local, lat = lat_local, region = region)$data
exp <- SelBox(expVar, lon = lon_local, lat = lat_local, region = region)$data
metric3 <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = .select, exp, metric = "cor")$output1
margins <- c(1 : length(dim(metric3)))[-dim_time_obs]
pos3 <- apply(metric3, margins, order, decreasing = TRUE)
names(dim(pos3))[1] <- 'time'
metric3 <- apply(metric3, margins, sort)
names(dim(metric3))[1] <- 'time'
metric <- abind(metric1, metric2, metric3, along = length(dim(metric1)) + 1)
position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1)
names(dim(metric)) <- c(names(dim(metric1)), 'metric')
names(dim(position)) <- c(names(dim(pos1)), 'pos')
return(list(metric = metric, position = position))
}
else {
stop("Parameter 'criteria' must to be one of the: 'Large_dist', ",
"'Local_dist','Local_cor'.")
}
}
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
# dim(data) <- c(lon = 20, lat = 3, time = 2, model = 4)
# lon <- seq(2, 40, 2)
# lat <- c(1, 5, 10)
# a <- SelBox(data = data, lon = lon, lat = lat, region = c(2, 20, 1, 5),
# londim = 1, latdim = 2, mask = NULL)
# str(a)
#'@example
exp <- (1 + 2): (4 * 3 + 2)
dim(exp) <- c(lat = 4, lon = 3)
obs <- 1 : c(5 * 4 * 3)
dim(obs) <- c(time = 5, lat = 4, lon = 3)
res <- .select(exp, obs)
res
res <- .select(exp, obs, metric = 'cor')
dim(res)
.select <- function(exp, obs, metric = "dist") {
if (metric == "dist") {
#metric <- sum((obs - exp) ^ 2)
#metric <- apply(obs, "time", function(x) {sum((x - exp) ^ 2)})
result <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = function(x) {sum((x - exp) ^ 2)})$output1
} else if (metric == "cor") {
result <- Apply(list(obs), target_dims = list(c('lat', 'lon')),
fun = function(x) {cor(as.vector(x), as.vector(exp))})$output1
}
result
}
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#'#This auxiliar function looks for replecated dimension names between two vectors.
#'# The repeated dimensions (different than 'lon' and 'lat') will be replaced in the first
#'# vector with the same name and extra label '_exp'.
#'# Example for 'time' and member' repeated dim in the same order of names
#'# dimension for 'expL' and 'obsL'
#'expL <- 1 : (8*10*2*6*7)
#'expL <- 1 :c(8*10*2*6*7)
#'dim(expL) <- c(lat = 8, lon = 10, time = 2, member = 6, sdate = 7)
#'obsL <- 1 : (8*10*5*3)
#'dim(obsL) <- c(lat = 8, time = 5, member = 3, lon = 10)
#'names_exp <- names(dim(expL))
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat" "lon" "time_exp" "member_exp" "sdate"
#'
#' Example for time and memeber repeated with different order
#'dim(obsL) <- c(lon = 10, member = 3, time = 5, lat = 8)
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat" "lon" "time_exp" "member_exp" "sdate"
#'
#'# Example for no repeated names:
#'dim(obsL) <- c(lon = 10, time_obs = 5, member_obs = 3, lat = 8)
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat" "lon" "time" "member" "sdate"
replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', lon_name = 'lon') {
if (!is.character(names_exp)) {
stop("Parameter 'names_exp' must be a vector of characters.")
}
if (!is.character(names_obs)) {
stop("Parameter 'names_obs' must be a vector of characters.")
latlon_dim_exp <- which(names_exp == lat_name | names_exp == lon_name)
latlon_dim_obs <- which(names_obs == lat_name | names_obs == lon_name)
if (any(unlist(lapply(names_exp[-latlon_dim_exp],
function(x){x == names_obs[-latlon_dim_obs]})))) {
original_pos <- lapply(names_exp, function(x) which(x == names_obs[-latlon_dim_obs]))
original_pos <- lapply(original_pos, length) > 0
names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp")
}
return(names_exp)
## Improvements: other dimensions to avoid replacement for more flexibility.