#'@rdname CST_Analogs #'@title Downscaling using Analogs based on large scale fields. #' #'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function perform a downscaling using Analogs. To compute #'the analogs, the function search for days with similar large scale conditions #'to downscaled fields in the local scale. #'The large scale and the local scale regions are defined by the user. #'The large scale is usually given by atmospheric circulation as sea level #'pressure or geopotential height (Yiou et al, 2013) but the function gives the #' possibility to use another field. The local scale will be usually given by #' precipitation or Temperature, but might be another variable. #' The analogs function will find the best analogs based in three criterias: #' (1) Minimal distance in the large scale pattern (i.e. SLP) #' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal #' distance in the local scale pattern (i.e. SLP). #' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal #' distance in the local scale pattern (i.e. SLP) and maxima correlation in the #' local variable to downscale (i.e Precipitation). #' The search of analogs must be done in the longest dataset posible. This is #' important since it is necessary to have a good representation of the #' possible states of the field in the past, and therefore, to get better #' analogs. Once the search of the analogs is complete, and in order to used the #' three criterias the user can select a number of analogs nAnalogs to restrict #' the selection of the best analogs in a short number of posibilities, the best #' ones. By default this parameter will be 1. #' This function has not constrains of specific regions, variables to downscale, #' or data to be used (seasonal forecast data, climate projections data, #' reanalyses data). #' The regrid into a finner scale is done interpolating with CST_Load. #' Then, this interpolation is corrected selecting the analogs in the large #' and local scale in based of the observations. #' The function is an adapted version of the method of Yiou et al 2013. #'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, #' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column #' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. #' \email{pascal.yiou@lsce.ipsl.fr} #'@param criteria different criteria to be used for the selection of analogs #'if criteria = "Large_dist" #'if criteria ="Local_dist" #'if criteria ="Local_cor" #'@param expL variable for the Large scale in the model (same variable #'might be used in the local scale for criteria 2) #'@param obsL variable for the large scale in the observations #'@param expVar variable for the local scale in the model usually different #'to the variable in expL #'@param obsVar variable for the local scale in the observations usually #'different to the variable in obsL #'@param lon_local longitude in the local scale #'@param lat_local latitude in the local scale #'@param region region for the local scale #'@param nAnalogs number of Analogs to be selected to apply the criterias (this #'is not the necessary the number of analogs that the user can get) #'@import multiApply #'@import ClimProjDiags #'@import abind #'@return Dowscaled values of the best analogs in the criteria selected #'@export CST_Analogs <- function(expL, obsL, expVar, obsVar, criteria = 'Large_dist') { #checks timevector <- obsL$Dates$start region <- c(min(expVar$lon), max(expVar$lon), min(expVar$lat), max(expVar$lon)) result <- Analogs(expL$data, obsL$data, time_obsL = timevector, expVar = expVar$data, obsVar = obsVar$data, criteria = criteria, lon_local = expVar$lon, lat_local = expVar$lat, region = region, nAnalogs = 1, return_list = FALSE) obsVar$data <- result$AnalogsFields result(obsVar) } #'@rdname Analogs #'@title Search for analogs based on large scale fields. #' #'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function search for days with similar large scale #'conditions or similar large and local scale conditions. #' #'The large scale and the local scale regions are defined by the user. #'The large scale is usually given by atmospheric circulation as sea level #'pressure or geopotential height (Yiou et al, 2013) but the function gives the #' possibility to use another field. For the local scale the user can select #' any variable. #' The analogs function will find the best analogs based in three criterias: #' (1) Minimal distance in the large scale pattern (i.e. SLP) #' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal #' distance in the local scale pattern (i.e. SLP). #' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal #' distance in the local scale pattern (i.e. SLP) and maxima correlation in the #' local variable to find the analog (i.e Precipitation). #' Once the search of the analogs is complete, and in order to used the #' three criterias the user can select a number of analogs nAnalogs to restrict #' the selection of the best analogs in a short number of posibilities, the best #' ones. By default this parameter will be 1. #' This function has not constrains of specific regions, variables to find the #' analogs, or data to be used (seasonal forecast data, climate projections #' data, reanalyses data). #' The input data might be interpolated or not. #' The function is an adapted version of the method of Yiou et al 2013. #'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, #' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column #' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. #' \email{pascal.yiou@lsce.ipsl.fr} #'@param criteria different criteria to be used for the selection of analogs #'if criteria = "Large_dist" #'if criteria ="Local_dist" #'if criteria ="Local_cor" #'@param expL variable for the Large scale in the model (same variable #'might be used in the local scale for criteria 2) #'@param obsL variable for the large scale in the observations #'@param expVar variable for the local scale in the model usually different #'to the variable in expL #'@param obsVar variable for the local scale in the observations usually #'different to the variable in obsL #'@param lon_local longitude in the local scale #'@param lat_local latitude in the local scale #'@param region region for the local scale #'@param return_list TRUE if you want to get a list with the best analogs FALSE #'#'if not. #'@param nAnalogs number of Analogs to be selected to apply the criterias (this #'is not the necessary the number of analogs that the user can get, but the number #'of events with minimal distance in which perform the search of the best Analog. #' The default value for the Large_dist criteria is 1, the default value for #' the Local_dist criteria is 10 and same for Local_cor. If return_list is #' False you will get just the first one for downscaling purposes. If return_list #' is True you will get the list of the best analogs that were searched in nAnalogs #' under the selected criterias. #'@import multiApply #'@import ClimProjDiags #'@import abind #'@return list list with the best analogs (time, distance) #'@return values values of a certain variable #'@export Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL, nAnalogs = 1, return_list = FALSE) { # checks if (!all(c('lon', 'lat') %in% names(dim(expL)))) { stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.") } if (!all(c('lat', 'lon') %in% names(dim(obsL)))) { stop("Parameter 'obsL' must have the dimension 'lat' and 'lon'.") } if (any(is.na(expL))) { warning("Parameter 'exp' contains NA values.") } if (any(is.na(obsL))) { warning("Parameter 'obs' contains NA values.") } if (is.null(expVar) & !is.null(obsVar)) { obsVar <- NULL warning("Parameter 'obsVar' is set to NULL as parameter 'expVar'.") } if (!is.null(expVar) & is.null(obsVar)) { expVar <- NULL warning("Parameter 'expVar' is set to NULL as parameter 'obsVar'.") } if (any(names(dim(obsL)) %in% 'ftime')) { if (any(names(dim(obsL)) %in% 'time')) { stop("Multiple temporal dimensions ('ftime' and 'time') found", "in parameter 'obsL'.") } else { time_pos_obsL <- which(names(dim(obsL)) == 'ftime') names(dim(obsL))[time_pos_obsL] <- 'time' if (any(names(dim(expL)) %in% 'ftime')) { time_pos_expL <- which(names(dim(expL)) == 'ftime') names(dim(expL))[time_pos_expL] <- 'time' } } } if (any(names(dim(obsL)) %in% 'sdate')) { if (any(names(dim(obsL)) %in% 'time')) { dims_obsL <- dim(obsL) pos_sdate <- which(names(dim(obsL)) == 'sdate') pos_time <- which(names(dim(obsL)) == 'time') pos <- 1 : length(dim(obsL)) pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) obsL <- aperm(obsL, pos) dim(obsL) <- c(time = prod(dims_obsL[c(pos_time, pos_sdate)]), dims_obsL[-c(pos_time, pos_sdate)]) } else { stop("Parameter 'obsL' must have a temporal dimension.") } } if (is.null(region)) { if (!is.null(lonVar) & !is.null(latVar)) { region <- c(min(lonVar), max(lonVar), min(latVar), max(latVar)) } } position <- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, lonVar = lonVar, latVar = latVar, region = region)$position best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog, criteria = criteria, return_list = return_list, nAnalogs = nAnalogs)$output1 Analogs_dates <- time_obsL[best] dim(Analogs_dates) <- dim(best) if (all(!is.null(region), !is.null(lonVar), !is.null(latVar))) { if (is.null(obsVar)) { obsLocal <- SelBox(obsL, lon = lonVar, lat = latVar, region = region) Analogs_fields <- Subset(obsL, along = which(names(dim(obsLocal)) == 'time'), indices = best) } else { obsVar <- SelBox(obsL, lon = lonVar, lat = latVar, region = region) Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best) } } else { warning("One or more of the parameter 'region', 'lonVar' and 'latVar'", " are NULL and the large scale field will be returned.") if (is.null(obsVar)) { Analogs_fields <- Subset(obsL, along = which(names(dim(obsL)) == 'time'), indices = best) } else { Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best) } } lon_dim <- which(names(dim(Analogs_fields)) == 'lon') lat_dim <- which(names(dim(Analogs_fields)) == 'lat') if (lon_dim < lat_dim) { dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lon_dim, lat_dim)], dim(best)) } else if (lon_dim > lat_dim) { dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lat_dim, lon_dim)], dim(best)) } else { stop("Dimensions 'lat' and 'lon' not found.") } return(list(DatesAnalogs = Analogs_dates, AnalogsFields = Analogs_fields)) } BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, nAnalogs = 10) { pos_dim <- which(names(dim(position)) == 'pos') if (dim(position)[pos_dim] == 1) { pos1 <- position if (criteria != 'Large_dist') { warning("Dimension 'pos' in parameter 'position' has length 1,", " criteria 'Large_dist' will be used.") criteria <- 'Large_dist' } } else if (dim(position)[pos_dim] == 2) { pos1 <- Subset(position, along = pos_dim, indices = 1) pos2 <- Subset(position, along = pos_dim, indices = 2) if (criteria == 'Local_cor') { warning("Dimension 'pos' in parameter 'position' has length 2,", " criteria 'Local_dist' will be used.") criteria <- 'Local_dist' } } else if (dim(position)[pos_dim] == 3) { pos1 <- Subset(position, along = pos_dim, indices = 1) pos2 <- Subset(position, along = pos_dim, indices = 2) pos3 <- Subset(position, along = pos_dim, indices = 3) if (criteria != 'Local_cor') { warning("Parameter 'criteria' is set to", criteria, ".") } } else { stop("Parameter 'position' has dimension 'pos' of different ", "length than expected (from 1 to 3).") } if (criteria == 'Large_dist') { if (return_list == FALSE) { pos <- pos1[1] } else { pos <- pos1[1 : nAnalogs] } } else if (criteria== 'Local_dist') { pos1 <- pos1[1 : nAnalogs] pos2 <- pos2[1 : nAnalogs] best <- match(pos1, pos2) if(length(best)==1){ warning("Just 1 best analog matching Large_dist and ", "Local_dist criteria") } if(length(best)==1 & is.na(best[1])==TRUE){ stop("no best analogs matching Large_dist and Local_dist criterias") } pos <- pos1[as.logical(best)] pos <- pos[which(!is.na(pos))] if (return_list == FALSE) { pos <- pos[1] } } else if (criteria == 'Local_cor') { pos1 <- pos1[1 : nAnalogs] pos2 <- pos2[1 : nAnalogs] best <- match(pos1, pos2) pos <- pos1[as.logical(best)] pos <- pos[which(!is.na(pos))] pos3 <- pos3[1 : nAnalogs] best <- match(pos, pos3) pos <- pos[order(best, decreasing = F)] pos <- pos[which(!is.na(pos))] if (return_list == FALSE) { pos[1] } return(pos) } } Select <- function(expL, obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL) { names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')), fun = .select, expL, metric = "dist")$output1 if (length(dim(metric1)) > 1) { dim_time_obs <- which(names(dim(metric1)) == 'time' | names(dim(metric1)) == 'ftime') margins <- c(1 : length(dim(metric1)))[-dim_time_obs] pos1 <- apply(metric1, margins, order) names(dim(pos1))[1] <- 'time' metric1 <- apply(metric1, margins, sort) names(dim(metric1))[1] <- 'time' } else { pos1 <- order(metric1) dim(pos1) <- c(time = length(pos1)) metric1 <- sort(metric1) dim(metric1) <- c(time = length(metric1)) } if (criteria == "Large_dist") { dim(metric1) <- c(dim(metric1), metric = 1) dim(pos1) <- c(dim(pos1), pos = 1) return(list(metric = metric1, position = pos1)) } if (criteria == "Local_dist" | criteria == "Local_cor") { obs <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)$data exp <- SelBox(expL, lon = lonVar, lat = latVar, region = region)$data metric2 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = .select, exp, metric = "dist")$output1 dim(metric2) <- c(dim(metric2), metric=1) margins <- c(1 : (length(dim(metric2))))[-dim_time_obs] pos2 <- apply(metric2, margins, order) dim(pos2) <- dim(pos1) names(dim(pos2))[1] <- 'time' metric2 <- apply(metric2, margins, sort) names(dim(metric2))[1] <- 'time' if (criteria == "Local_dist") { metric <- abind(metric1, metric2, along = length(dim(metric1))+1) position <- abind(pos1, pos2, along = length(dim(pos1))+1) names(dim(metric)) <- c(names(dim(pos1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') return(list(metric = metric, position = position)) } } if (criteria == "Local_cor") { obs <- SelBox(obsVar, lon = lonVar, lat = latVar, region = region)$data exp <- SelBox(expVar, lon = lonVar, lat = latVar, region = region)$data metric3 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = .select, exp, metric = "cor")$output1 margins <- c(1 : length(dim(metric3)))[-dim_time_obs] pos3 <- apply(metric3, margins, order, decreasing = TRUE) names(dim(pos3))[1] <- 'time' metric3 <- apply(metric3, margins, sort) names(dim(metric3))[1] <- 'time' metric <- abind(metric1, metric2, metric3, along = length(dim(metric1)) + 1) position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1) names(dim(metric)) <- c(names(dim(metric1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') return(list(metric = metric, position = position)) } else { stop("Parameter 'criteria' must to be one of the: 'Large_dist', ", "'Local_dist','Local_cor'.") } } .select <- function(exp, obs, metric = "dist") { if (metric == "dist") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = function(x) {sum((x - exp) ^ 2)})$output1 } else if (metric == "cor") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = function(x) {cor(as.vector(x), as.vector(exp))})$output1 } result } replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', lon_name = 'lon') { if (!is.character(names_exp)) { stop("Parameter 'names_exp' must be a vector of characters.") } if (!is.character(names_obs)) { stop("Parameter 'names_obs' must be a vector of characters.") } latlon_dim_exp <- which(names_exp == lat_name | names_exp == lon_name) latlon_dim_obs <- which(names_obs == lat_name | names_obs == lon_name) if (any(unlist(lapply(names_exp[-latlon_dim_exp], function(x){x == names_obs[-latlon_dim_obs]})))) { original_pos <- lapply(names_exp, function(x) which(x == names_obs[-latlon_dim_obs])) original_pos <- lapply(original_pos, length) > 0 names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp") } return(names_exp) ## Improvements: other dimensions to avoid replacement for more flexibility. }