CST_Analogs.R 19.3 KB
Newer Older
#'@rdname CST_Analogs
#'@title Downscaling using Analogs based on large scale fields.
#' 
#'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
#'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
#'@description This function perform a downscaling using Analogs. To compute 
#'the analogs, the function search for days with similar large scale conditions
#'to downscaled fields in the local scale. 
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level 
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. The local scale will be usually given by 
#' precipitation or Temperature, but might be another variable. 
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal 
#' distance in the local scale pattern (i.e. SLP). 
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal 
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the 
#' local variable to downscale (i.e Precipitation).
#' The search of analogs must be done in the longest dataset posible. This is 
#' important since it is necessary to have a good representation of the 
#' possible states of the field in the past, and therefore, to get better 
#' analogs. Once the search of the analogs is complete, and in order to used the 
#' three criterias the user can select a number of analogs nAnalogs to restrict 
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to downscale,
#' or data to be used (seasonal forecast data, climate projections data, 
#' reanalyses data). 
#' The regrid into a finner scale is done interpolating with CST_Load. 
#' Then, this interpolation is corrected selecting the analogs in the large 
#' and local scale in based of the observations. 
#' The function is an adapted version of the method of Yiou et al 2013.       
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column 
#' from surface pressure using analogues.  Clim. Dyn., 41, 1419-1437. 
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL      variable for the Large scale in the model (same variable 
#'might be used in the local scale for criteria 2)
#'@param obsL      variable for the large scale in the observations
#'@param expVar    variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar    variable for the local scale in the observations usually 
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region  region for the local scale
#'@param nAnalogs  number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get)
#'@param return_list TRUE if you want to get a list with the best analogs FALSE 
#'if not. 
#'@param mAnalogs months for searching the analogs
#'@import multiapply
#'@import ClimProjDiags
#'@import abind
#'@return list  list with the best analogs (time, distance) 
#'@return values values of a certain variable 
#'@example
#'expL <- lonlat_data$exp
#'obsL <- lonlat_data$obs
#'dim(obsL$data) <- c(dataset = 1, member = 1, time = 18, lat = 22, lon = 53)
#'expVar <- lonlat_prec$exp
#'obsVar <- lonlat_prec$obs
#'downscaledVar <- CST_Analogs(....)
CST_Analogs <- function(expL, obsL, expVar, obsVar, criteria = 'Large_dist')
  #checks
  timevector <- obsL$Dates$start
  region <- c(min(expVar$lon), max(expVar$lon), min(expVar$lat), max(expVar$lon))
  result <- Analogs(expL$data, obsL$data, time_obsL = timevector, 
                    expVar = expVar$data, obsVar = obsVar$data,
                    criteria = criteria, 
                   lon_local = expVar$lon, lat_local = expVar$lat, 
                   region = region, nAnalogs = 1, return_list = FALSE)
  obsVar$data <- result$AnalogsFields
  result(obsVar)
}
  #'@rdname Analogs
  #'@title Search for analogs based on large scale fields.
  #' 
  #'@author Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
  #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es}
  #'        
#'@description This function search for days with similar large scale 
#'conditions or similar large and local scale conditions. 
#'The large scale and the local scale regions are defined by the user.
#'The large scale is usually given by atmospheric circulation as sea level 
#'pressure or geopotential height (Yiou et al, 2013) but the function gives the
#' possibility to use another field. For the local scale the user can select 
#' any variable. 
#' The analogs function will find the best analogs based in three criterias:
#' (1) Minimal distance in the large scale pattern (i.e. SLP)
#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal 
#' distance in the local scale pattern (i.e. SLP). 
#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal 
#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the 
#' local variable to find the analog (i.e Precipitation).
#' Once the search of the analogs is complete, and in order to used the 
#' three criterias the user can select a number of analogs nAnalogs to restrict 
#' the selection of the best analogs in a short number of posibilities, the best
#' ones. By default this parameter will be 1.
#' This function has not constrains of specific regions, variables to find the
#' analogs, or data to be used (seasonal forecast data, climate projections 
#' data, reanalyses data). 
#' The input data might be interpolated or not. 
#' The function is an adapted version of the method of Yiou et al 2013.       
#'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
#' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column 
#' from surface pressure using analogues.  Clim. Dyn., 41, 1419-1437. 
#' \email{pascal.yiou@lsce.ipsl.fr}
#'@param criteria different criteria to be used for the selection of analogs
#'if criteria = "Large_dist"
#'if criteria ="Local_dist"
#'if criteria ="Local_cor"
#'@param expL      variable for the Large scale in the model (same variable 
#'might be used in the local scale for criteria 2)
#'@param obsL      variable for the large scale in the observations
#'@param expVar    variable for the local scale in the model usually different
#'to the variable in expL
#'@param obsVar    variable for the local scale in the observations usually 
#'different to the variable in obsL
#'@param lon_local longitude in the local scale
#'@param lat_local latitude in the local scale
#'@param region  region for the local scale
#'@param nAnalogs  number of Analogs to be selected to apply the criterias (this
#'is not the necessary the number of analogs that the user can get)
#'@param return_list TRUE if you want to get a list with the best analogs FALSE 
#'if not. 
#'@import multiapply
#'@import ClimProjDiags
#'@import abind
#'@return list  list with the best analogs (time, distance) 
#'@return values values of a certain variable 
#'@example
Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, 
                    criteria = "Large_dist",
                    lon_local = NULL, lat_local = NULL, region = NULL, 
                    nAnalogs = 1, return_list = FALSE) {
  # checks
     if (!all(c('lon', 'lat') %in% names(dim(expL)))) {
       stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.")
     }
     
     if (!all(c('lat', 'lon') %in% names(dim(obsL)))) {
       stop("Parameter 'obsL' must have the dimension 'sdate'.")
     }
     
     if (any(is.na(expL)))  {
       warning("Parameter 'exp' contains NA values.")
     }
     
     if (any(is.na(obsL))) {
       warning("Parameter 'obs' contains NA values.")
     }
  position <- Select(expL = expL, obsL = obsL,  expVar = expVar, obsVar = obsVar, 
                   criteria = criteria, lon_local = lon_local, lat_local = lat_local,
                   region = region)$position
  best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog,
                criteria = criteria, return_list = return_list, nAnalogs = nAnalogs)
  Analogs_dates <- time_obsL[best]
  dim(Analogs_dates) <- dim(best)
  if (is.null(obsVar)) {
     obsLocal <- SelBox(obsL, lon = lon_local, lat = lat_local, region = region)
     Analogs_fields <- Subset(obsLocal, along = which(names(dim(obsLocal)) == 'time'), indices = best)
  } else {
    obsVar <- SelBox(obsVar, lon = lon_local, lat = lat_local, region = region)
    Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best)
  }
  lon_dim <- which(names(dim(Analogs_fields)) == 'lon')
  lat_dim <- which(names(dim(Analogs_fields)) == 'lat')
  if (lon_dim < lat_dim) {
     dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lon_dim, lat_dim)], dim(best))
  } else if (lat_dim > lon_dim) {
     dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lat_dim, lon_dim)], dim(best))
  } else {
     stop("Dimensions 'lat' and 'lon' not found.")
  }
  return(list(DatesAnalogs = Analogs_dates,  AnalogsFields = Analogs_fields))
}
#'@example
expL <- 1 : (8 * 10 * 2 * 6 * 7)
dim(expL) <- c(lat = 8, lon = 10, time = 2, member = 6, sdate = 7)
obsL <- 1 : (8 * 10 * 5)
dim(obsL) <- c(lat = 8, lon = 10, time = 5)
position <- Select(expL = expL, obsL = obsL)$position
dim(position)
pos <- BestAnalog(position)
pos
dim(pos)
res <- Apply(list(position), target_dims = c('time','pos') , fun = BestAnalog, criteria = 'Large_dist')$output1


lat_local <- lat <- seq(0, 19, 2.5)
lon_local <- lon <- seq(0, 23, 2.5)
position= Select(expL, obsL, criteria = "Local_dist", lon_local = lon, 
             lat_local = lat,
             region = c(lonmin = 0, lonmax = 5, latmin = 0, latmax = 5))$position
res <- Apply(list(position), target_dims = c('time','pos') , fun = BestAnalog, criteria = 'Local_dist')$output1
res <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog, criteria = 'Large_dist')$output1


BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, 
                       nAnalogs = 1) {
#ahora position es un array de 2 dimensiones: una para time_obs en la que estan ordenados los mapas de observaciones y otra de position de 1 a 3 con pos1,pos2 y pos3.
    pos_dim <- which(names(dim(position)) == 'pos')
    if (dim(position)[pos_dim] == 1) {
       pos1 <- position
       if (criteria != 'Large_dist') {
          warning("Dimension 'pos' in parameter 'position' has length 1,",
                  " criteria 'Large_dist' will be used.")
          criteria <- 'Large_dist'
       }
    } else if (dim(position)[pos_dim] == 2) {
       pos1 <- Subset(position, along = pos_dim, indices = 1)
       pos2 <- Subset(position, along = pos_dim, indices = 2)
       if (criteria == 'Local_cor') {
         warning("Dimension 'pos' in parameter 'position' has length 2,",
                 " criteria 'Local_dist' will be used.")
         criteria <- 'Local_dist'
       }
    } else if (dim(position)[pos_dim] == 3) {
       pos1 <- Subset(position, along = pos_dim, indices = 1)
       pos2 <- Subset(position, along = pos_dim, indices = 2)
       pos3 <- Subset(position, along = pos_dim, indices = 3)
       if (criteria != 'Local_cor') {
         warning("Parameter 'criteria' is set to", criteria, ".")
       }
    } else {
       stop("Parameter 'position' has dimension 'pos' of different ",
            "length than expected (from 1 to 3).")
    }
    if (criteria == 'Large_dist') {
       if (return_list == FALSE) {
          pos <- pos1[1]
       } else {
           pos <- pos1[1 : nAnalogs]
       }
  } else if (criteria== 'Local_dist') {
    # pos1 <- c(7, 13, 5, 3, 6, 12, 10, 1, 8, 9, 11, 4, 2, 14)
    # pos2 <- c(4, 8, 13, 6, 3, 1, 12, 5, 9, 7, 10, 2, 11, 14)
    pos1 <- pos1[1 : nAnalogs]
    pos2 <- pos2[1 : nAnalogs]
    best <- match(pos1, pos2)
    pos <- pos1[as.logical(best)]
    pos <- pos[which(!is.na(pos))]
       if (return_list == FALSE) {
         pos <- pos[1]
       }
  } else if (criteria == 'Local_cor') {
    pos1 <- pos1[1 : nAnalogs]
    pos2 <- pos2[1 : nAnalogs]
    best <- match(pos1, pos2)
    pos <- pos1[as.logical(best)]
    pos <- pos[which(!is.na(pos))]
    # pos3 <- c(6, 11, 14, 3, 13, 7, 2, 5, 1, 12, 10, 9, 8, 4) 
    pos3 <- pos3[1 : nAnalogs]
    best <- match(pos, pos3)
    pos <- pos[order(best, decreasing = F)]
    pos <- pos[which(!is.na(pos))]
      if (return_list == FALSE) {
          pos[1]
      } 
}

expL <- (1  + 2): (4 * 3 * 2 + 2)
dim(expL) <- c(lat = 4, lon = 3, time = 2)
obsL <- 1 : c(4 * 3 * 5)
dim(obsL) <- c(lat = 4, lon = 3, time = 5)
res = Select(expL, obsL)
expL <- (1  + 2): (8 * 10 * 2 + 2)
dim(expL) <- c(lat = 8, lon = 10, time = 2)
obsL <- 1 : c(8 * 10 * 5)
dim(obsL) <- c(lat = 8, lon = 10, time = 5)
lat_local <- lat <- seq(0, 19, 2.5)
lon_local <- lon <- seq(0, 23, 2.5)
res = Select(expL, obsL, criteria = "Local_dist", lon_local = lon, 
             lat_local = lat,
             region = c(lonmin = 0, lonmax = 5, latmin = 0, latmax = 5))
# probar mas ejemplos con diferentes criterios, latitudes, longitudes
Select <- function(expL, obsL,  expVar = NULL, obsVar = NULL, criteria = "Large_dist",
                   lon_local = NULL, lat_local = NULL, region = NULL) {
  #check expL
  #check obsL
  #check obsVar
  names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL)))
  metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')), 
                   fun = .select, expL, metric = "dist")$output1
  dim_time_obs <- which(names(dim(metric1)) == 'time')
  margins <- c(1 : length(dim(metric1)))[-dim_time_obs]
  pos1 <- apply(metric1, margins, order)
  names(dim(pos1))[1] <- 'time'
  metric1 <-  apply(metric1, margins, sort)
  names(dim(metric1))[1] <- 'time'
  if (criteria == "Large_dist") {
     dim(metric1) <- c(dim(metric1), metric = 1)
     dim(pos1) <- c(dim(pos1), pos = 1)
     return(list(metric = metric1, position = pos1))
  }
  if (criteria == "Local_dist" | criteria == "Local_cor") {
    obs <- SelBox(obsL, lon = lon_local, lat = lat_local, region = region)$data
    exp <- SelBox(expL, lon = lon_local, lat = lat_local, region = region)$data
    metric2 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), 
                     fun = .select, exp, metric = "dist")$output1
    margins <- c(1 : length(dim(metric2)))[-dim_time_obs]
    pos2 <- apply(metric2, margins, order)
    names(dim(pos2))[1] <- 'time'
    metric2 <-  apply(metric2, margins, sort)
    names(dim(metric2))[1] <- 'time'
    if (criteria == "Local_dist") {
      metric <- abind(metric1, metric2, along = length(dim(metric1)) + 1)
      position <- abind(pos1, pos2, along = length(dim(pos1)) + 1) 
      names(dim(metric)) <- c(names(dim(metric1)), 'metric')
      names(dim(position)) <- c(names(dim(pos1)), 'pos')
      return(list(metric = metric, position = position))
    }   
  }
  if (criteria == "Local_cor") {
    obs <- SelBox(obsVar, lon = lon_local, lat = lat_local, region = region)$data
    exp <- SelBox(expVar, lon = lon_local, lat = lat_local, region = region)$data
    metric3 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), 
                     fun = .select, exp, metric = "cor")$output1
    margins <- c(1 : length(dim(metric3)))[-dim_time_obs]
    pos3 <- apply(metric3, margins, order, decreasing = TRUE)
    names(dim(pos3))[1] <- 'time'
    metric3 <-  apply(metric3, margins, sort)
    names(dim(metric3))[1] <- 'time'
    metric <- abind(metric1, metric2, metric3, along = length(dim(metric1)) + 1)
    position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1)   
    names(dim(metric)) <- c(names(dim(metric1)), 'metric')
    names(dim(position)) <- c(names(dim(pos1)), 'pos')
    return(list(metric = metric, position = position))
  }
  else {
    stop("Parameter 'criteria' must to be one of the: 'Large_dist', ",
         "'Local_dist','Local_cor'.")
  }
}
  
# data <- 1:(20 * 3 * 2 * 4) 
# dim(data) <- c(lon = 20, lat = 3, time = 2, model = 4)
# lon <- seq(2, 40, 2)
# lat <- c(1, 5, 10)
# a <- SelBox(data = data, lon = lon, lat = lat, region = c(2, 20, 1, 5),
#             londim = 1, latdim = 2, mask = NULL)
# str(a)
  
#'@example
exp <- (1  + 2): (4 * 3 + 2)
dim(exp) <- c(lat = 4, lon = 3)
obs <- 1 : c(5 * 4 * 3)
dim(obs) <- c(time = 5, lat = 4, lon = 3)
res <- .select(exp, obs)
res
res <- .select(exp, obs, metric = 'cor')
dim(res)
.select <- function(exp, obs, metric = "dist") {
  if (metric == "dist") {
    #metric <- sum((obs - exp) ^ 2)
    #metric <- apply(obs, "time", function(x) {sum((x - exp) ^ 2)})
    result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), 
                    fun = function(x) {sum((x - exp) ^ 2)})$output1
  } else if (metric == "cor") {
    result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), 
                    fun = function(x) {cor(as.vector(x), as.vector(exp))})$output1
  } 
  result
}
#'#This auxiliar function looks for replecated dimension names between two vectors.
#'# The repeated dimensions (different than 'lon' and 'lat') will be replaced in the first 
#'# vector with the same name and extra label '_exp'.
#'# Example for 'time' and member' repeated dim in the same order of names 
#'# dimension for 'expL' and 'obsL'
#'expL <- 1 : (8*10*2*6*7)
#'expL <- 1 :c(8*10*2*6*7)
#'dim(expL) <- c(lat = 8, lon = 10, time = 2, member = 6, sdate = 7)
#'obsL <- 1 : (8*10*5*3)
#'dim(obsL) <- c(lat = 8, time = 5, member = 3, lon = 10)
#'names_exp <- names(dim(expL))
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat"        "lon"        "time_exp"   "member_exp" "sdate" 
#'
#' Example for time and memeber repeated with different order
#'dim(obsL) <- c(lon = 10, member = 3, time = 5, lat = 8)
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat"        "lon"        "time_exp"   "member_exp" "sdate"     
#'
#'# Example for no repeated names:
#'dim(obsL) <- c(lon = 10, time_obs = 5, member_obs = 3, lat = 8)
#'names_obs <- names(dim(obsL))
#'replace_repeat_dimnames(names_exp, names_obs)
#'#[1] "lat"    "lon"    "time"   "member" "sdate" 
replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', lon_name = 'lon') {
  if (!is.character(names_exp)) {
     stop("Parameter 'names_exp' must be a vector of characters.")
  }
  if (!is.character(names_obs)) {
     stop("Parameter 'names_obs' must be a vector of characters.")
  latlon_dim_exp <- which(names_exp == lat_name | names_exp == lon_name)
  latlon_dim_obs <- which(names_obs == lat_name | names_obs == lon_name)
  if (any(unlist(lapply(names_exp[-latlon_dim_exp],
                          function(x){x == names_obs[-latlon_dim_obs]})))) {
    original_pos <- lapply(names_exp, function(x) which(x == names_obs[-latlon_dim_obs]))
    original_pos <- lapply(original_pos, length) > 0
     names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp")
## Improvements: other dimensions to avoid replacement for more flexibility.