#'@rdname CST_Analogs #'@title Downscaling using Analogs based on large scale fields. #' #'@author M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} #'@author Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it } #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function perform a downscaling using Analogs. To compute #'the analogs, the function search for days with similar large scale conditions #'to downscaled fields to a local scale. The large scale and the local scale #'regions are defined by the user. The large scale is usually given by #'atmospheric circulation as sea level pressure or geopotential height #'(Yiou et al, 2013) but the function gives the possibility to use another #'field. The local scale will be usually given by precipitation or temperature #'fields, but might be another variable.The analogs function will find the best #'analogs based in Minimum Euclidean distance in the large scale pattern #'(i.e.SLP). #' #'The search of analogs must be done in the longest dataset posible. This is #'important since it is necessary to have a good representation of the #'possible states of the field in the past, and therefore, to get better #'analogs. #'This function has not constrains of specific regions, variables to downscale, #'or data to be used (seasonal forecast data, climate projections data, #'reanalyses data). The regrid into a finner scale is done interpolating with #'CST_Load. Then, this interpolation is corrected selecting the analogs in the #'large and local scale in based of the observations. The function is an #'adapted version of the method of Yiou et al 2013. For an advanced search of #'Analogs (multiple Analogs, different criterias, further information from the #'metrics and date of the selected Analogs) use the'Analog' #'function within 'CSTools' package. #' #'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, #' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column #' from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. #' \email{pascal.yiou@lsce.ipsl.fr} #' #'@param expL an 's2dv_cube' object containing the experimental field on the #'large scale for which the analog is aimed. This field is used to in all the #'criterias. If parameter 'expVar' is not provided, the function will return #'the expL analog. The element 'data' in the 's2dv_cube' object must have, at #'least, latitudinal and longitudinal dimensions. The object is expect to be #'already subset for the desired large scale region. #'@param obsL an 's2dv_cube' object containing the observational field on the #'large scale. The element 'data' in the 's2dv_cube' object must have the same #'latitudinal and longitudinal dimensions as parameter 'expL' and a temporal #'dimension with the maximum number of available observations. #'@param time_obsL a character string indicating the date of the observations #'in the date format (i.e. "yyyy-mm-dd") #'@param time_expL a character string indicating the date of the experiment #'in the same format than time_obsL (i.e. "yyyy-mm-dd") #'@param expVar an 's2dv_cube' object containing the experimental field on the #'local scale, usually a different variable to the parameter 'expL'. If it is #'not NULL (by default, NULL), the returned field by this function will be the #'analog of parameter 'expVar'. #'@param lonVar a vector containing the longitude of parameter 'expVar'. #'@param latVar a vector containing the latitude of parameter 'expVar'. #'@param obsVar an 's2dv_cube' containing the field of the same variable as the #'passed in parameter 'expVar' for the same region. #'@param region a vector of length four indicating the minimum longitude, the #'maximum longitude, the minimum latitude and the maximum latitude. #'@param dimension the dimension where the downscaling will be performed (i.e. #''member', 'sdate',etc)) #' #'@import multiApply #'@import s2dverification #'@import abind #'@import ClimProjDiags #' #'@seealso code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and #'\code{\link[s2dverification]{CDORemap}} #' #'@return An 'array' object containing the dowscaled values of the best #'analogs. #'@examples #'# using lonlat_data.RData in "~/cstools/data/lonlat_data.RData" #'expL = lonlat_data$exp$data #'obsL = lonlat_data$obs$data #'lon = as.vector(lonlat_data$exp$lon) #'lat = as.vector(lonlat_data$exp$lat) #'time_expL=lonlat_data$exp$Dates$start[1] #'# corresponding to stdate 1 and ftime 1 #'sdate=1 #'ftime=1 #'time_obsL=lonlat_data$obs$Dates$start #'region=c(min(lon),max(lon),min(lat),max(lat)) #'# to select the first timestep (stdate=1, ftime=1) and compute the downscaling #'# in all the members #'expL=expL[1,,sdate,ftime,,] #'obsL=obsL[1,1,,,,] #'dimension=names(dim(expL))[1] #'dim(obsL) <-c(dim(obsL)[1]*dim(obsL)[2], dim(obsL)[3], dim(obsL)[4]) #'names(dim(obsL))[1] <- 'time' #'downscaled_field<- CST_Analogs(expL = expL, obsL = obsL, time_obsL=time_obsL, #'time_expL=time_expL,dimension=dimension,expVar = expL, obsVar = obsL,lonVar = lon, #'latVar = lat, region = region) #' #'@export CST_Analogs <- function(expL, obsL, time_obsL, time_expL, dimension, expVar = NULL, obsVar = NULL, region = NULL, lonVar=NULL, latVar=NULL){ if (is.null(dimension)) { warning("It is necessary to choose a dimension to perform the downscaling (i.e. 'member', 'ftime','stdate') ") }else{ warning(paste0("the dimension selected to perform the downscaling is '", dimension,"' ")) } ApplyAnalog<-Apply(expL, target_dims = list(c('lat', 'lon')), margins=dimension, fun = Analogs, obsL = obsL, time_obsL=time_obsL, criteria = "Large_dist", lonVar = lon,time_expL=time_expL, latVar = lat, region = region) result<-ApplyAnalog$output1[1,,,] return(result) } #'@rdname Analogs #'@title Analogs based on large scale fields. #' #'@author M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} #'@author Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it } #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function perform a downscaling using Analogs. To compute #'the analogs, the function search for days with similar large scale conditions #'to downscaled fields in the local scale. The large scale and the local scale #'regions are defined by the user. The large scale is usually given by #'atmospheric circulation as sea level pressure or geopotential height (Yiou #'et al, 2013) but the function gives the possibility to use another field. The #'local scale will be usually given by precipitation or temperature fields, but #'might be another variable. #'The analogs function will find the best analogs based in three criterias: #' (1) Minimum Euclidean distance in the large scale pattern (i.e. SLP) #' (2) Minimum Euclidean distance in the large scale pattern (i.e. SLP) #' and minimum Euclidean distance in the local scale pattern (i.e. SLP). #' (3) Minimum Euclidean distance in the large scale pattern (i.e. SLP), #' minimum distance in the local scale pattern (i.e. SLP) and highest #' correlation in the local variable to downscale (i.e Precipitation). #'The search of analogs must be done in the longest dataset posible. This is #'important since it is necessary to have a good representation of the #'possible states of the field in the past, and therefore, to get better #'analogs. Once the search of the analogs is complete, and in order to used the #'three criterias the user can select a number of analogs , using parameter #''nAnalogs' to restrict #'the selection of the best analogs in a short number of posibilities, the best #'ones. This function has not constrains of specific regions, variables to #'downscale, or data to be used (seasonal forecast data, climate projections #'data, reanalyses data). The regrid into a finner scale is done interpolating #'with CST_Load. Then, this interpolation is corrected selecting the analogs in #'the large and local scale in based of the observations. The function is an #'adapted version of the method of Yiou et al 2013. #' #'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, #'and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column #'from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. #'\email{pascal.yiou@lsce.ipsl.fr} #' #'@param expL an array of N named dimensions containing the experimental field #'on the large scale for which the analog is aimed. This field is used to in #'all the criterias. If parameter 'expVar' is not provided, the function will #'return the expL analog. The element 'data' in the 's2dv_cube' object must #'have, at least, latitudinal and longitudinal dimensions. The object is expect #'to be already subset for the desired large scale region. #'@param obsL an array of N named dimensions containing the observational field #'on the large scale. The element 'data' in the 's2dv_cube' object must have #'the same latitudinal and longitudinal dimensions as parameter 'expL' and a #' temporal dimension with the maximum number of available observations. #'@param time_obsL a character string indicating the date of the observations #'in the format "dd/mm/yyyy". Reference time to search for analogs. #'@param time_expL a character string indicating the date of the experiment #'in the format "dd/mm/yyyy". Time to find the analogs. #'@param excludeTime a character string indicating the date of the observations #'in the format "dd/mm/yyyy" to be excluded during the search of analogs, in a #'forecast might be NULL, if is not a forecast can not be NULL. #'@param expVar an array of N named dimensions containing the experimental #'field on the local scale, usually a different variable to the parameter #''expL'. If it is not NULL (by default, NULL), the returned field by this #'function will be the analog of parameter 'expVar'. #'@param obsVar an array of N named dimensions containing the field of the #'same variable as the passed in parameter 'expVar' for the same region. #'@param AnalogsInfo TRUE to get a list with two elements: 1) the downscaled #'field and 2) the AnalogsInfo which contains: a) the number of the best #'analogs, b) the corresponding value of the metric used in the selected #'criteria (distance values for Large_dist and Local_dist,correlation values #'for Local_cor), c)dates of the analogs). The analogs are listed in decreasing #'order, the first one is the best analog (i.e if the selected criteria is #'Local_cor the best analog will be the one with highest correlation, while for #'Large_dist criteria the best analog will be the day with minimum Euclidean #'distance). Set to FALSE to get a single analog, the best analog, for instance #'for downscaling. #'@param criteria a character string indicating the criteria to be used for the #'selection of analogs: #'\itemize{ #'\item{Large_dist} minimum Euclidean distance in the large scale pattern; #'\item{Local_dist} minimum Euclidean distance in the large scale pattern #'and minimum Euclidean distance in the local scale pattern; and #'\item{Local_cor} minimum Euclidean distance in the large scale pattern, #'minimum Euclidean distance in the local scale pattern and highest #'correlation in the local variable to downscale.} #'@param lonVar a vector containing the longitude of parameter 'expVar'. #'@param latVar a vector containing the latitude of parameter 'expVar'. #'@param region a vector of length four indicating the minimum longitude, #'the maximum longitude, the minimum latitude and the maximum latitude. #'@param nAnalogs number of Analogs to be selected to apply the criterias #''Local_dist' or 'Local_cor'. This is not the necessary the number of analogs #'that the user can get, but the number of events with minimum distance in #'which perform the search of the best Analog. The default value for the #''Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor' criterias must #' be greater than 1 in order to match with the first criteria, if nAnalogs is #' NULL for 'Local_dist' and 'Local_cor' the default value will be set at the #' length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just #' the best analog. #' #'@import multiApply #'@import s2dverification #'@import abind #'@import ClimProjDiags #' #'@return AnalogsFields, dowscaled values of the best analogs for the criteria #'selected. If AnalogsInfo is set to TRUE the function also returns a #'list with the dowsncaled field and the Analogs Information. #' #'@examples #'# Example 1:Downscaling using criteria 'Large_dist' and a single variable: #'# The best analog is found using a single variable (i.e. Sea level pressure, #'# SLP). The downscaling will be done in the same variable used to study the #'# minimal distance (i.e. SLP). obsVar and expVar NULLS or equal to obsL and #'# expL respectively region, lonVar and latVar not necessary here, #'# excludeTime=NULL #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*1.2) #'dim(obsSLP) <- c(time = 10,lat = 4, lon = 5) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, time_obsL=time_obsSLP, #' time_expL = "01-01-1994") #'str(downscale_field) #' #'# Example 2: Downscaling using criteria 'Large_dist' and 2 variables: #'# The best analog is found using 2 variables (i.e. Sea Level Pressure, SLP #'# and precipitation, pr): one variable (i.e. sea level pressure, expL) to #'# find the best analog day (defined in criteria 'Large_dist' as the day, in #'# obsL, with the minimum Euclidean distance to the day of interest in expL) #'# The second variable will be the variable to donwscale (i.e. precipitation, #'# obsVar). Parameter obsVar must be different to obsL.The downscaled #'# precipitation will be the precipitation that belongs to the best analog day #'# in SLP. Region not needed here since will be the same for both variables. #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(time = 10,lat = 4, lon = 5) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) #'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, #' obsVar=obs.pr, #' time_obsL=time_obsSLP,time_expL = "01-01-1994") #'str(downscale_field) #' #'# Example 3:List of best Analogs using criteria 'Large_dist' and a single #'# variable: same as Example 1 but getting a list of best analogs (AnalogsInfo #'# =TRUE) with the corresponding downscaled values, instead of only 1 single #'# donwscaled value instead of 1 single downscaled value. Imposing nAnalogs #'# (number of analogs to do the search of the best Analogs). obsVar and expVar #'# NULL or equal to obsL and expL,respectively. #' #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:1980),expSLP*1.5) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 100) #'time_obsSLP <- paste(rep("01", 100), rep("01", 100), 1920 : 2019, sep = "-") #'downscale_field<- Analogs(expL=expSLP, obsL=obsSLP, time_obsSLP, #' nAnalogs=5,time_expL = "01-01-2003", #' AnalogsInfo=TRUE, excludeTime= "01-01-2003") #'str(downscale_field) #'# Example 4:List of best Analogs using criteria 'Large_dist' and 2 variables: #'# same as example 2 but getting a list of best analogs (AnalogsInfo =TRUE) #'# with the values instead of only a single downscaled value. Imposing #'# nAnalogs (number of analogs to do the search of the best Analogs). obsVar #'# and expVar must be different to obsL and expL. #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) #'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, #' obsVar=obs.pr, #' time_obsL=time_obsSLP,nAnalogs=5, #' time_expL = "01-10-2003", #' AnalogsInfo=TRUE) #'str(downscale_field) #' #'# Example 5: Downscaling using criteria 'Local_dist' and 2 variables: #'# The best analog is found using 2 variables (i.e. Sea Level Pressure, #'# SLP and precipitation, pr). Parameter obsVar must be different to obsL.The #'# downscaled precipitation will be the precipitation that belongs to the best #'# analog day in SLP. lonVar, latVar and Region must be given here to select #'# the local scale #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) #'# analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obs.pr, #' criteria="Local_dist",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),region=region, #' time_expL = "01-10-2000", #' nAnalogs = 10, AnalogsInfo=TRUE) #'str(Local_scale) #'# Example 6: list of best analogs using criteria 'Local_dist' and 2 #'# variables: #'# The best analog is found using 2 variables. Parameter obsVar must be #'# different to obsL in this case.The downscaled precipitation will be the #'# precipitation that belongs to the best analog day in SLP. lonVar, latVar #'# and Region needed. AnalogsInfo=TRUE #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) #'# analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' criteria="Local_dist",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),region=region, #' time_expL = "01-10-2000", #' nAnalogs = 5, AnalogsInfo = TRUE) #'str(Local_scale) #'# Example 7: Downscaling using Local_dist criteria #'# without parameters obsVar and expVar. return list =FALSE #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'# analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' criteria="Local_dist",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),region=region, #' time_expL = "01-10-2000", #' nAnalogs = 10, AnalogsInfo = TRUE) #'str(Local_scale) #' #'# Example 8: Downscaling using criteria 'Local_cor' and 2 variables: #'# The best analog is found using 2 variables. Parameter obsVar and expVar #'# are necessary and must be different to obsL and expL, respectively. #'# The downscaled precipitation will be the precipitation that belongs to #'# the best analog day in SLP large and local scales, and to the day with #'# the higher correlation in precipitation. AnalogsInfo=FALSE. two options #'# for nAnalogs #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'exp.pr <- c(rnorm(1:20)*0.001) #'dim(exp.pr)=dim(expSLP) #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) #'# analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scalecor <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obs.pr,expVar=exp.pr, #' criteria="Local_cor",lonVar=seq(-1,5,1.5), #' time_expL = "01-10-2000", #' latVar=seq(30,35,1.5),nAnalogs=8,region=region, #' AnalogsInfo = FALSE) #'str(Local_scalecor) #'# same but without imposing nAnalogs,so nAnalogs will be set by default as 10 #'Local_scalecor <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obs.pr,expVar=exp.pr, #' criteria="Local_cor",lonVar=seq(-1,5,1.5), #' time_expL = "01-10-2000", #' latVar=seq(30,35,1.5),region=region, #' AnalogsInfo = TRUE) #'Local_scalecor$AnalogsInfo #' # Example 9: List of best analogs in the three criterias Large_dist, # Local_dist, and Local_cor return list TRUE same variable #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'# analogs of large scale using criteria 1 #'Large_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' criteria="Large_dist", time_expL = "01-10-2000", #' nAnalogs = 7, AnalogsInfo = TRUE) #'str(Large_scale) #'Large_scale$AnalogsInfo #'# analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' time_expL = "01-10-2000", #' criteria="Local_dist",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),nAnalogs=7,region=region, #' AnalogsInfo = TRUE) #'str(Local_scale) #'Local_scale$AnalogsInfo #'# analogs of local scale using criteria 3 #'Local_scalecor <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obsSLP,expVar=expSLP, #' time_expL = "01-10-2000", #' criteria="Local_cor",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),nAnalogs=7,region=region, #' AnalogsInfo = TRUE) #'str(Local_scalecor) #'Local_scalecor$AnalogsInfo #' #'# Example 10: Downscaling in the three criteria Large_dist, Local_dist, and #'# Local_cor return list FALSE, different variable #' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) #'obsSLP <- c(rnorm(1:180),expSLP*2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") #'exp.pr <- c(rnorm(1:20)*0.001) #'dim(exp.pr)=dim(expSLP) #'obs.pr <- c(rnorm(1:200)*0.001) #'dim(obs.pr)=dim(obsSLP) # # analogs of large scale using criteria 1 #'Large_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' criteria="Large_dist",time_expL = "01-10-2000", #' nAnalogs = 7, AnalogsInfo=TRUE) #'str(Large_scale) #'Large_scale$AnalogsInfo #'# # analogs of local scale using criteria 2 #'lonmin=-1 #'lonmax=2 #'latmin=30 #'latmax=33 #'region=c(lonmin,lonmax,latmin,latmax) #'Local_scale <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obs.pr,time_expL = "01-10-2000", #' criteria="Local_dist",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),nAnalogs=7,region=region, #' AnalogsInfo = TRUE) #'str(Local_scale) #'Local_scale$AnalogsInfo #'# # analogs of local scale using criteria 3 #'Local_scalecor <- Analogs(expL=expSLP, #' obsL=obsSLP, time_obsL=time_obsSLP, #' obsVar=obs.pr,expVar=exp.pr, #' time_expL = "01-10-2000", #' criteria="Local_cor",lonVar=seq(-1,5,1.5), #' latVar=seq(30,35,1.5),nAnalogs=7,region=region, #' AnalogsInfo = TRUE) #'str(Local_scalecor) #'Local_scalecor$AnalogsInfo #' #' #' #'@export Analogs <- function(expL, obsL, time_obsL,time_expL=NULL,expVar = NULL, obsVar = NULL, criteria = "Large_dist",excludeTime=NULL, lonVar = NULL, latVar = NULL, region = NULL, nAnalogs = NULL,AnalogsInfo=FALSE) { if (!all(c('lon', 'lat') %in% names(dim(expL)))) { stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.") } if (!all(c('lat', 'lon') %in% names(dim(obsL)))) { stop("Parameter 'obsL' must have the dimension 'lat' and 'lon'.") } if (any(is.na(expL))) { warning("Parameter 'exp' contains NA values.") } if (any(is.na(obsL))) { warning("Parameter 'obs' contains NA values.") } if (!is.null(expVar) & is.null(obsVar)) { expVar <- NULL warning("Parameter 'expVar' is set to NULL as parameter 'obsVar', large scale field will be returned.") } if (is.null(expVar) & is.null(obsVar)) { warning("Parameter 'expVar' and 'obsVar' are NULLs, downscaling/listing same variable as obsL and expL'.") } if(!is.null(obsVar) & is.null(expVar) & criteria=="Local_cor"){ stop("parameter 'expVar' cannot be NULL") } if(is.null(nAnalogs) & criteria!="Large_dist"){ nAnalogs=length(time_obsL) warning("Parameter 'nAnalogs' is NULL and is set to the same length of time_obsL by default") } if(is.null(nAnalogs) & criteria=="Large_dist"){ nAnalogs=1 } if(is.null(time_expL)){ stop("parameter 'time_expL' cannot be NULL") } if(is.null(time_obsL)){ stop("parameter 'time_obsL' cannot be NULL") } if(is.null(expL)){ stop("parameter 'expL' cannot be NULL") } if(!is.null(obsL)){ obsL=replace_time_dimnames(obsL) if(time_expL %in% time_obsL){ if(is.null(excludeTime)){ excludeTime=time_expL warning("Parameter 'excludeTime' is NULL, time_obsL contains time_expL, so, by default, the date of time_expL will be excluded in the search of analogs") }else{ `%!in%` = Negate(`%in%`) if(time_expL %!in% excludeTime){ excludeTime=c(excludeTime,time_expL) warning("Parameter 'excludeTime' is not NULL, time_obsL contains time_expL, so, by default, the date of time_expL will be excluded in the search of analogs") } } time_ref<- time_obsL[-c(which(time_obsL %in% excludeTime))] posdim<- which(names(dim(obsL)) == 'time') posref<- which(time_obsL %in% time_ref) obsT<- Subset(obsL,along = posdim,indices = posref) if(!is.null(obsVar)){obsTVar<- Subset(obsVar,along = posdim, indices = posref)} time_obsL <- time_ref obsL <- obsT if(!is.null(obsVar)){obsVar <- obsTVar} }else{ if(is.null(excludeTime)){ if(!is.null(obsVar)){ warning("Parameter 'excludeTime' is NULL, time_obsL does not contain time_expL, obsVar not NULL") }else{ warning("Parameter 'excludeTime' is NULL, time_obsL does not contain time_expL") } }else{ time_ref<- time_obsL[-c(which(time_obsL %in% excludeTime))] posdim<- which(names(dim(obsL)) == 'time') posref<- which(time_obsL %in% time_ref) obsT<- Subset(obsL,along = posdim,indices = posref) if(!is.null(obsVar)){obsTVar<- Subset(obsVar,along = posdim, indices = posref)} time_obsL <- time_ref obsL <- obsT if(!is.null(obsVar)){obsVar <- obsTVar} if(!is.null(obsVar)){ warning("Parameter 'excludeTime' has a value and time_obsL does not contain time_expL, obsVar not NULL") }else{ warning("Parameter 'excludeTime' has a value and time_obsL does not contain time_expL") } } } }else{stop("parameter 'obsL' cannot be NULL")} if(is.null(obsVar)){ warning("obsVar is NULL") } if (any(names(dim(obsL)) %in% 'ftime')) { if (any(names(dim(obsL)) %in% 'time')) { stop("Multiple temporal dimensions ('ftime' and 'time') found", "in parameter 'obsL'.") } else { time_pos_obsL <- which(names(dim(obsL)) == 'ftime') names(dim(obsL))[time_pos_obsL] <- 'time' if (any(names(dim(expL)) %in% 'ftime')) { time_pos_expL <- which(names(dim(expL)) == 'ftime') names(dim(expL))[time_pos_expL] <- 'time' } } } if(!is.null(obsVar)){ if (any(names(dim(obsVar)) %in% 'ftime')) { if (any(names(dim(obsVar)) %in% 'time')) { stop("Multiple temporal dimensions ('ftime' and 'time') found", "in parameter 'obsVar'.") } else { time_pos_obsVar <- which(names(dim(obsVar)) == 'ftime') names(dim(obsVar))[time_pos_obsVar] <- 'time' if (any(names(dim(expVar)) %in% 'ftime')) { time_pos_expVar <- which(names(dim(expVar)) == 'ftime') names(dim(expVar))[time_pos_expVar] <- 'time' } } } } if ((any(names(dim(obsL)) %in% 'sdate')) && (any(names(dim(obsL)) %in% 'time'))){ dims_obsL <- dim(obsL) pos_sdate <- which(names(dim(obsL)) == 'sdate') pos_time <- which(names(dim(obsL)) == 'time') pos <- 1 : length(dim(obsL)) pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) obsL <- aperm(obsL, pos) dim(obsL) <- c(time = prod(dims_obsL[c(pos_time, pos_sdate)]), dims_obsL[-c(pos_time, pos_sdate)]) }else{ if(any(names(dim(obsL)) %in% 'sdate')){ dims_obsL <- dim(obsL) pos_sdate <- which(names(dim(obsL)) == 'sdate') pos <- 1 : length(dim(obsL)) pos <- c( pos_sdate, pos[-c(pos_sdate)]) obsL <- aperm(obsL, pos) dim(obsL) <- c(time = prod(dims_obsL[c(pos_sdate)]), dims_obsL[-c( pos_sdate)]) }else{ if (any(names(dim(obsL)) %in% 'time')){ dims_obsL <- dim(obsL) pos_time <- which(names(dim(obsL)) == 'time') if(length(time_obsL)!=dim(obsL)[pos_time]){ stop(" 'time_obsL' and 'obsL' must have same length in the temporal dimension.") } pos <- 1 : length(dim(obsL)) pos <- c(pos_time, pos[-c(pos_time)]) obsL <- aperm(obsL, pos) dim(obsL) <- c(time = prod(dims_obsL[pos_time]), dims_obsL[-c(pos_time)]) }else{stop("Parameter 'obsL' must have a temporal dimension.")} } } if(!is.null(obsVar)){ if (any(names(dim(obsVar)) %in% 'sdate')) { if (any(names(dim(obsVar)) %in% 'time')) { dims_obsVar <- dim(obsVar) pos_sdate <- which(names(dim(obsVar)) == 'sdate') pos_time <- which(names(dim(obsVar)) == 'time') pos <- 1 : length(dim(obsVar)) pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) obsVar <- aperm(obsVar, pos) dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_time, pos_sdate)]), dims_obsVar[-c(pos_time, pos_sdate)]) } else { dims_obsVar <- dim(obsVar) pos_sdate <- which(names(dim(obsVar)) == 'sdate') pos <- 1 : length(dim(obsVar)) pos <- c(pos_sdate, pos[-c(pos_sdate)]) obsVar <- aperm(obsVar, pos) dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_sdate)]), dims_obsVar[-c(pos_sdate)]) } } else { if (any(names(dim(obsVar)) %in% 'time')) { dims_obsVar <- dim(obsVar) pos_time <- which(names(dim(obsVar)) == 'time') if(length(time_obsL)!=dim(obsVar)[pos_time]){ stop(" 'time_obsL' and 'obsVar' must have same length in the temporal dimension.")} pos <- 1 : length(dim(obsVar)) pos <- c(pos_time, pos[-c(pos_time)]) obsVar <- aperm(obsVar, pos) dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_time)]), dims_obsVar[-c(pos_time)]) }else{ stop("Parameter 'obsVar' must have a temporal dimension.") } } } if (is.null(region) & criteria!="Large_dist") { if (!is.null(lonVar) & !is.null(latVar)) { region <- c(min(lonVar), max(lonVar), min(latVar), max(latVar)) }else{ stop("Parameters 'lonVar' and 'latVar' must be given in criteria 'Local_dist' and 'Local_cor'") } } if ((length(time_expL)==1) && (nAnalogs>=1)){ warning("computing one day and 1 or more Analogs") Analog_result <- FindAnalog(expL = expL, obsL = obsL, time_obsL=time_obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, AnalogsInfo = AnalogsInfo, nAnalogs=nAnalogs,lonVar = lonVar, latVar = latVar, region = region) if(AnalogsInfo==TRUE){ AnalogsInfo=list(analog=Analog_result$Analog, metric=Analog_result$metric, dates=Analog_result$dates) return(list(AnalogsFields=Analog_result$AnalogsFields, AnalogsInfo=AnalogsInfo ) ) }else{ return(AnalogsFields=Analog_result$AnalogsFields) } } if ((length(time_expL)>1) && (nAnalogs>=1)){ warning("Computing more than 1 analog in more than one day") Analog_result_timestep<-Apply(expL, target_dims = list(c('lat', 'lon')), margins=c('time'), fun = FindAnalog, AnalogsInfo = AnalogsInfo, nAnalogs=nAnalogs, obsL = obsL, time_obsL=time_obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, lonVar = lonVar, latVar = latVar, region = region ) if(AnalogsInfo==TRUE){ names(dim(Analog_result_timestep$AnalogsFields))[1]<-'analog' names(dim(Analog_result_timestep$Analog))[1]<-'analog' names(dim(Analog_result_timestep$metric))[1]<-'analog' names(dim(Analog_result_timestep$dates))[1]<-'analog' Analog_result_timestep$dates<- as.POSIXct(Analog_result_timestep$dates, origin = '1970-01-01') #Analog_result_timestep$dates<- as.Date(Analog_result_timestep$dates) AnalogsInfo=list(analog=Analog_result_timestep$Analog, metric=Analog_result_timestep$metric, dates=Analog_result_timestep$dates) return(list(AnalogsFields=Analog_result_timestep$AnalogsFields, AnalogsInfo=AnalogsInfo) ) }else{ return(AnalogsFields=Analog_result_timestep$AnalogsFields) } } } FindAnalog <- function(expL,obsL,time_obsL,expVar,obsVar,criteria,lonVar, latVar,region, nAnalogs=nAnalogs,AnalogsInfo = AnalogsInfo) { position <- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, lonVar = lonVar, latVar = latVar, region = region)$position metrics<- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, lonVar = lonVar, latVar = latVar, region = region)$metric.original best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog, criteria = criteria, AnalogsInfo = AnalogsInfo, nAnalogs = nAnalogs)$output1 Analogs_dates <- time_obsL[best] dim(Analogs_dates) <- dim(best) if (all(!is.null(region), !is.null(lonVar), !is.null(latVar))) { if (is.null(obsVar)) { obsVar <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)$data expVar <- SelBox(expL, lon = lonVar, lat = latVar, region=region)$data Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best) warning("obsVar is NULL, obsVar will be computed from obsL (same variable)") } else { obslocal <- SelBox(obsVar, lon = lonVar, lat = latVar, region = region)$data Analogs_fields <- Subset(obslocal, along = which(names(dim(obslocal)) == 'time'), indices = best) } } else { warning("One or more of the parameter 'region', 'lonVar' and 'latVar'", " are NULL and the large scale field will be returned.") if (is.null(obsVar)) { Analogs_fields <- Subset(obsL, along = which(names(dim(obsL)) == 'time'), indices = best) } else { Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best) } } lon_dim <- which(names(dim(Analogs_fields)) == 'lon') lat_dim <- which(names(dim(Analogs_fields)) == 'lat') Analogs_metrics <- Subset(metrics, along = which(names(dim(metrics)) == 'time'), indices = best) return(list(AnalogsFields=Analogs_fields, Analog=as.numeric(1:nrow(Analogs_metrics)), metric=Analogs_metrics, dates=Analogs_dates) ) } BestAnalog <- function(position, nAnalogs = nAnalogs, AnalogsInfo = FALSE, criteria = 'Large_dist') { pos_dim <- which(names(dim(position)) == 'pos') if (dim(position)[pos_dim] == 1) { pos1 <- Subset(position, along = pos_dim, indices = 1) if (criteria != 'Large_dist') { warning("Dimension 'pos' in parameter 'position' has length 1,", " criteria 'Large_dist' will be used.") criteria <- 'Large_dist' } } else if (dim(position)[pos_dim] == 2) { pos1 <- Subset(position, along = pos_dim, indices = 1) pos2 <- Subset(position, along = pos_dim, indices = 2) if (criteria == 'Local_cor') { warning("Dimension 'pos' in parameter 'position' has length 2,", " criteria 'Local_dist' will be used.") criteria <- 'Local_dist' } } else if (dim(position)[pos_dim] == 3) { pos1 <- Subset(position, along = pos_dim, indices = 1) pos2 <- Subset(position, along = pos_dim, indices = 2) pos3 <- Subset(position, along = pos_dim, indices = 3) if (criteria != 'Local_cor') { warning("Parameter 'criteria' is set to", criteria, ".") } } else { stop("Parameter 'position' has dimension 'pos' of different ", "length than expected (from 1 to 3).") } if (criteria == 'Large_dist') { if (AnalogsInfo == FALSE) { pos <- pos1[1] } else { pos <- pos1[1 : nAnalogs] } } else if (criteria== 'Local_dist') { pos1 <- pos1[1 : nAnalogs] pos2 <- pos2[1 : nAnalogs] best <- match(pos1, pos2) if(length(best)==1){ warning("Just 1 best analog matching Large_dist and ", "Local_dist criteria") } if(length(best)<1 & is.na(best[1])==TRUE){ stop("no best analogs matching Large_dist and Local_dist criterias, please increase nAnalogs") } pos <- pos2[as.logical(best)] pos <- pos[which(!is.na(pos))] if (AnalogsInfo == FALSE) { pos <- pos[1] }else { pos <- pos} } else if (criteria == 'Local_cor') { pos1 <- pos1[1 : nAnalogs] pos2 <- pos2[1 : nAnalogs] best <- match(pos1, pos2) if(length(best)==1){ warning("Just 1 best analog matching Large_dist and ", "Local_dist criteria") } if(length(best)<1 & is.na(best[1])==TRUE){ stop("no best analogs matching Large_dist and Local_dist criterias, please increase nAnalogs") } pos <- pos1[as.logical(best)] pos <- pos[which(!is.na(pos))] pos3 <- pos3[1 : nAnalogs] best <- match(pos, pos3) if(length(best)==1){ warning("Just 1 best analog matching Large_dist, Local_dist and ", "Local_cor criteria") } if(length(best)<1 & is.na(best[1])==TRUE){ stop("no best analogs matching Large_dist, Local_dist and Local_cor criterias, please increase nAnalogs") } pos <- pos[order(best, decreasing = F)] pos <- pos[which(!is.na(pos))] if (AnalogsInfo == FALSE) { pos <- pos[1] } else{ pos <- pos } return(pos) } } Select <- function(expL, obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL) { names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')), fun = .select, expL, metric = "dist")$output1 metric1.original=metric1 if (length(dim(metric1)) > 1) { dim_time_obs <- which(names(dim(metric1)) == 'time' | names(dim(metric1)) == 'ftime') dim(metric1) <- c(dim(metric1), metric=1) margins <- c(1 : (length(dim(metric1))))[-dim_time_obs] pos1 <- apply(metric1, margins, order) names(dim(pos1))[1] <- 'time' metric1.original=metric1 metric1 <- apply(metric1, margins, sort) names(dim(metric1))[1] <- 'time' names(dim(metric1.original))=names(dim(metric1)) } else { pos1 <- order(metric1) dim(pos1) <- c(time = length(pos1)) metric1 <- sort(metric1) dim(metric1) <- c(time = length(metric1)) dim(metric1.original)=dim(metric1) dim_time_obs=1 } if (criteria == "Large_dist") { dim(metric1) <- c(dim(metric1), metric = 1) dim(pos1) <- c(dim(pos1), pos = 1) dim(metric1.original)=dim(metric1) return(list(metric = metric1, metric.original=metric1.original, position = pos1)) } if (criteria == "Local_dist" | criteria == "Local_cor") { obs <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)$data exp <- SelBox(expL, lon = lonVar, lat = latVar, region = region)$data metric2 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = .select, exp, metric = "dist")$output1 metric2.original=metric2 dim(metric2) <- c(dim(metric2), metric=1) margins <- c(1 : (length(dim(metric2))))[-dim_time_obs] pos2 <- apply(metric2, margins, order) dim(pos2) <- dim(pos1) names(dim(pos2))[1] <- 'time' metric2 <- apply(metric2, margins, sort) names(dim(metric2))[1] <- 'time' if (criteria == "Local_dist") { metric <- abind(metric1, metric2, along = length(dim(metric1))+1) metric.original <- abind(metric1.original,metric2.original, along=length(dim(metric1))+1) position <- abind(pos1, pos2, along = length(dim(pos1))+1) names(dim(metric)) <- c(names(dim(pos1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') names(dim(metric.original)) = names(dim(metric)) return(list(metric = metric, metric.original=metric.original, position = position)) } } if (criteria == "Local_cor") { obs <- SelBox(obsVar, lon = lonVar, lat = latVar, region = region)$data exp <- SelBox(expVar, lon = lonVar, lat = latVar, region = region)$data metric3 <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = .select, exp, metric = "cor")$output1 metric3.original=metric3 dim(metric3) <- c(dim(metric3), metric=1) margins <- c(1 : (length(dim(metric3))))[-dim_time_obs] pos3 <- apply(abs(metric3), margins, order, decreasing = TRUE) names(dim(pos3))[1] <- 'time' metricsort <- metric3[pos3] dim(metricsort)=dim(metric3) names(dim(metricsort))[1] <- 'time' metric <- abind(metric1, metric2, metricsort, along = length(dim(metric1)) + 1) metric.original <- abind(metric1.original, metric2.original, metric3.original, along = length(dim(metric1)) + 1) position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1) names(dim(metric)) <- c(names(dim(metric1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') names(dim(metric.original)) = names(dim(metric)) return(list(metric = metric, metric.original=metric.original, position = position)) } else { stop("Parameter 'criteria' must to be one of the: 'Large_dist', ", "'Local_dist','Local_cor'.") } } .select <- function(exp, obs, metric = "dist") { if (metric == "dist") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = function(x) {sqrt(sum((x - exp) ^ 2, na.rm = TRUE))})$output1 } else if (metric == "cor") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = function(x) {cor(as.vector(x), as.vector(exp), method="spearman")})$output1 } result } .time_ref<- function(time_obsL,time_expL,excludeTime){ sameTime=which(time_obsL %in% time_expL) result<- c(time_obsL[1:(sameTime-excludeTime-1)], time_obsL[(sameTime+excludeTime+1):length(time_obsL)]) result } replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', lon_name = 'lon') { if (!is.character(names_exp)) { stop("Parameter 'names_exp' must be a vector of characters.") } if (!is.character(names_obs)) { stop("Parameter 'names_obs' must be a vector of characters.") } latlon_dim_exp <- which(names_exp == lat_name | names_exp == lon_name) latlon_dim_obs <- which(names_obs == lat_name | names_obs == lon_name) if (any(unlist(lapply(names_exp[-latlon_dim_exp], function(x){x == names_obs[-latlon_dim_obs]})))) { original_pos <- lapply(names_exp, function(x) which(x == names_obs[-latlon_dim_obs])) original_pos <- lapply(original_pos, length) > 0 names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp") } return(names_exp) } replace_time_dimnames <- function(dataL, time_name = 'time', stdate_name='stdate', ftime_name='ftime') { names_obs=names(dim(dataL)) if (!is.character(names_obs)) { stop("Parameter 'names_obs' must be a vector of characters.") } time_dim_obs <- which(names_obs == time_name | names_obs == stdate_name | names_obs == ftime_name) if(length(time_dim_obs) >1){ stop ("more than 1 time dimension, please give just 1") } if(length(time_dim_obs) == 0){ warning ("name of time dimension is not 'ftime' or 'time' or 'stdate' or time dimension is null") } if(length(time_dim_obs)!=0){ names_obs[time_dim_obs]= time_name} names(dim(dataL))=names_obs return(dataL) }