diff --git a/DESCRIPTION b/DESCRIPTION index 07433f279959e102a655e5b3b8d32c51e0a00770..7fe8d6812209852a7de75a7eb923886ee2847137 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: CSTools Title: Assessing Skill of Climate Forecasts on Seasonal-to-Decadal Timescales -Version: 3.1.0 +Version: 4.0.0 Authors@R: c( person("Nuria", "Perez-Zanon", , "nuria.perez@bsc.es", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8568-3071")), person("Louis-Philippe", "Caron", , "louis-philippe.caron@bsc.es", role = "aut", comment = c(ORCID = "0000-0001-5221-0147")), - person("Carmen", "Alvarez-Castro", , "carmen.alvarez-castro@cmcc.it", role = "aut"), + person("Carmen", "Alvarez-Castro", , "carmen.alvarez-castro@cmcc.it", role = "aut", comment = c(ORCID = "0000-0002-9958-010X")), + person("Lauriane", "Batte", , "lauriane.batte@meteo.fr", role = "aut"), person("Jost", "von Hardenberg", , email = c("j.vonhardenberg@isac.cnr.it", "jost.hardenberg@polito.it"), role = "aut", comment = c(ORCID = "0000-0002-5312-8070")), person("Llorenç", "LLedo", , "llledo@bsc.es", role = "aut"), person("Nicolau", "Manubens", , "nicolau.manubens@bsc.es", role = "aut"), @@ -13,16 +14,18 @@ Authors@R: c( person("Bert", "van Schaeybroeck", , "bertvs@meteo.be", role = "aut"), person("Veronica", "Torralba", , "veronica.torralba@bsc.es", role = "aut"), person("Deborah", "Verfaillie", , "deborah.verfaillie@bsc.es", role = "aut"), - person("Lauriane", "Batte", , "lauriane.batte@meteo.fr", role = "ctb"), person("Filippo", "Cali Quaglia", , "filippo.caliquaglia@gmail.com", role = "ctb"), + person("Maria M.", "Chaves-Montero", , "mariadm.chaves@cmcc.it", role = "ctb"), person("Chihchung", "Chou", , "chihchung.chou@bsc.es", role = "ctb"), person("Nicola", "Cortesi", , "nicola.cortesi@bsc.es", role = "ctb"), person("Susanna", "Corti", , "s.corti@isac.cnr.it", role = "ctb"), person("Paolo", "Davini", , "p.davini@isac.cnr.it", role = "ctb"), + person("Gildas", "Dayon", , "gildas.dayon@meteo.fr", role = "ctb"), person("Marta", "Dominguez", , "mdomingueza@aemet.es", role = "ctb"), person("Federico", "Fabiano", , "f.fabiano@isac.cnr.it", role = "ctb"), person("Ignazio", "Giuntoli", , "i.giuntoli@isac.cnr.it", role = "ctb"), person("Raul", "Marcos", , "raul.marcos@bsc.es", role = "ctb"), + person("Paola", "Marson", , "paola.marson@meteo.fr", role = "ctb"), person("Niti", "Mishra", , "niti.mishra@bsc.es", role = "ctb"), person("Jesus", "Peña", , "jesus.pena@bsc.es", role = "ctb"), person("Francesc", "Roura-Adserias", , "francesc.roura@bsc.es", role = "ctb"), @@ -43,17 +46,19 @@ Description: Exploits dynamical seasonal forecasts in order to provide Terzago et al. (2018) . Torralba et al. (2017) . D'Onofrio et al. (2014) . + Verfaillie et al. (2017) . Van Schaeybroeck et al. (2019) . Yiou et al. (2013) . 
Depends: R (>= 3.4.0), - maps + maps, + qmap, + easyVerification Imports: s2dverification, s2dv, rainfarmr, multiApply (>= 2.1.1), - qmap, ClimProjDiags, ncdf4, plyr, diff --git a/NAMESPACE b/NAMESPACE index 9f9a0e3481c2eaa1eb04b26776b497b25783564e..7e313bc5e0cdbdfbd3f2735c6958745477e20276 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand +export(AdamontAnalog) export(Analogs) export(BEI_PDFBest) export(BEI_Weights) export(CST_Analogs) +export(CST_AnalogsPredictors) export(CST_Anomaly) export(CST_BEI_Weighting) export(CST_BiasCorrection) @@ -25,16 +27,20 @@ export(CST_SaveExp) export(CST_SplitDim) export(CST_WeatherRegimes) export(Calibration) +export(CategoricalEnsCombination) export(EnsClustering) export(MergeDims) export(MultiEOF) +export(MultiMetric) export(PlotCombinedMap) export(PlotForecastPDF) export(PlotMostLikelyQuantileMap) export(PlotPDFsOLE) export(PlotTriangles4Categories) +export(QuantileMapping) export(RFSlope) export(RFTemp) +export(RF_Weights) export(RainFARM) export(RegimesAssign) export(SaveExp) @@ -42,19 +48,22 @@ export(SplitDim) export(WeatherRegime) export(as.s2dv_cube) export(s2dv_cube) +export(training_analogs) import(abind) import(ggplot2) import(multiApply) import(ncdf4) import(qmap) import(rainfarmr) -import(s2dverification) import(stats) importFrom(ClimProjDiags,SelBox) importFrom(RColorBrewer,brewer.pal) +importFrom(abind,abind) importFrom(data.table,CJ) importFrom(data.table,data.table) importFrom(data.table,setkey) +importFrom(easyVerification,climFairRpss) +importFrom(easyVerification,veriApply) importFrom(grDevices,adjustcolor) importFrom(grDevices,bmp) importFrom(grDevices,colorRampPalette) @@ -84,8 +93,25 @@ importFrom(maps,map) importFrom(plyr,.) importFrom(plyr,dlply) importFrom(reshape2,melt) +importFrom(s2dv,ColorBar) +importFrom(s2dv,Corr) +importFrom(s2dv,InsertDim) +importFrom(s2dv,MeanDims) +importFrom(s2dv,PlotEquiMap) +importFrom(s2dv,RMS) +importFrom(s2dv,RMSSS) importFrom(s2dv,Reorder) +importFrom(s2dverification,ACC) +importFrom(s2dverification,Ano_CrossValid) +importFrom(s2dverification,Clim) +importFrom(s2dverification,EOF) +importFrom(s2dverification,Eno) +importFrom(s2dverification,Load) +importFrom(s2dverification,Mean1Dim) +importFrom(s2dverification,Subset) importFrom(utils,glob2rx) importFrom(utils,head) +importFrom(utils,read.table) importFrom(utils,tail) importFrom(verification,verify) +useDynLib(CSTools) diff --git a/NEWS.md b/NEWS.md index fab70c708389c95d71195da561f6b346cfed7d2b..23d198894d3d5db5510465662479fd0a6947b591 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,36 @@ +### CSTools 4.0.0 +**Submission date to CRAN: XX-12-2020** + +- New features: + + ADAMONT downscaling method: requires CST_AdamontAnalogs and CST_AdamontQQCor functions + + Analogs method using Predictors: requires training_analogs and CST_AnalogsPredictors + + PlotPDFsOLE includes parameters to modify legend style + + CST_RFSlope handless missing values in the temporal dimension and new 'ncores' parameter allows parallel computation + + CST_RFWeights accepts s2dv_cube objects as input and new 'ncores' paramenter allows parallel computation + + RFWeights is exposed to users + + CST_RainFARM accepts multi-dimensional slopes and weights and handless missing values in sample dimensions. + + QuantileMapping is exposed to users + + CST_MultiMetric includes 'rpss' metric and it is addapted to s2dv. 
+ + PlotMostLikelyQuantileMap vignette
+ + PlotTriangles4Categories includes two parameters to adjust axes and margins
+ + CategoricalEnsCombination is exposed to users
+ + CST_SplitDims includes parameter 'insert_ftime'
+ + Analogs vignette
+ + Data Storage and retrieval vignette
+
+- Fixes:
+ + PlotForecastPDF correctly displays tercile labels
+ + CST_SaveExp correctly saves time units
+ + CST_SplitDims returns ordered output following the ascending order provided in indices when it is numeric
+ + qmap library moved from Imports to Depends
+ + CST_QuantileMapping correctly handles exp_cor
+ + Figure resize option from vignettes has been removed
+ + Fix Analogs to work with three different criteria
+ + PlotForecastPDF vignette plots updated
+ + Decrease package size by compressing vignette figures and removing the areave_data sample
+
 ### CSTools 3.1.0
-**Submission date to CRAN: XX-06-2020**
+**Submission date to CRAN: 02-07-2020**
 
 - New features:
   + EnsClustering vignette
diff --git a/R/AnalogsPred_train.R b/R/AnalogsPred_train.R
new file mode 100644
index 0000000000000000000000000000000000000000..c68c48b05cd5991de93ff5f1282fa761148693a1
--- /dev/null
+++ b/R/AnalogsPred_train.R
@@ -0,0 +1,534 @@
+#' AEMET Training
+#' Training method (pre-downscaling) based on analogs:
+#' synoptic situations and significant predictors.
+#'
+#'@author Marta Dominguez Alonso - AEMET, \email{mdomingueza@aemet.es}
+#'@author Nuria Perez-Zanon - BSC, \email{nuria.perez@bsc.es}
+#'
+#'@description This function characterizes the synoptic situations in a past period based on
+#' low resolution reanalysis data (e.g., ERA-Interim 1.5º x 1.5º) and an observational high
+#' resolution (HR) dataset (AEMET 5 km gridded daily precipitation and maximum and
+#' minimum temperature) (Peral et al., 2017).
+#' The method uses three domains:
+#' - peninsular Spain and Balearic Islands domain (5 km resolution): HR domain
+#' - synoptic domain (low resolution): it should be centered over the Iberian Peninsula and
+#' cover enough extension to detect as many synoptic situations as possible.
+#' - extended domain (low resolution): it is an extension of the synoptic
+#' domain. It is used for the 'slp_ext' parameter (see 'slp_lon' and 'slp_lat' below).
+#'@param pred List of matrix reanalysis data in a synoptic domain. The list
+#' has to contain reanalysis atmospheric variables (instantaneous 12h data)
+#' that must be identified by the names given in parentheses below.
+#' For precipitation:
+#' - u component of wind at 500 hPa (u500) in m/s
+#' - v component of wind at 500 hPa (v500) in m/s
+#' - temperature at 500 hPa (t500) in K
+#' - temperature at 850 hPa (t850) in K
+#' - temperature at 1000 hPa (t1000) in K
+#' - geopotential height at 500 hPa (z500) in m
+#' - geopotential height at 1000 hPa (z1000) in m
+#' - sea level pressure (slp) in hPa
+#' - specific humidity at 700 hPa (q700) in g/kg
+#' For maximum and minimum temperature:
+#' - temperature at 1000 hPa (t1000) in K
+#' - sea level pressure (slp) in hPa
+#' All matrices must have [time,gridpoint] dimensions.
+#' (time = number of training days, gridpoint = number of synoptic gridpoints).
+#'@param slp_ext Matrix with atmospheric reanalysis sea level pressure
+#' (instantaneous 12h data)(hPa). It has the same resolution as the 'pred' parameter
+#' but with an extended domain. This domain contains extra degrees (mostly in the
+#' north and west part) compared to the synoptic domain. The matrix must have
+#' [time,gridpoint] dimensions.
+#' (time = number of training days, gridpoint = number of extended gridpoints).
+#'@param lon Vector of the synoptic longitude (from (-180º) to 180º).
+#' The vector must go from west to east.
+#'@param lat Vector of the synoptic latitude. The vector must go from north to south.
+#'@param slp_lon Vector of the extended longitude (from (-180º) to 180º).
+#' The vector must go from west to east.
+#'@param slp_lat Vector of the extended latitude. The vector must go from north to south.
+#'@param var Variable name to downscale. There are two options: 'prec' for
+#' precipitation and 'temp' for maximum and minimum temperature.
+#'@param HR_path Local path of HR observational files (maestro and pcp/tmx-tmn).
+#' For precipitation, it can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/v2/Serie_AEMET_v2_pcp_1951a202006_txt.tar.gz
+#' For maximum and minimum temperature, it can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmax/Serie_AEMET_v1_tmax_1951a202006_txt.tar.gz and http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmin/Serie_AEMET_v1_tmin_1951a202006_txt.tar.gz respectively.
+#' Maestro file (maestro_red_hr_SPAIN.txt) has gridpoint (nptos), longitude (lon), latitude (lat) and
+#' altitude (alt) in columns (vector structure).
+#' Data file (pcp/tmx/tmn_red_SPAIN_1951-201903.txt) includes 5km resolution Spanish daily data
+#' (precipitation or maximum and minimum temperature from January 1951 to June 2020). See README
+#' file for more information.
+#' IMPORTANT!: HR observational period must be the same as for reanalysis variables.
+#' It is assumed that the training period is smaller than the HR original one (1951-2020), so it is
+#' necessary to make a new ascii file with the new period and the same structure as the original,
+#' specifying the training dates ('tdates' parameter) in the name
+#' (e.g. 'pcp_red_SPAIN_19810101-19961231.txt' for the '19810101-19961231' period).
+#'@param tdates Training period dates in format YYYYMMDD(start)-YYYYMMDD(end) (e.g. 19810101-19961231).
+#'@return A list of matrices (e.g. restrain) resulting from characterizing the past synoptic
+#' situations and the significant predictors needed to downscale seasonal forecast variables.
+#' For precipitation the output includes:
+#' um: u component of geostrophic wind in all period (numeric matrix with [time,gridpoint] dimensions)
+#' vm: v component of geostrophic wind in all period (numeric matrix with [time,gridpoint] dimensions)
+#' nger: number of synoptic situations (integer)
+#' gu92: u component of geostrophic wind for each synoptic situation (numeric matrix with
+#' [nger,gridpoint] dimensions)
+#' gv92: v component of geostrophic wind for each synoptic situation (numeric matrix with
+#' [nger,gridpoint] dimensions)
+#' gu52: u component of wind at 500 hPa for each synoptic situation (numeric matrix with
+#' [nger,gridpoint] dimensions)
+#' gv52: v component of wind at 500 hPa for each synoptic situation (numeric matrix with
+#' [nger,gridpoint] dimensions)
+#' neni: number of reference centers where predictors are calculated (integer)
+#' vdmin: minimum distances between each HR gridpoint and the four nearest synoptic
+#' gridpoints (numeric matrix with [nptos,4] dimensions) (nptos = number of HR gridpoints)
+#' vref: four nearest synoptic gridpoints to each HR gridpoint (integer matrix with
+#' [nptos,4] dimensions)
+#' ccm: multiple correlation coefficients (numeric matrix with [nger,nptos] dimensions)
+#' indices:
+#' - lab_pred: numeric labels of selected predictors (integer matrix
+#' with [nger,nptos,11,1] dimensions)
+#' - cor_pred: partial correlation of selected predictors (numeric matrix with
+#' [nger,nptos,11,2] dimensions)
+#' For maximum and minimum temperature the output includes:
+#' um: u component of geostrophic wind in all training period (numeric matrix with [time,gridpoint] dimensions)
+#' vm: v component of geostrophic wind in all training period (numeric matrix with [time,gridpoint] dimensions)
+#' insol: insolation in all training period (numeric vector with [time] dimension)
+#' neni: number of reference centers where predictors are calculated (integer)
+#' vdmin: minimum distances between each HR gridpoint and the four nearest synoptic
+#' gridpoints (numeric matrix with [nptos,4] dimensions) (nptos = number of HR gridpoints)
+#' vref: four nearest synoptic gridpoints to each HR gridpoint (integer matrix with
+#' [nptos,4] dimensions)
+#'
+#' The output can be used directly as an argument to the 'CST_AnalogsPredictors' function
+#' (e.g.
resdowns <- CST_AnalogsPredictors(...,restrain)) +#' +#'@importFrom utils read.table +#' +#'@useDynLib CSTools +#' +#'@export + +training_analogs <- function(pred, + slp_ext, + lon, + lat, + slp_lon, + slp_lat, + var, + HR_path, + tdates) { + +if (!is.list(pred)) { + stop("Parameter 'pred' must be a list of 'matrix' objects") + } + +if (!(all(sapply(pred, inherits, 'matrix')))) { + stop("Elements of the list in parameter 'pred' must be of the class ", + "'matrix'.") + } + +if (var == "prec") { + if (length(pred) != 9) { + stop("Parameter 'pred' must be a length of 9.") + } else { + if (is.null(names(dim(pred[[1]]))) || + is.null(names(dim(pred[[2]]))) || + is.null(names(dim(pred[[3]]))) || + is.null(names(dim(pred[[4]]))) || + is.null(names(dim(pred[[5]]))) || + is.null(names(dim(pred[[6]]))) || + is.null(names(dim(pred[[7]]))) || + is.null(names(dim(pred[[8]]))) || + is.null(names(dim(pred[[9]])))) { + stop("Parameter 'pred' should have dimmension names.") + } + if (!(any(names(pred) %in% "u500"))) { + stop("Variable 'u500' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "v500"))) { + stop("Variable 'v500' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "t500"))) { + stop("Variable 't500' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "t850"))) { + stop("Variable 't850' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "t1000"))) { + stop("Variable 't1000' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "z500"))) { + stop("Variable 'z500' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "z1000"))) { + stop("Variable 'z1000' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "slp"))) { + stop("Variable 'slp' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "q700"))) { + stop("Variable 'q700' in pred parameter is missed.") + } + } +} else { + if (length(pred) != 2) { + stop("Parameter 'pred' must be a length of 2.") + } else { + if (is.null(names(dim(pred[[1]]))) || + is.null(names(dim(pred[[2]])))) { + stop("Parameter 'pred' should have dimmension names.") + } + if (!(any(names(pred) %in% "t1000"))) { + stop("Variable 't1000' in pred parameter is missed.") + } else if (!(any(names(pred) %in% "slp"))) { + stop("Variable 'slp' in pred parameter is missed.") + } + } +} + + if (all((sapply(pred,dim))==dim(pred[[1]])) & + all((sapply(pred,function(pred){names(dim(pred))}))==names(dim(pred[[1]])))) { + dim_pred <- dim(pred[[1]]) + if (!(any(names(dim_pred) %in% "time"))) { + stop("Dimension 'time' in pred parameter is missed.") + } + if (!(any(names(dim_pred) %in% "gridpoint"))) { + stop("Dimension 'gridpoint' in pred parameter is missed.") + } + if (names(dim_pred)[1] == "gridpoint") { + pred <- lapply(pred,aperm) + } else { + pred <- pred + } + } else { + stop("All 'pred' variables must have the same dimensions and name dimensions.") + } + +if (!is.vector(lon) || !is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector") +} else { + if (is.unsorted(lon)) { + lon <- sort(lon) + warning("'lon' vector has been sorted in increasing order") + } +} + +if (!is.vector(lat) || !is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector") +} else { + if (!is.unsorted(lat)) { + lat <- sort(lat, decreasing = TRUE) + warning("'lat' vector has been sorted in decreasing order") + } +} + +if (!is.character(HR_path)) { + stop("Parameter 'HR_path' must be a character.") +} else { + if (!dir.exists(HR_path)) { + stop("'HR_path' directory does not 
exist") + } +} + +if (!is.character(tdates)) { + stop("Parameter 'tdates' must be a character.") +} else { + if (nchar(tdates) != "17") { + stop("Parameter 'tdates' must be a string with 17 charecters.") + } else { + dateini <- as.Date(substr(tdates,start=1,stop=8),format="%Y%m%d") + dateend <- as.Date(substr(tdates,start=10,stop=18),format="%Y%m%d") + if (dateend <= dateini) { + stop("Parameter 'tdates' must be at least of one day") + } + } +} + +#! REANALYSIS GRID PARAMETERS + + rlon <- c(lon, NA) - c(NA, lon) + rlon <- rlon[!is.na(rlon)] + if (!all(rlon == rlon[1])) { + stop("Parameter 'lon' must be in regular grid.") + } else { + rlon <- rlon[1] + } + + rlat <- c(lat, NA) - c(NA, lat) + rlat <- rlat[!is.na(rlat)] + if (!all(rlat == rlat[1])) { + stop("Parameter 'lat' must be in regular grid.") + } else { + rlat <- rlat[1] + } + + if (rlon != (-rlat)) { + stop("Parameters 'lon' and 'lat' must have the same resolution.") + } else { + res <- rlon + } + + nlat <- ((lat[length(lat)] - lat[1]) / rlat) + 1 + nlon <- ((lon[length(lon)] - lon[1]) / rlon) + 1 + + ic <- nlat * nlon +# + slp_rlon <- c(slp_lon, NA) - c(NA, slp_lon) + slp_rlon <- slp_rlon[!is.na(slp_rlon)] + if (!all(slp_rlon == slp_rlon[1])) { + stop("Parameter 'slp_lon' must be in regular grid.") + } else { + slp_rlon <- slp_rlon[1] + } + + slp_rlat <- c(slp_lat, NA) - c(NA, slp_lat) + slp_rlat <- slp_rlat[!is.na(slp_rlat)] + if (!all(slp_rlat == slp_rlat[1])) { + stop("Parameter 'slp_lat' must be in regular grid.") + } else { + slp_rlat <- slp_rlat[1] + } + + if (slp_rlon != (-slp_rlat)) { + stop("Parameters 'slp_lon' and 'slp_lat' must have the same resolution.") + } else { + slp_res <- slp_rlon + } + + nlatt <- ((slp_lat[length(slp_lat)] - slp_lat[1]) / slp_rlat) + 1 + nlont <- ((slp_lon[length(slp_lon)] - slp_lon[1]) / slp_rlon) + 1 + + id <- nlatt * nlont + + slat <- max(lat) + slon <- min(c(lon[which(lon > 180)] - 360, + lon[which(lon <= 180)])) + + slatt <- max(slp_lat) + slont <- min(c(slp_lon[which(slp_lon > 180)] - 360, + slp_lon[which(slp_lon <= 180)])) + + ngridd <- ((2*nlatt)-1)*((2*nlont)-1) + + if (all((sapply(pred,nrow))==nrow(pred[[1]]))){ + nd <- nrow(pred[[1]]) + } else { + stop("All 'pred' variables must be in the same period.") + } + +#!!!!! COMPROBAR QUE SLP TAMBIEN TIENE EL MISMO NROW + + seqdates <- seq(as.Date(substr(tdates,start=1,stop=8),format="%Y%m%d"), + as.Date(substr(tdates,start=10,stop=18),format="%Y%m%d"),by="days") + month <- format(seqdates,format="%m") + day <- format(seqdates,format="%d") + +#! TRAINING REANALYSIS VARIABLES +t1000 <- pred[['t1000']] +msl_si <- pred[['slp']] +msl_lr <- slp_ext + +if (var == "prec") { +u500 <- pred[['u500']] +v500 <- pred[['v500']] +t500 <- pred[['t500']] +t850 <- pred[['t850']] +z500 <- pred[['z500']] +z1000 <- pred[['z1000']] +q700 <- pred[['q700']] +} + +#! 
HIGH-RESOLUTION (HR) OBSERVATIONAL DATASET +maestro_hr_file <- paste(HR_path, "maestro_red_hr_SPAIN.txt",sep="") +if (!file.exists(maestro_hr_file)) { + stop("'maestro_red_hr_SPAIN.txt' does not exist.") +} else { + maestro <- read.table(maestro_hr_file) + lon_hr <- unlist(maestro[2]) + lat_hr <- unlist(maestro[3]) + nptos <- length(readLines(maestro_hr_file)) +} + +if (var == "prec") { + prec_hr_file <- paste(HR_path, "pcp_red_SPAIN_",tdates,".txt",sep="") + if (!file.exists(prec_hr_file)) { + stop(sprintf("precipitation HR file for %s does not exist.",tdates)) + } else { + nd_hr <- length(readLines(prec_hr_file)) + preprec_hr <- matrix(scan(prec_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + prec_hr <- preprec_hr[1:nd_hr,-c(1)] + } +} else { + tmx_hr_file <- paste(HR_path, "tmx_red_SPAIN_",tdates,".txt",sep="") + tmn_hr_file <- paste(HR_path, "tmn_red_SPAIN_",tdates,".txt",sep="") + if (!file.exists(tmx_hr_file)) { + stop(sprintf("maximum temperature HR file for %s does not exist.",tdates)) + } else if (!file.exists(tmn_hr_file)) { + stop(sprintf("minimum temperature HR file for %s does not exist.",tdates)) + } else if (length(readLines(tmx_hr_file)) != length(readLines(tmn_hr_file))) { + stop("maximum and minimum temperature HR observation files must have the same period.") + } else { + nd_hr <- length(readLines(tmx_hr_file)) + pretmx_hr <- matrix(scan(tmx_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + tmx_hr <- pretmx_hr[1:nd_hr,-c(1)] + pretmn_hr <- matrix(scan(tmn_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + tmn_hr <- pretmn_hr[1:nd_hr,-c(1)] + } +} + if (nd_hr != nd) { + stop("Reanalysis variables and HR observations must have the same period.") + } + +#! OTHER PARAMETERS that should not be changed +#! Number of analog situations to consider +nanx <- 155 +#! Number of predictors +npx <- 11 + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ +if (var == "prec") { + + prePro <- .Fortran("training_part1_prec", + u500 = as.numeric(u500), + v500 = as.numeric(v500), + t1000 = as.numeric(t1000), + z500 = as.numeric(z500), + z1000 = as.numeric(z1000), + msl_si = as.numeric(msl_si), + msl_lr = as.numeric(msl_lr), + ngridd = as.integer(ngridd), + nlat = as.integer(nlat), + nlon = as.integer(nlon), + ic = as.integer(ic), + nlatt = as.integer(nlatt), + nlont = as.integer(nlont), + id = as.integer(id), + slat = as.numeric(slat), + slon = as.numeric(slon), + rlat = as.numeric(rlat), + rlon = as.numeric(rlon), + slatt = as.numeric(slatt), + slont = as.numeric(slont), + nd = as.integer(nd), + um = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + vm = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + gu92 = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + gv92 = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + gu52 = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + gv52 = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + nger = as.integer(1), + PACKAGE = 'CSTools') + + a <- prePro$um + b <- prePro$vm + c <- prePro$gu92[1:prePro$nger,] + d <- prePro$gv92[1:prePro$nger,] + e <- prePro$gu52[1:prePro$nger,] + f <- prePro$gv52[1:prePro$nger,] + + g <- prePro$nger + + predSig <- .Fortran("training_part2_prec", + u500 = as.numeric(u500), + v500 = as.numeric(v500), + t500 = as.numeric(t500), + t850 = as.numeric(t850), + msl_si = as.numeric(msl_si), + q700 = as.numeric(q700), + lon_hr = as.numeric(lon_hr), + lat_hr = as.numeric(lat_hr), + prec_hr = as.numeric(prec_hr), + nanx = as.integer(nanx), + nlat = as.integer(nlat), + nlon = as.integer(nlon), + ic = as.integer(ic), + nlatt = as.integer(nlatt), + nlont = as.integer(nlont), + id = as.integer(id), + slat = as.numeric(slat), + slon = as.numeric(slon), + rlat = as.numeric(rlat), + rlon = as.numeric(rlon), + slatt = as.numeric(slatt), + slont = as.numeric(slont), + nd = as.integer(nd), + um = as.double(a), + vm = as.double(b), + gu92 = as.double(c), + gv92 = as.double(d), + gu52 = as.double(e), + gv52 = as.double(f), + nger = as.integer(g), + vdmin = matrix(as.double(seq(1,nptos*4)),c(nptos,4)), + vref = matrix(as.integer(seq(1,nptos*4)),c(nptos,4)), + neni = as.integer(1), + mi = matrix(as.integer(seq(1,prePro$nger*nptos)),c(prePro$nger,nptos)), + ccm = matrix(as.double(seq(1,prePro$nger*nptos)),c(prePro$nger,nptos)), + lab_pred = matrix(as.integer(seq(1,prePro$nger*nptos*npx)),c(prePro$nger,nptos,npx)), + cor_pred = matrix(as.double(seq(1,prePro$nger*nptos*npx)),c(prePro$nger,nptos,npx)), + PACKAGE = 'CSTools') + + h <- predSig$mi + i <- predSig$ccm + j <- predSig$lab_pred + k <- predSig$cor_pred + l <- predSig$vdmin + m <- predSig$vref + + indices <- array(c(j,k),c(g,nptos,npx,2)) + dimnames(indices)[[4]] <- c("lab_pred","cor_pred") + + output <- list("um" = a, + "vm" = b, + "nger" = g, + "gu92" = c, + "gv92" = d, + "gu52" = e, + "gv52" = f, + "neni" = predSig$neni, + "vdmin" = l, + "vref" = m, + "ccm" = i, + "indices" = indices) +} else { + + prePro <- .Fortran("training_temp", + t1000 = as.numeric(t1000), + msl_si = as.numeric(msl_si), + msl_lr = as.numeric(msl_lr), + lon_hr = as.numeric(lon_hr), + lat_hr = as.numeric(lat_hr), + ngridd = as.integer(ngridd), + nlat = as.integer(nlat), + nlon = as.integer(nlon), + ic = as.integer(ic), + nlatt = as.integer(nlatt), + nlont = as.integer(nlont), + id = as.integer(id), + slat = as.numeric(slat), + slon = as.numeric(slon), + rlat = as.numeric(rlat), + rlon = as.numeric(rlon), + slatt = as.numeric(slatt), + slont = as.numeric(slont), + nd = as.integer(nd), + day = as.integer(day), + 
month = as.integer(month), + um = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + vm = matrix(as.double(seq(1,nd*ic)),c(nd,ic)), + insol = vector(mode="double",length=nd), + vdmin = matrix(as.double(seq(1,nptos*4)),c(nptos,4)), + vref = matrix(as.integer(seq(1,nptos*4)),c(nptos,4)), + neni = as.integer(1), + PACKAGE = 'CSTools') + + a <- prePro$um + b <- prePro$vm + c <- prePro$insol + d <- prePro$vdmin + e <- prePro$vref + f <- prePro$neni + + output <- list("um" = a, + "vm" = b, + "insol" = c, + "vdmin" = d, + "vref" = e, + "neni" = f) + +} + + return(output) + +} + diff --git a/R/CST_AdamontAnalog.R b/R/CST_AdamontAnalog.R new file mode 100644 index 0000000000000000000000000000000000000000..1238b3b19b7d1757c6313427674de160c76f1558 --- /dev/null +++ b/R/CST_AdamontAnalog.R @@ -0,0 +1,234 @@ +#'CST_AdamontAnalog finds analogous data in the reference dataset to experiment +#'data based on weather types +#' +#'@description This function searches for analogs in a reference dataset for +#'experiment data, based on corresponding weather types. The experiment data is +#'typically a hindcast, observations are typically provided by reanalysis data. +#'@author Paola Marson, \email{paola.marson@meteo.fr} for PROSNOW version +#'@author Lauriane Batté, \email{lauriane.batte@meteo.fr} for CSTools adaptation +#' +#'@param exp \itemize{ +#'\item\code{CST_AdamontAnalog}{experiment data an object of class \code{s2dv_cube}, can be output +#'from quantile correction using CST_AdamontQQCorr} +#'\item\code{AdamontAnalog}{experiment data array with named dimension}} +#'@param wt_exp corresponding weather types (same dimensions as \code{exp$data} +#'but lat/lon) +#'@param obs \itemize{ +#'\item\code{CST_AdamontAnalog}{reference data, also of class \code{s2dv_cube}.} +#'\item\code{AdamontAnalog}{reference data array with named dimension.}} +#'Note that lat/lon dimensions need to be the same as \code{exp} +#'@param wt_obs corresponding weather types (same dimensions as \code{obs$data} +#'but lat/lon) +#'@param nanalogs integer defining the number of analog values to return +#'(default: 5) +#'@param method a character string indicating the method used for analog +#'definition +#' Coded are 'pattcorr': pattern correlation +#' 'rain1' (for precip patterns): rain occurrence consistency +#' 'rain01' (for precip patterns): rain occurrence/non +#' occurrence consistency +#'@param thres real number indicating the threshold to define rain +#'occurrence/non occurrence in rain(0)1 +#'@param search_obsdims list of dimensions in \code{obs} along which analogs are +#'searched for +#'@param londim name of longitude dimension +#'@param latdim name of latitude dimension +#'@return analog_vals +#'\itemize{ +#'\item\code{CST_AdamontAnalog}{an object of class \code{s2dv_cube} containing nanalogs +#'analog values for each value of \code{exp} input data} +#'\item\code{AdamontAnalog}{an array containing nanalogs analog values}} +#'@import multiApply +#'@importFrom s2dverification Subset +#'@examples +#'\dontrun{ +#'wt_exp <- sample(1:3, 15*6*3, replace=T) +#'dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +#'wt_obs <- sample(1:3, 6*3, replace=T) +#'dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3) +# analog_vals <- CST_AdamontAnalog(exp=lonlat_data$exp, obs=lonlat_data$obs, wt_exp=wt_exp, wt_obs=wt_obs, nanalogs=2) +#'} +#'\dontrun{ +#'wt_exp <- sample(1:3, 15*6*3, replace=T) +#'dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +#'wt_obs <- sample(1:3, 6*3, replace=T) +#'dim(wt_obs) <- c(dataset=1, member=1, sdate=6, 
ftime=3) +# analog_vals <- AdamontAnalog(exp=lonlat_data$exp$data, +#' obs=lonlat_data$obs$data, wt_exp=wt_exp, wt_obs=wt_obs, nanalogs=2) +#'} + +CST_AdamontAnalog <- function(exp, obs, wt_exp, wt_obs, nanalogs, + method = 'pattcorr', thres = NULL, + search_obsdims = c('member', 'sdate', 'ftime'), + londim = 'lon', latdim = 'lat') { + + dimnames <- names(dim(obs$data)) + dimnamesexp <- names(dim(exp$data)) + if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { + stop("Inputs 'exp' and 'obs' must be of class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (!(method %in% c('pattcorr','rain1','rain01'))) { + stop("Input parameter 'method' must be 'pattcorr', 'rain1', or 'rain01'") + } + if (is.null(nanalogs)){ + nanalogs <- 5 + } + if (!(latdim %in% dimnames) || !(londim %in% dimnames)){ + stop("'londim' or 'latdim' input doesn't match with 'obs$data' dimension", + " names") + } + if (!(latdim %in% dimnamesexp) || !(londim %in% dimnamesexp)){ + stop("'londim' or 'latdim' input doesn't match with 'exp$data' dimension", + " names") + } + if (!all(search_obsdims %in% dimnames)){ + stop("Names in parameter 'search_obsdims' should match 'obs$data' ", + "dimension names.") + } + if (!all(dim(wt_exp) %in% dim(exp$data))){ + stop("Dimensions for 'wt_exp' should match 'exp$data' except lat/lon") + } + if (!all(dim(wt_obs) %in% dim(obs$data))){ + stop("Dimensions for 'wt_obs' should match 'obs$data' except lat/lon") + } + plat_exp <- which(dimnamesexp==latdim) + plon_exp <- which(dimnamesexp==londim) + plat_obs <- which(dimnames==latdim) + plon_obs <- which(dimnames==londim) + if ((dim(obs$data)[plon_obs]!=dim(exp$data)[plon_exp]) || + (dim(obs$data)[plat_obs]!=dim(exp$data)[plat_exp])){ + stop("Element 'data' from parameters 'obs' and 'exp' should have", + "same lon / lat dimensions if working with regular grids.") + } + # End of sanity checks; call AdamontAnalog function + analog_vals <- AdamontAnalog(exp = exp$data, obs = obs$data, wt_exp = wt_exp, + wt_obs=wt_obs, nanalogs = nanalogs, + method = method, thres = thres, + search_obsdims = search_obsdims, londim = londim, + latdim = latdim ) + + return(analog_vals) +} + +#'AdamontAnalog finds analogous data in the reference dataset to experiment data +#'based on weather types +#' +#'@import multiApply +#'@importFrom s2dverification Subset +#'@rdname CST_AdamontAnalog +#'@export +AdamontAnalog <- function(exp, obs, wt_exp, wt_obs, nanalogs=5, + method = 'pattcorr', thres = NULL, + search_obsdims = c('member', 'sdate', 'ftime'), + londim = 'lon', latdim = 'lat') { + # exp: lat, lon, sdate, ftime, member + # obs: lat, lon, dims for searching 'sdate' 'ftime'... 
+ # wt_exp: sdate, ftime, member + # wt_obs: the dims for searching + dimnames <- names(dim(obs)) + dimnamesexp <- names(dim(exp)) + if (method %in% c('rain1','rain01') & is.null(thres)){ + stop("Threshold 'thres' must be defined with methods 'rain1' and 'rain01'") + } + if (method == 'pattcorr' & !is.null(thres)){ + warning("Parameter 'thres' is not used with method 'pattcorr'.") + } + if (!(latdim %in% dimnamesexp) || !(londim %in% dimnamesexp)){ + stop("'londim' or 'latdim' input doesn't match with 'exp' dimension names") + } + # Position of lat/lon dimensions in exp data + poslatexp <- which(dimnamesexp == latdim) + poslonexp <- which(dimnamesexp == londim) + poslatobs <- which(dimnames == latdim) + poslonobs <- which(dimnames == londim) + if (!all(search_obsdims %in% dimnames)){ + stop("Names in parameter 'search_obsdims' should match 'obs' ", + "dimension names.") + } + if (!all(dim(wt_exp) %in% dim(exp))){ + stop("Dimensions for 'wt_exp' should match 'exp' except lat/lon") + } + if (!all(dim(wt_obs) %in% dim(obs))){ + stop("Dimensions for 'wt_obs' should match 'obs' except lat/lon") + } + if ((dim(obs)[poslonobs]!=dim(exp)[poslonexp]) || + (dim(obs)[poslatobs]!=dim(exp)[poslatexp])){ + stop("Parameters 'obs' and 'exp' should have same lon / lat dimensions.") + } + + ## Reshaping obs: + ## The dimensions where to search in a single dim + if (length(search_obsdims) > 1) { + for (i in 1:(length(search_obsdims) - 1)) { + obs <- MergeDims(obs, search_obsdims[i:(i + 1)], + rename_dim = search_obsdims[i + 1]) + wt_obs <- MergeDims(wt_obs, search_obsdims[i:(i + 1)], + rename_dim = search_obsdims[i + 1]) + } + } + names(dim(obs))[which(names(dim(obs)) == search_obsdims[length(search_obsdims)])] <- 'time' + names(dim(wt_obs))[which(names(dim(wt_obs)) == search_obsdims[length(search_obsdims)])] <- 'time' + # Split 'time' dim in weather types + obs <- SplitDim(obs, split_dim = 'time', indices = as.vector(wt_obs), + new_dim_name='type') + + analog_vals <- Apply(list(exp, obs, wt_exp), + target_dims = list(c(londim, latdim), + c(londim, latdim, 'time', 'type'), + NULL), + .analogs, method = method, thres = thres)$output1 + + # Reshaping output: + analog_vals <- Subset(analog_vals,along='type',indices=1,drop='selected') + poslat <- which(names(dim(analog_vals)) == latdim) + poslon <- which(names(dim(analog_vals)) == londim) + postime <- which(names(dim(analog_vals)) == 'time') # Dimension with N analogs + pos <- 1:length(dim(analog_vals)) + if (poslatexp > poslonexp){ + analog_vals <- aperm(analog_vals,c(pos[-c(poslon,poslat,postime)], + postime,poslon,poslat)) + } else { + analog_vals <- aperm(analog_vals,c(pos[-c(poslon,poslat,postime)], + postime,poslat,poslon)) + } + # Renaming 'time' dim to 'analog' + names(dim(analog_vals))[which(names(dim(analog_vals)) == 'time')] <- 'analog' + return(analog_vals) +} + + +.analogs <- function(exp, obs, wt_exp, nanalogs = 5, method = 'pattcorr', + thres = NULL, londimexp = 'lon', latdimexp = 'lat', + londimobs = 'lon', latdimobs = 'lat') { + # exp: lon, lat + # obs: lon, lat, time, wt + # wt_exp: wt single scalar + + search_analog <- switch(method, 'rain1' = .rain1, 'rain01' = .rain01, + 'pattcorr' = .pattcor, + stop(paste0("Adamont Analog function only supports ", + "methods 'rain1', 'rain01', 'pattcorr'"))) + + obs <- Subset(obs, along = 'type', indices = wt_exp) + accuracy <- Apply(list(exp, obs), target_dims = list(c(londimexp, latdimexp), + c(londimobs, latdimobs)), + search_analog, thres = thres)$output1 + obs <- Subset(obs, along = 'time', indices = 
order(accuracy, decreasing = TRUE)[1:nanalogs]) + return(obs) +} + +.rain1 <- function(exp_day, obs_day, thres) { + accuracy <- sum((obs_day >= thres) * (exp_day >= thres)) + return(accuracy) +} +.rain01 <- function(exp_day, obs_day, thres) { + accuracy <- sum(diag(table((obs_day >= thres),(exp_day >= thres)))) + return(accuracy) +} +.pattcor <- function(exp_day, obs_day, thres = NULL) { + accuracy <- cor(as.vector(obs_day),as.vector(exp_day)) + return(accuracy) +} + + diff --git a/R/CST_AdamontQQCorr.R b/R/CST_AdamontQQCorr.R new file mode 100644 index 0000000000000000000000000000000000000000..1d72c2a87b788ded2a1ae8e00d80ff0f5aa0abff --- /dev/null +++ b/R/CST_AdamontQQCorr.R @@ -0,0 +1,393 @@ +#'CST_AdamontQQCorr computes quantile-quantile correction of seasonal or +#'decadal forecast data using weather types +#' +#'@description This function computes a quantile mapping based on weather types +#'for experiment data (typically a hindcast) onto reference \code{obs}, +#'typically provided by reanalysis data. +#'@author Lauriane Batté, \email{lauriane.batte@meteo.fr} +#'@author Paola Marson, \email{paola.marson@meteo.fr} +#'@author Gildas Dayon, \email{gildas.dayon@meteo.fr} +#' +#'@param exp experiment data an object of class \code{s2dv_cube} +#'@param wt_exp corresponding weather types (same dimensions as \code{exp$data} +#' but lat/lon) +#'@param obs reference data, also of class \code{s2dv_cube}. lat/lon dimensions +#' can differ from \code{exp} if non rectilinear latlon grids are used, +#' in which case regrid should be set to TRUE and .NearestNeighbors \code{NN} +#' output should be provided +#'@param wt_obs corresponding weather types (same dimensions as \code{obs} but +#'lat/lon) +#'@param corrdims list of dimensions in \code{exp} for which quantile mapping +#' correction is applied +#'@param londim character name of longitude dimension in \code{exp} and +#' \code{obs} +#'@param latdim character name of latitude dimension in \code{exp} and +#' \code{obs} +#' +#'@return an object of class \code{s2dv_cube} containing experiment data on the +#' lat/lon grid of \code{obs} input data, corrected by quantile mapping +#' depending on the weather types \code{wt_exp} +#' +#'@import qmap +#'@importFrom s2dverification Subset +#'@import multiApply +#'@import abind +#'@examples +#'\dontrun{ +#'wt_exp <- sample(1:3, 15*6*3, replace=T) +#'dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +#'wt_obs <- sample(1:3, 6*3, replace=T) +#'dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3) +#'exp_corr <- CST_AdamontQQCorr(exp=lonlat_data$exp, wt_exp=wt_exp, +#' obs=lonlat_data$obs, wt_obs=wt_obs, +#' corrdims = c('dataset','member','sdate','ftime')) +#'} +CST_AdamontQQCorr <- function(exp, wt_exp, obs, wt_obs, + corrdims = c('member','sdate','ftime'), + londim='lon', latdim='lat') { + + if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')){ + stop("Inputs 'exp' and 'obs' must be of class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + dimnames <- names(dim(obs$data)) + dimnamesexp <- names(dim(exp$data)) + if (!(latdim %in% dimnames) || !(londim %in% dimnames)){ + stop("'londim' or 'latdim' input doesn't match with 'obs$data' dimension", + " names") + } + if (!(latdim %in% dimnamesexp) || !(londim %in% dimnamesexp)){ + stop("'londim' or 'latdim' input doesn't match with 'exp$data' dimension", + " names") + } + if (!(('time' %in% corrdims) || ('ftime' %in% corrdims))){ + warning("Forecast time should be one of the dimensions for the correction + specified in corrdims input 
list") + } + if (!all(corrdims %in% dimnamesexp)){ + stop("Names in parameter 'corrdims' should match input dimension names.") + } + if (!all(dim(wt_exp) %in% dim(exp$data))){ + stop("Dimensions for 'wt_exp' should match 'exp$data' except lat/lon") + } + if (!all(dim(wt_obs) %in% dim(obs$data))){ + stop("Dimensions for 'wt_obs' should match 'obs$data' except lat/lon") + } + if ((length(dim(exp$lon))==2) || (length(dim(obs$lon))==2)){ + myNN <- .NearestNeighbors(exp=exp, obs=obs, method='ADA') + exp_corr <- AdamontQQCorr(exp=exp$data, wt_exp=wt_exp, obs=obs$data, + wt_obs=wt_obs, corrdims=corrdims, + londim=londim, latdim=latdim, + regrid=TRUE, NN=myNN) + } else { + ## If not (standard case) + ## exp$data lat/lon dimensions should match obs$data + plat_exp <- which(dimnamesexp==latdim) + plon_exp <- which(dimnamesexp==londim) + plat_obs <- which(dimnames==latdim) + plon_obs <- which(dimnames==londim) + if ((dim(obs$data)[plon_obs]!=dim(exp$data)[plon_exp]) || + (dim(obs$data)[plat_obs]!=dim(exp$data)[plat_exp])){ + stop("Element 'data' from parameters 'obs' and 'exp' should have", + "same lon / lat dimensions if working with regular grids.") + } + exp_corr <- AdamontQQCorr(exp=exp$data, wt_exp=wt_exp, obs=obs$data, + wt_obs=wt_obs, corrdims=corrdims, + londim=londim, latdim=latdim, regrid=FALSE) + } + return(exp_corr) +} + + +#'AdamontQQCorr computes quantile-quantile correction of seasonal or decadal +#'forecast data using weather types +#' +#'@description This function computes a quantile mapping based on weather types +#'for experiment data (typically a hindcast) onto reference \code{obs}, +#'typically provided by reanalysis data. +#'@author Paola Marson, \email{paola.marson@meteo.fr} for PROSNOW version +#'@author Lauriane Batté, \email{lauriane.batte@meteo.fr} for CSTools adaptation +#' +#'@param exp array with named dimensions (such as \code{$data} array of +#'experiment data from an object of class \code{s2dv_cube}) +#'@param wt_exp corresponding weather types (same dimensions as \code{exp} but +#'lat/lon) +#'@param obs array with named dimensions with reference data (can also be +#'\code{$data} array of class \code{s2dv_cube}). 
lat/lon dimensions can differ +#'from \code{exp} if non rectilinear latlon grids are used, in which case +#'regrid should be set to TRUE and .NearestNeighbors \code{NN} output should be +#'provided +#'@param wt_obs corresponding weather types (same dimensions as \code{obs} but +#'lat/lon) +#'@param corrdims list of dimensions in \code{exp} for which quantile mapping +#'correction is applied +#'@param londim character name of longitude dimension in \code{exp} and +#'\code{obs} +#'@param latdim character name of latitude dimension in \code{exp} and +#'\code{obs} +#'@param regrid (optional) boolean indicating whether .NearestNeighbors +#'regridding is needed +#'@param NN (optional, if regrid=TRUE) list (output from .NearestNeighbors) +#'maps (nlat, nlon) onto (nlat_o, nlon_o) +#' +#'@return an array (such as \code{$data} array from an object of class +#'\code{s2dv_cube}) with named dimensions, containing experiment data on the +#'lat/lon grid of \code{obs} array, corrected by quantile mapping depending on +#'the weather types \code{wt_exp} +#' +#'@import qmap +#'@importFrom s2dverification Subset +#'@import multiApply +#'@import abind +#'@examples +#'\dontrun{ +#'wt_exp <- sample(1:3, 15*6*3, replace=T) +#'dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +#'wt_obs <- sample(1:3, 6*3, replace=T) +#'dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3) +#'exp_corr <- AdamontQQCorr(exp=lonlat_data$exp$data, wt_exp=wt_exp, +#' obs=lonlat_data$obs$data, wt_obs=wt_obs, +#' corrdims = c('dataset','member','sdate','ftime')) +#'} +AdamontQQCorr <- function(exp, wt_exp, obs, wt_obs, + corrdims = c('member', 'sdate', 'ftime'), + londim='lon', latdim='lat', regrid=FALSE, NN=NULL) { + + dimnames <- names(dim(obs)) + dimnamesexp <- names(dim(exp)) + if (!(latdim %in% dimnames) || !(londim %in% dimnames)){ + stop("'londim' or 'latdim' input doesn't match with 'obs' dimension names") + } + if (!(('time' %in% corrdims) || ('ftime' %in% corrdims))){ + warning("Forecast time should be one of the dimensions for the correction", + " specified in corrdims input list") + } + if (!all(corrdims %in% dimnamesexp)){ + stop("Names in parameter 'corrdims' should match input dimension names.") + } + if (!all(dim(wt_exp) %in% dim(exp))){ + stop("Dimensions for 'wt_exp' should match 'exp' except lat/lon") + } + if (!all(dim(wt_obs) %in% dim(obs))){ + stop("Dimensions for 'wt_obs' should match 'obs' except lat/lon") + } + if ((regrid == 'TRUE') & is.null(NN)){ + stop("regrid set to TRUE: provide nearest neighbors input NN") + } + # The regridding part should only be done if lat/lon dimensions of obs and + # exp differ. 
+ if (regrid == 'TRUE'){ + obsdims <- names(dim(obs)) + poslat <- which(obsdims == latdim) + poslon <- which(obsdims == londim) + nlat_o <- dim(obs)[poslat] + nlon_o <- dim(obs)[poslon] + ilat_o <- array(c(1:nlat_o)) + names(dim(ilat_o))[1] <- latdim + ilon_o <- array(c(1:nlon_o)) + names(dim(ilon_o))[1] <- londim + ## First step if obs data is higher resolution than exp data is to use + ## nearest neighbor to compute downscaling of exp data + exp_corr <- Apply(list(exp,ilat_o,ilon_o), + target_dims=list(c(latdim,londim),latdim,londim), + .getNN,NN=NN)$output1 + + ## Reorder exp_corr dimensions to match exp dimensions + dexpc <- match(names(dim(exp)), names(dim(exp_corr))) + exp_corr <- aperm(exp_corr,dexpc) + dimnames(exp_corr) <- dimnames(exp)[dexpc] + ## Keep original wt_exp for remapping data + wt_exp2 <- wt_exp + ## Both exp and obs data are now on the same grid + } else { + ## exp lat/lon dimensions should match obs + plat_exp <- which(dimnamesexp==latdim) + plon_exp <- which(dimnamesexp==londim) + plat_obs <- which(dimnames==latdim) + plon_obs <- which(dimnames==londim) + if ((dim(obs)[plon_obs]!=dim(exp)[plon_exp]) || + (dim(obs)[plat_obs]!=dim(exp)[plat_exp])){ + stop("Parameters 'obs' and 'exp' should have same lon / lat", + " dimensions if regrid set to 'FALSE' (regular grid case).") + } + exp_corr <- exp + ## Keep original wt_exp for remapping data + wt_exp2 <- wt_exp + } + + ## Use CST_QuantileMapping function for quantile mapping + ## depending on weather type + for (i in 1:(length(corrdims) - 1)) { + obs <- MergeDims(obs, corrdims[i:(i+1)], rename_dim=corrdims[i+1]) + wt_obs <- MergeDims(wt_obs, corrdims[i:(i+1)], rename_dim=corrdims[i+1]) + exp_corr <- MergeDims(exp_corr, corrdims[i:(i+1)], rename_dim=corrdims[i+1]) + wt_exp2 <- MergeDims(wt_exp2, corrdims[i:(i+1)], rename_dim=corrdims[i+1]) + } + names(dim(obs))[which(names(dim(obs)) == corrdims[length(corrdims)])] <- 'time' + names(dim(wt_obs))[which(names(dim(wt_obs)) == corrdims[length(corrdims)])] <- 'time' + names(dim(exp_corr))[which(names(dim(exp_corr)) == corrdims[length(corrdims)])] <- 'time' + names(dim(wt_exp2))[which(names(dim(wt_exp2)) == corrdims[length(corrdims)])] <- 'time' + # Split 'time' dim in weather types + obs <- SplitDim(obs, split_dim='time',indices=as.vector(wt_obs), + new_dim_name='type') + exp_corr <- SplitDim(exp_corr, split_dim='time',indices=as.vector(wt_exp2), + new_dim_name='type') + ## Add NAs to exp_corr if needed to have compatible sample dimensions + numtobs <- dim(obs)[which(names(dim(obs))=='time')] + numtexp <- dim(exp_corr)[which(names(dim(exp_corr))=='time')] + if (numtexp%%numtobs > 0){ + ## Create extra dimension and include NAs + ndimexp <- names(dim(exp_corr)) + ndimobs <- names(dim(obs)) + postime <- which(ndimexp=='time') + dimadd <- dim(exp_corr) + dimadd[postime] <- ceiling(numtexp/numtobs)*numtobs-numtexp + exp_corr <- abind::abind(exp_corr,array(NA,dimadd),along=postime) + names(dim(exp_corr)) <- ndimexp + exp_corr <- SplitDim(exp_corr,'time',freq=numtobs,indices=NULL) + dimobs <- c(dim(obs),1) + dim(obs) <- dimobs + names(dim(obs)) <- c(ndimobs,'index') + res <- QuantileMapping(exp=exp_corr,obs=obs,sample_dims=c('time','index'), + method='RQUANT') + res <- MergeDims(res,c('time','index')) + ## Remove the extra NA values added previously + res <- Subset(res,along='time',indices=1:numtexp) + } else { + ## Apply QuantileMapping to exp_corr depending on weather type + res <- QuantileMapping(exp=exp_corr,obs=obs,sample_dims='time', + samplemethod='RQUANT') + } + rm(exp_corr) 
# Save space in memory + ## Reshape exp_corr data onto time dimension before 'Split' + rep_pos <- array(NA,c(time=length(wt_exp2))) + pos_time <- which(names(dim(res)) == 'time') + pos_type <- which(names(dim(res)) == 'type') + for (x in unique(wt_exp2)){ + rep_pos[which(wt_exp2==x)]<-1:length(which(wt_exp2==x)) + } + exp_corr <- .unsplit_wtype(exp=res,wt_exp=wt_exp2,rep_pos=rep_pos, + pos_time=pos_time) + # Now reshape exp_corr data onto original dimensions + dim(exp_corr) <- c(dim(wt_exp), dim(exp_corr)[-c(pos_time,pos_type)]) + return(exp_corr) +} + +.getNN <- function(exp,ilat,ilon,NN){ + return(exp[NN$imin_lat[ilat,ilon],NN$imin_lon[ilat,ilon]]) +} + +.unsplit_wtype <- function(exp=exp,dim_wt='type',wt_exp=wt_exp, + dim_time='time',rep_pos=rep_pos,pos_time=1){ + # Initiate output + new <- Subset(Subset(exp, along=dim_wt, indices=wt_exp[1]), along=dim_time, + indices=rep_pos[1]) + dimnames <- names(dim(new)) + for (x in 2:length(wt_exp)){ + dat <- Subset(Subset(exp, along=dim_wt, indices=wt_exp[x]), + along=dim_time, indices=rep_pos[x]) + new <- abind::abind(new,dat,along=pos_time) + } + names(dim(new)) <- dimnames + return(new) +} +#' ADAMONT Nearest Neighbors computes the distance between reference data grid centroid and SF data grid +#' +#'@author Paola Marson, \email{paola.marson@meteo.fr} for PROSNOW version +#'@author Lauriane Batté, \email{lauriane.batte@meteo.fr} for CSTools adaptation +#'@description This function computes the nearest neighbor for each reference data (lon, lat) point in the experiment dataset by computing the distance between the reference dataset grid and the experiment data. This is the first step in the ADAMONT method adapted from Verfaillie et al. (2018). +#' +#'@param method a string among three options ('ADA': standard ADAMONT distance, 'simple': lon/lat straight euclidian distance, 'radius': distance on the sphere) +#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment longitudes in \code{$lon} and latitudes in \code{$lat} +#'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the reference data on a different grid, with longitudes in \code{$lon} and latitudes in \code{$lat}. +#' +#'@return NN a list, containing the following: +#' min_lon: array of dimensions \code{obs$lon} giving the longitude of closest gridpoint in exp +#' min_lat: array of dimensions \code{obs$lat} giving the latitude of closest gridpoint in exp +#' imin_lon: array of dimensions \code{obs$lon} giving the longitude index of closest gridpoint in exp +#' imin_lat: array of dimensions \code{obs$lat} giving the latitude index of closest gridpoint in exp +#' +#'@importFrom s2dverification Subset +#'@import ncdf4 +#'@noRd +.NearestNeighbors <- function (exp, obs, method='ADA') { + + if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { + stop("Inputs 'exp' and 'obs' must be of class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + exp_lon <- exp$lon + exp_lat <- exp$lat + obs_lon <- obs$lon + obs_lat <- obs$lat + dim_exp_lon <- dim(exp_lon) + dim_exp_lat <- dim(exp_lat) + dim_obs_lon <- dim(obs_lon) + dim_obs_lat <- dim(obs_lat) + # Check if one of the grids is non-regular: + if ((length(dim_exp_lon)==2) || (length(dim_obs_lon)==2)){ + # Flatten longitudes and latitudes in case of 2-D longitudes and latitudes (Lambert grids, etc.) 
+ if ((length(dim_exp_lon)==2) & (length(dim_exp_lat)==2)){ + dim(exp_lon) <- c(dim_exp_lon[1]*dim_exp_lon[2]) + dim(exp_lat) <- c(dim_exp_lat[1]*dim_exp_lat[2]) + } + if ((length(dim_obs_lon)==2) & (length(dim_obs_lat)==2)){ + dim(obs_lon) <- c(dim_obs_lon[1]*dim_obs_lon[2]) + dim(obs_lat) <- c(dim_obs_lat[1]*dim_obs_lat[2]) + } + # Now lat and lon arrays have 1 dimension, length npt (= nlat*nlon) + OBS_grid <- cbind(obs_lon,obs_lat) + EXP_grid <- cbind(exp_lon,exp_lat) + dist_min<-min_lon<-min_lat<-imin_lon<-imin_lat<-array(dim=nrow(OBS_grid)) + if (method == 'ADA'){ + C<-cos(OBS_grid[,2]*pi/180)^2 + for (i in 1:nrow(OBS_grid)){ + dist<-(OBS_grid[i,2]-EXP_grid[,2])^2+C[i]*(OBS_grid[i,1]-EXP_grid[,1])^2 + dist_min[i]<-min(dist) + min_lon[i]<-EXP_grid[which.min(dist),1] + min_lat[i]<-EXP_grid[which.min(dist),2] + imin_lon[i]<-which(exp_lon==min_lon[i]) + imin_lat[i]<-which(exp_lat==min_lat[i]) + } + } else if (method == 'simple'){ + for (i in 1:nrow(OBS_grid)){ + dist<-(OBS_grid[i,2]-EXP_grid[,2])^2+(OBS_grid[i,1]-EXP_grid[,1])^2 + dist_min[i]<-min(dist) + min_lon[i]<-EXP_grid[which.min(dist),1] + min_lat[i]<-EXP_grid[which.min(dist),2] + imin_lon[i]<-which(exp_lon==min_lon[i]) + imin_lat[i]<-which(exp_lat==min_lat[i]) + } + } else if (method == 'radius'){ + R <- 6371e3 # metres, Earth radius + EXP_gridr<-EXP_grid*pi/180 + OBS_gridr<-OBS_grid*pi/180 + for (i in 1:nrow(OBS_grid)){ + a<-sin((OBS_gridr[i,2]-EXP_gridr[,2])/2)^2 + cos(OBS_gridr[i,2])*cos(EXP_gridr[,2])*sin((OBS_gridr[i,1]-EXP_gridr[,1])/2)^2 + c<-2*atan2(sqrt(a),sqrt(1-a)) + dist<-R*c + dist_min[i]<-min(dist) + min_lon[i]<-EXP_grid[which.min(dist),1] + min_lat[i]<-EXP_grid[which.min(dist),2] + imin_lon[i]<-which(exp_lon==min_lon[i]) + imin_lat[i]<-which(exp_lat==min_lat[i]) + } + } else { + stop("AdamontNearestNeighbors supports method = 'ADA', 'simple' or 'radius' only.") + } + + # Reshape outputs to original grid + dim(min_lon)=dim_obs_lon + dim(min_lat)=dim_obs_lat + dim(imin_lon)=dim_obs_lon + dim(imin_lat)=dim_obs_lat + + } else { + # Regular lon/lat grid case: has been handled by CST_Load() + stop("AdamontNearestNeighbors is meant for non-regular lat/lon grids; use e.g. CST_Load to interpolate exp onto obs grid") + } + + NN=list(min_lon=min_lon, min_lat=min_lat, imin_lon=imin_lon, imin_lat=imin_lat) + + return(NN) +} diff --git a/R/CST_Analogs.R b/R/CST_Analogs.R index dc813c7918ecacebf4cebf9ce2dc0d346fe6deee..24475de158c0d71f629d68e7b2448a50b5f9f802 100644 --- a/R/CST_Analogs.R +++ b/R/CST_Analogs.R @@ -2,36 +2,34 @@ #'@title Downscaling using Analogs based on large scale fields. #' #'@author M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +#'@author Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it} +#'@author Veronica Torralba, \email{veronica.torralba@cmcc.it} #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function perform a downscaling using Analogs. To compute #'the analogs, the function search for days with similar large scale conditions -#'to downscaled fields in the local scale. The large scale and the local scale +#'to downscaled fields to a local scale. The large scale and the local scale #'regions are defined by the user. The large scale is usually given by #'atmospheric circulation as sea level pressure or geopotential height #'(Yiou et al, 2013) but the function gives the possibility to use another #'field. 
The local scale will be usually given by precipitation or temperature #'fields, but might be another variable.The analogs function will find the best -#'analogs based in three criterias: -#' (1) Minimal distance in the large scale pattern (i.e. SLP) -#' (2) Minimal distance in the large scale pattern (i.e. SLP) and minimal -#' distance in the local scale pattern (i.e. SLP). -#' (3) Minimal distance in the large scale pattern (i.e. SLP), minimal -#' distance in the local scale pattern (i.e. SLP) and maxima correlation in the -#' local variable to downscale (i.e Precipitation). +#'analogs based in Minimum Euclidean distance in the large scale pattern +#'(i.e.SLP). +#' #'The search of analogs must be done in the longest dataset posible. This is #'important since it is necessary to have a good representation of the #'possible states of the field in the past, and therefore, to get better -#'analogs. Once the search of the analogs is complete, and in order to used -#'the three criterias the user can select a number of analogs, using parameter -#''nAnalogs' to restrict the selection of the best analogs in a short number -#'of posibilities, the best ones. +#'analogs. #'This function has not constrains of specific regions, variables to downscale, #'or data to be used (seasonal forecast data, climate projections data, #'reanalyses data). The regrid into a finner scale is done interpolating with #'CST_Load. Then, this interpolation is corrected selecting the analogs in the #'large and local scale in based of the observations. The function is an -#'adapted version of the method of Yiou et al 2013. +#'adapted version of the method of Yiou et al 2013. For an advanced search of +#'Analogs (multiple Analogs, different criterias, further information from the +#'metrics and date of the selected Analogs) use the'Analog' +#'function within 'CSTools' package. #' #'@references Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, #' and M. Vrac, 2013 : Ensemble reconstruction of the atmospheric column @@ -48,8 +46,6 @@ #'large scale. The element 'data' in the 's2dv_cube' object must have the same #'latitudinal and longitudinal dimensions as parameter 'expL' and a temporal #'dimension with the maximum number of available observations. -#'@param time_obsL a character string indicating the date of the observations -#'in the format "dd/mm/yyyy" #'@param expVar an 's2dv_cube' object containing the experimental field on the #'local scale, usually a different variable to the parameter 'expL'. If it is #'not NULL (by default, NULL), the returned field by this function will be the @@ -58,70 +54,154 @@ #'passed in parameter 'expVar' for the same region. #'@param region a vector of length four indicating the minimum longitude, the #'maximum longitude, the minimum latitude and the maximum latitude. 
-#'@param criteria a character string indicating the criteria to be used for the +#'@param criteria a character string indicating the criteria to be used for the #'selection of analogs: #'\itemize{ -#'\item{Large_dist} minimal distance in the large scale pattern; -#'\item{Local_dist} minimal distance in the large scale pattern and minimal -#' distance in the local scale pattern; and -#'\item{Local_cor} minimal distance in the large scale pattern, minimal -#' distance in the local scale pattern and maxima correlation in the -#' local variable to downscale.} -#' +#'\item{Large_dist} minimum Euclidean distance in the large scale pattern; +#'\item{Local_dist} minimum Euclidean distance in the large scale pattern +#'and minimum Euclidean distance in the local scale pattern; and +#'\item{Local_cor} minimum Euclidean distance in the large scale pattern, +#'minimum Euclidean distance in the local scale pattern and highest +#'correlation in the local variable to downscale.} +#'Criteria 'Large_dist' is recommended for CST_Analogs, for an advanced use of +#'the criterias 'Local_dist' and 'Local_cor' use 'Analogs' function. +#'@param excludeTime an array of N named dimensions (coinciding with time +#'dimensions in expL)of character string(s) indicating the date(s) of the +#'observations in the format "dd/mm/yyyy" to be excluded during the search of +#'analogs. It can be NULL but if expL is not a forecast (time_expL contained in +#'time_obsL), by default time_expL will be removed during the search of analogs. +#'@param time_expL a character string indicating the date of the experiment +#'in the same format than time_obsL (i.e. "yyyy-mm-dd"). By default it is NULL +#'and dates are taken from element \code{$Dates$start} from expL. +#'@param time_obsL a character string indicating the date of the observations +#'in the date format (i.e. "yyyy-mm-dd"). By default it is NULL and dates are +#'taken from element \code{$Dates$start} from obsL. +#'@param region a vector of length four indicating the minimum longitude, +#'the maximum longitude, the minimum latitude and the maximum latitude. +#'@param nAnalogs number of Analogs to be selected to apply the criterias +#''Local_dist' or 'Local_cor'. This is not the necessary the number of analogs +#'that the user can get, but the number of events with minimum distance in +#'which perform the search of the best Analog. The default value for the +#''Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor' criterias must +#' be greater than 1 in order to match with the first criteria, if nAnalogs is +#' NULL for 'Local_dist' and 'Local_cor' the default value will be set at the +#' length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just +#' the best analog. +#'@param AnalogsInfo TRUE to get a list with two elements: 1) the downscaled +#'field and 2) the AnalogsInfo which contains: a) the number of the best +#'analogs, b) the corresponding value of the metric used in the selected +#'criteria (distance values for Large_dist and Local_dist,correlation values +#'for Local_cor), c)dates of the analogs). The analogs are listed in decreasing +#'order, the first one is the best analog (i.e if the selected criteria is +#'Local_cor the best analog will be the one with highest correlation, while for +#'Large_dist criteria the best analog will be the day with minimum Euclidean +#'distance). Set to FALSE to get a single analog, the best analog, for instance +#'for downscaling. 
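# With AnalogsInfo = TRUE the underlying Analogs() call returns a list rather
# than a bare array; a sketch of inspecting that output with the same kind of
# toy data used in the examples below (random values, for illustration only):
expSLP <- rnorm(20)
dim(expSLP) <- c(lat = 4, lon = 5)
obsSLP <- c(rnorm(180), expSLP * 1.2)
dim(obsSLP) <- c(time = 10, lat = 4, lon = 5)
time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994:2003, sep = "-")
res <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP,
               time_expL = "01-01-1994", nAnalogs = 3, AnalogsInfo = TRUE)
str(res)        # downscaled fields plus analog number, metric and dates
res$dates       # dates of the selected analogs, best analog first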
+#'@param ncores The number of cores to use in parallel computation #'@import multiApply -#'@importFrom ClimProjDiags SelBox +#'@importFrom s2dverification Subset #'@import abind +#'@importFrom ClimProjDiags SelBox #' #'@seealso code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and #'\code{\link[s2dverification]{CDORemap}} #' -#'@return An 's2dv_cube' object containing the dowscaled values of the best -#'analogs in the criteria selected. -#' +#'@return An 'array' object containing the dowscaled values of the best +#'analogs. #'@examples -#'res <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs) -#' +#'expL <- rnorm(1:200) +#'dim(expL) <- c(member=10,lat = 4, lon = 5) +#'obsL <- c(rnorm(1:180),expL[1,,]*1.2) +#'dim(obsL) <- c(time = 10,lat = 4, lon = 5) +#'time_obsL <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") +#'time_expL <- time_obsL[1] +#'lon <- seq(-1,5,1.5) +#'lat <- seq(30,35,1.5) +#'expL <- s2dv_cube(data = expL, lat = lat, lon = lon, +#' Dates = list(start = time_expL, end = time_expL)) +#'obsL <- s2dv_cube(data = obsL, lat = lat, lon = lon, +#' Dates = list(start = time_obsL, end = time_obsL)) +#'region <- c(min(lon), max(lon), min(lat), max(lat)) +#'downscaled_field <- CST_Analogs(expL = expL, obsL = obsL, region = region) #'@export -CST_Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - region = NULL, criteria = "Large_dist") { - if (!inherits(expL, 's2dv_cube') || !inherits(obsL, 's2dv_cube')) { +CST_Analogs <- function(expL, obsL, expVar = NULL, obsVar = NULL, region = NULL, + criteria = 'Large_dist', excludeTime = NULL, + time_expL = NULL, time_obsL = NULL, + nAnalogs = NULL, AnalogsInfo = FALSE, + ncores = 1) { + if (!inherits(expL, "s2dv_cube") || !inherits(obsL, "s2dv_cube")) { stop("Parameter 'expL' and 'obsL' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - if (!is.null(expVar) || !is.null(obsVar)) { - if (!inherits(expVar, 's2dv_cube') || !inherits(obsVar, 's2dv_cube')) { - stop("Parameter 'expVar' and 'obsVar' must be of the class 's2dv_cube', - ","as output by CSTools::CST_Load.") + if (!is.null(expVar) && !inherits(expVar, "s2dv_cube")) { + stop("Parameter 'expVar' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (!is.null(obsVar) && !inherits(obsVar, "s2dv_cube")) { + stop("Parameter 'expVar' must be of the class 's2dv_cube', ", + "as output by CSTools::CST_Load.") + } + if (any(is.na(expL))) { + warning("Parameter 'expL' contains NA values.") + } + if (any(is.na(obsL))) { + warning("Parameter 'obsL' contains NA values.") + } + if (any(names(dim(obsL$data)) %in% 'sdate')) { + if (any(names(dim(obsL$data)) %in% 'ftime')) { + obsL <- CST_MergeDims(obsL, c('ftime', 'sdate'), rename_dim = 'time') + } else if (any(names(dim(obsL$data)) %in% 'time')) { + obsL <- CST_MergeDims(obsL, c('time', 'sdate'), rename_dim = 'time') } } - timevector <- obsL$Dates$start - if (!is.null(expVar)) { - region <- c(min(expVar$lon), max(expVar$lon), min(expVar$lat), - max(expVar$lon)) - lonVar <- expVar$lon - latVar <- expVar$lat - } else { - region <- c(min(expL$lon), max(expL$lon), min(expL$lat), max(expL$lon)) - lonVar <- expL$lon - latVar <- expL$lat - } - result <- Analogs(expL$data, obsL$data, time_obsL = timevector, - expVar = expVar$data, obsVar = obsVar$data, - criteria = criteria, - lonVar = expVar$lon, latVar = expVar$lat, - region = region, nAnalogs = 1, return_list = FALSE) if (!is.null(obsVar)) { - obsVar$data <- result$AnalogsFields - return(obsVar) + if 
(any(names(dim(obsVar$data)) %in% 'sdate')) { + if (any(names(dim(obsVar$data)) %in% 'ftime')) { + obsVar <- CST_MergeDims(obsVar, c('ftime', 'sdate'),rename_dim = 'time') + } else if (any(names(dim(obsVar$data)) %in% 'time')) { + obsVar <- CST_MergeDims(obsVar, c('time', 'sdate'), rename_dim = 'time') + } + } + } + if (is.null(time_expL)) { + time_expL <- expL$Dates$start + } + if (is.null(time_obsL)) { + time_obsL <- obsL$Dates$start + } + res <- Analogs(expL$data, obsL$data, time_obsL = time_obsL, + time_expL = time_expL, expVar = expVar$data, + obsVar = obsVar$data, criteria = criteria, + excludeTime = excludeTime, region = region, + lonVar = as.vector(obsVar$lon), latVar = as.vector(obsVar$lat), + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + ncores = ncores) + if (AnalogsInfo) { + if (is.numeric(res$dates)) { + res$dates <- as.POSIXct(res$dates, origin = '1970-01-01', tz = 'UTC') + } + } + expL$data <- res + if (is.null(region)) { + expL$lon <- obsL$lon + expL$lat <- obsL$lat } else { - obsL$data <- result$AnalogsFields - return(obsL) + expL$lon <- SelBox(obsL$data, lon = as.vector(obsL$lon), + lat = as.vector(obsL$lat), + region = region)$lon + expL$lat <- SelBox(obsL$data, lon = as.vector(obsL$lon), + lat = as.vector(obsL$lat), + region = region)$lat } + return(expL) } + #'@rdname Analogs #'@title Analogs based on large scale fields. #' #'@author M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +#'@author Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it } +#'@author Veronica Torralba, \email{veronica.torralba@cmcc.it} #'@author Nuria Perez-Zanon \email{nuria.perez@bsc.es} #' #'@description This function perform a downscaling using Analogs. To compute @@ -136,9 +216,9 @@ CST_Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, #' (1) Minimum Euclidean distance in the large scale pattern (i.e. SLP) #' (2) Minimum Euclidean distance in the large scale pattern (i.e. SLP) #' and minimum Euclidean distance in the local scale pattern (i.e. SLP). -#' (3) Minimum Euclidean distance in the large scale pattern (i.e. SLP), minimum -#' distance in the local scale pattern (i.e. SLP) and highest correlation in the -#' local variable to downscale (i.e Precipitation). +#' (3) Minimum Euclidean distance in the large scale pattern (i.e. SLP), +#' minimum distance in the local scale pattern (i.e. SLP) and highest +#' correlation in the local variable to downscale (i.e Precipitation). #'The search of analogs must be done in the longest dataset posible. This is #'important since it is necessary to have a good representation of the #'possible states of the field in the past, and therefore, to get better @@ -166,16 +246,34 @@ CST_Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, #'to be already subset for the desired large scale region. #'@param obsL an array of N named dimensions containing the observational field #'on the large scale. The element 'data' in the 's2dv_cube' object must have -#'the same latitudinal and longitudinal dimensions as parameter 'expL' and a -#' temporal dimension with the maximum number of available observations. +#'the same latitudinal and longitudinal dimensions as parameter 'expL' and a +#' single temporal dimension with the maximum number of available observations. #'@param time_obsL a character string indicating the date of the observations -#'in the format "dd/mm/yyyy" +#'in the format "dd/mm/yyyy". Reference time to search for analogs. 
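# Dates may also be held as 'Date' or 'POSIXct' objects (e.g. taken from an
# s2dv_cube element $Dates$start); non-character inputs are coerced internally
# with format(as.Date(x), "%d-%m-%Y"), so an equivalent character vector can
# be prepared beforehand:
obs_dates <- seq(as.Date("1994-01-01"), as.Date("2003-01-01"), by = "year")
time_obsL <- format(obs_dates, "%d-%m-%Y")
head(time_obsL)   # "01-01-1994" "01-01-1995" ...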
+#'@param time_expL an array of N named dimensions (coinciding with time +#'dimensions in expL) of character string(s) indicating the date(s) of the +#'experiment in the format "dd/mm/yyyy". Time(s) to find the analogs. +#'@param excludeTime an array of N named dimensions (coinciding with time +#'dimensions in expL) of character string(s) indicating the date(s) of the +#'observations in the format "dd/mm/yyyy" to be excluded during the search of +#'analogs. It can be NULL but if expL is not a forecast (time_expL contained in +#'time_obsL),by default time_expL will be removed during the search of analogs. #'@param expVar an array of N named dimensions containing the experimental #'field on the local scale, usually a different variable to the parameter #''expL'. If it is not NULL (by default, NULL), the returned field by this #'function will be the analog of parameter 'expVar'. #'@param obsVar an array of N named dimensions containing the field of the #'same variable as the passed in parameter 'expVar' for the same region. +#'@param AnalogsInfo TRUE to get a list with two elements: 1) the downscaled +#'field and 2) the AnalogsInfo which contains: a) the number of the best +#'analogs, b) the corresponding value of the metric used in the selected +#'criteria (distance values for Large_dist and Local_dist,correlation values +#'for Local_cor), c)dates of the analogs). The analogs are listed in decreasing +#'order, the first one is the best analog (i.e if the selected criteria is +#'Local_cor the best analog will be the one with highest correlation, while for +#'Large_dist criteria the best analog will be the day with minimum Euclidean +#'distance). Set to FALSE to get a single analog, the best analog, for instance +#'for downscaling. #'@param criteria a character string indicating the criteria to be used for the #'selection of analogs: #'\itemize{ @@ -183,326 +281,144 @@ CST_Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, #'\item{Local_dist} minimum Euclidean distance in the large scale pattern #'and minimum Euclidean distance in the local scale pattern; and #'\item{Local_cor} minimum Euclidean distance in the large scale pattern, -#'minimum Euclidean distance in the local scale pattern and highest correlation -#' in the local variable to downscale.} +#'minimum Euclidean distance in the local scale pattern and highest +#'correlation in the local variable to downscale.} #'@param lonVar a vector containing the longitude of parameter 'expVar'. #'@param latVar a vector containing the latitude of parameter 'expVar'. #'@param region a vector of length four indicating the minimum longitude, #'the maximum longitude, the minimum latitude and the maximum latitude. -#'@param return_list TRUE to get a list with the best analogs. FALSE -#'to get a single analog, the best analog. For Downscaling return_list must be -#'FALSE. #'@param nAnalogs number of Analogs to be selected to apply the criterias #''Local_dist' or 'Local_cor'. This is not the necessary the number of analogs #'that the user can get, but the number of events with minimum distance in #'which perform the search of the best Analog. The default value for the -#''Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor'criterias must -#' be selected by the user otherwise the default value will be set as 10. 
+#''Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor' criterias must +#' be greater than 1 in order to match with the first criteria, if nAnalogs is +#' NULL for 'Local_dist' and 'Local_cor' the default value will be set at the +#' length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just +#' the best analog. +#'@param ncores the number of cores to use in parallel computation. #'@import multiApply -#'@importFrom ClimProjDiags SelBox +#'@importFrom s2dverification Subset #'@import abind +#'@importFrom ClimProjDiags SelBox +#' #'@return AnalogsFields, dowscaled values of the best analogs for the criteria -#'selected. -#'@return AnalogsInfo, a dataframe with information about the number of the -#'best analogs, the corresponding value of the metric used in the selected -#'criteria (distance values for Large_dist and Local_dist,correlation values -#'for Local_cor), date of the analog). The analogs are listed in decreasing -#'order, the first one is the best analog (i.e if the selected criteria -#'is Local_cor the best analog will be the one with highest correlation, while -#'for Large_dist criteria the best analog will be the day with minimum -#'Euclidean distance) -#'@examples -#'require(zeallot) +#'selected. If AnalogsInfo is set to TRUE the function also returns a +#'list with the dowsncaled field and the Analogs Information. #' +#'@examples #'# Example 1:Downscaling using criteria 'Large_dist' and a single variable: -#'# The best analog is found using a single variable (i.e. Sea level pressure, -#'# SLP). The downscaling will be done in the same variable used to study the -#'# minimal distance (i.e. SLP). obsVar and expVar NULLS or equal to obsL and -#'# expL respectively region, lonVar and latVar not necessary here. -#'# return_list=FALSE -#' #'expSLP <- rnorm(1:20) #'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*1.2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) +#'obsSLP <- c(rnorm(1:180), expSLP * 1.2) +#'dim(obsSLP) <- c(time = 10, lat = 4, lon = 5) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, time_obsL=time_obsSLP) -#'str(downscale_field) +#'downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, +#' time_obsL = time_obsSLP,time_expL = "01-01-1994") #' #'# Example 2: Downscaling using criteria 'Large_dist' and 2 variables: -#'# The best analog is found using 2 variables (i.e. Sea Level Pressure, SLP -#'# and precipitation, pr): one variable (i.e. sea level pressure, expL) to -#'# find the best analog day (defined in criteria 'Large_dist' as the day, in -#'# obsL, with the minimum Euclidean distance to the day of interest in expL) -#'# The second variable will be the variable to donwscale (i.e. precipitation, -#'# obsVar). Parameter obsVar must be different to obsL.The downscaled -#'# precipitation will be the precipitation that belongs to the best analog day -#'# in SLP. Region not needed here since will be the same for both variables. 
-#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) -#'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, -#' obsVar=obs.pr, -#' time_obsL=time_obsSLP) -#'str(downscale_field) +#'obs.pr <- c(rnorm(1:200) * 0.001) +#'dim(obs.pr) <- dim(obsSLP) +#'downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, +#' time_obsL = time_obsSLP, time_expL = "01-01-1994") #' #'# Example 3:List of best Analogs using criteria 'Large_dist' and a single -#'# variable: same as Example 1 but getting a list of best analogs (return_list -#'# =TRUE) with the corresponding downscaled values, instead of only 1 single -#'# donwscaled value instead of 1 single downscaled value. Imposing nAnalogs -#'# (number of analogs to do the search of the best Analogs). obsVar and expVar -#'# NULL or equal to obsL and expL,respectively. -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:1980),expSLP*1.5) +#'obsSLP <- c(rnorm(1:1980), expSLP * 1.5) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 100) #'time_obsSLP <- paste(rep("01", 100), rep("01", 100), 1920 : 2019, sep = "-") -#'downscale_field<- Analogs(expL=expSLP, obsL=obsSLP, time_obsSLP, -#' nAnalogs=5,return_list = TRUE) -#'str(downscale_field) +#'downscale_field<- Analogs(expL = expSLP, obsL = obsSLP, time_obsSLP, +#' nAnalogs = 5, time_expL = "01-01-2003", +#' AnalogsInfo = TRUE, excludeTime = "01-01-2003") #' #'# Example 4:List of best Analogs using criteria 'Large_dist' and 2 variables: -#'# same as example 2 but getting a list of best analogs (return_list =TRUE) -#'# with the values instead of only a single downscaled value. Imposing -#'# nAnalogs (number of analogs to do the search of the best Analogs). obsVar -#'# and expVar must be different to obsL and expL. -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) +#'obsSLP <- c(rnorm(1:180), expSLP * 2) #'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) #'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) -#'downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, -#' obsVar=obs.pr, -#' time_obsL=time_obsSLP,nAnalogs=5, -#' return_list = TRUE) -#'str(downscale_field) +#'downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, +#' time_obsL = time_obsSLP,nAnalogs=5, +#' time_expL = "01-10-2003", AnalogsInfo = TRUE) #' #'# Example 5: Downscaling using criteria 'Local_dist' and 2 variables: -#'# The best analog is found using 2 variables (i.e. Sea Level Pressure, -#'# SLP and precipitation, pr). Parameter obsVar must be different to obsL.The -#'# downscaled precipitation will be the precipitation that belongs to the best -#'# analog day in SLP. 
lonVar, latVar and Region must be given here to select -#'# the local scale -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) #'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr, -#' criteria="Local_dist",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),region=region, -#' nAnalogs = 10, return_list = FALSE) -#'str(Local_scale) +#'region=c(lonmin = -1 ,lonmax = 2, latmin = 30, latmax = 33) +#'Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, +#' obsVar = obs.pr, criteria = "Local_dist", +#' lonVar = seq(-1, 5, 1.5),latVar = seq(30, 35, 1.5), +#' region = region,time_expL = "01-10-2000", +#' nAnalogs = 10, AnalogsInfo = TRUE) #' #'# Example 6: list of best analogs using criteria 'Local_dist' and 2 -#'# variables: -#'# The best analog is found using 2 variables. Parameter obsVar must be -#'# different to obsL in this case.The downscaled precipitation will be the -#'# precipitation that belongs to the best analog day in SLP. lonVar, latVar -#'# and Region needed. return_list=TRUE -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) -#'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr, -#' criteria="Local_dist",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),region=region, -#' nAnalogs = 5, return_list = TRUE) -#'str(Local_scale) +#'Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, +#' criteria = "Local_dist", lonVar = seq(-1, 5, 1.5), +#' latVar = seq(30, 35, 1.5), region = region, +#' time_expL = "01-10-2000", nAnalogs = 5, +#' AnalogsInfo = TRUE) #' #'# Example 7: Downscaling using Local_dist criteria -#'# without parameters obsVar and expVar. return list =FALSE -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' criteria="Local_dist",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),region=region, -#' nAnalogs = 10, return_list = TRUE) -#'str(Local_scale) +#'Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, +#' criteria = "Local_dist", lonVar = seq(-1, 5, 1.5), +#' latVar = seq(30, 35, 1.5), region = region, +#' time_expL = "01-10-2000", +#' nAnalogs = 10, AnalogsInfo = FALSE) #' #'# Example 8: Downscaling using criteria 'Local_cor' and 2 variables: -#'# The best analog is found using 2 variables. 
Parameter obsVar and expVar -#'# are necessary and must be different to obsL and expL, respectively. -#'# The downscaled precipitation will be the precipitation that belongs to -#'# the best analog day in SLP large and local scales, and to the day with -#'# the higher correlation in precipitation. return_list=FALSE. two options -#'# for nAnalogs -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'exp.pr <- c(rnorm(1:20)*0.001) -#'dim(exp.pr)=dim(expSLP) -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) -#'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scalecor <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr,expVar=exp.pr, -#' criteria="Local_cor",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),nAnalogs=8,region=region, -#' return_list = FALSE) -#'Local_scalecor$AnalogsInfo -#'Local_scalecor$DatesAnalogs -#'# same but without imposing nAnalogs, so nAnalogs will be set by default as 10 -#'Local_scalecor <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr,expVar=exp.pr, -#' criteria="Local_cor",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),region=region, -#' return_list = FALSE) -#'Local_scalecor$AnalogsInfo -#'Local_scalecor$DatesAnalogs -#' -#'# Example 9: List of best analogs in the three criterias Large_dist, -#'# Local_dist, and Local_cor return list TRUE same variable +#'exp.pr <- c(rnorm(1:20) * 0.001) +#'dim(exp.pr) <- dim(expSLP) +#'Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, +#' obsVar = obs.pr, expVar = exp.pr, +#' criteria = "Local_cor", lonVar = seq(-1, 5, 1.5), +#' time_expL = "01-10-2000", latVar = seq(30, 35, 1.5), +#' nAnalogs = 8, region = region, AnalogsInfo = FALSE) +#'# same but without imposing nAnalogs,so nAnalogs will be set by default as 10 +#'Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, +#' obsVar = obs.pr, expVar = exp.pr, +#' criteria = "Local_cor", lonVar = seq(-1,5,1.5), +#' time_expL = "01-10-2000", latVar=seq(30, 35, 1.5), +#' region = region, AnalogsInfo = TRUE) #' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'# analogs of large scale using criteria 1 -#'Large_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' criteria="Large_dist", -#' nAnalogs = 7, return_list = TRUE) -#'str(Large_scale) -#'Large_scale$AnalogsInfo -#'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' criteria="Local_dist",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),nAnalogs=7,region=region, -#' return_list = TRUE) -#'str(Local_scale) -#'Local_scale$AnalogsInfo -#'# analogs of local scale using criteria 3 -#'Local_scalecor <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obsSLP,expVar=expSLP, -#' criteria="Local_cor",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),nAnalogs=7,region=region, -#' return_list = TRUE) -#'str(Local_scalecor) -#'Local_scalecor$AnalogsInfo -#' -#'# 
Example 10: Downscaling in the three criteria Large_dist, Local_dist, and -#'# Local_cor return list FALSE, different variable -#' -#'expSLP <- rnorm(1:20) -#'dim(expSLP) <- c(lat = 4, lon = 5) -#'obsSLP <- c(rnorm(1:180),expSLP*2) -#'dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -#'time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -#'exp.pr <- c(rnorm(1:20)*0.001) -#'dim(exp.pr)=dim(expSLP) -#'obs.pr <- c(rnorm(1:200)*0.001) -#'dim(obs.pr)=dim(obsSLP) -#'# analogs of large scale using criteria 1 -#'Large_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' criteria="Large_dist", -#' nAnalogs = 7, return_list = FALSE) -#'str(Large_scale) -#'Large_scale$AnalogsInfo -#'# analogs of local scale using criteria 2 -#'lonmin=-1 -#'lonmax=2 -#'latmin=30 -#'latmax=33 -#'region=c(lonmin,lonmax,latmin,latmax) -#'Local_scale <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr, -#' criteria="Local_dist",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),nAnalogs=7,region=region, -#' return_list = FALSE) -#'str(Local_scale) -#'Local_scale$AnalogsInfo -#'# analogs of local scale using criteria 3 -#'Local_scalecor <- Analogs(expL=expSLP, -#' obsL=obsSLP, time_obsL=time_obsSLP, -#' obsVar=obs.pr,expVar=exp.pr, -#' criteria="Local_cor",lonVar=seq(-1,5,1.5), -#' latVar=seq(30,35,1.5),nAnalogs=7,region=region, -#' return_list = FALSE) -#'str(Local_scalecor) -#'Local_scalecor$AnalogsInfo -#' -#'@export -Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, - criteria = "Large_dist", - lonVar = NULL, latVar = NULL, region = NULL, - nAnalogs = NULL, return_list = FALSE) { - # checks +#'#'Example 9: List of best analogs in the three criterias Large_dist, +#'Large_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, +#' criteria = "Large_dist", time_expL = "01-10-2000", +#' nAnalogs = 7, AnalogsInfo = TRUE) +#'Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, +#' time_expL = "01-10-2000", criteria = "Local_dist", +#' lonVar = seq(-1, 5, 1.5), latVar = seq(30, 35, 1.5), +#' nAnalogs = 7,region = region, AnalogsInfo = TRUE) +#'Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, +#' obsVar = obsSLP, expVar = expSLP, +#' time_expL = "01-10-2000",criteria = "Local_cor", +#' lonVar = seq(-1, 5, 1.5), latVar = seq(30, 35, 1.5), +#' nAnalogs = 7,region = region, +#' AnalogsInfo = TRUE) +#'#Example 10: Downscaling using criteria 'Large_dist' and a single variable, +#'# more than 1 sdate: +#'expSLP <- rnorm(1:40) +#'dim(expSLP) <- c(sdate = 2, lat = 4, lon = 5) +#'obsSLP <- c(rnorm(1:180), expSLP * 1.2) +#'dim(obsSLP) <- c(time = 11, lat = 4, lon = 5) +#'time_obsSLP <- paste(rep("01", 11), rep("01", 11), 1993 : 2003, sep = "-") +#'time_expSLP <- paste(rep("01", 2), rep("01", 2), 1994 : 1995, sep = "-") +#'excludeTime <- c("01-01-2003", "01-01-2003") +#'dim(excludeTime) <- c(sdate = 2) +#'downscale_field_exclude <- Analogs(expL = expSLP, obsL = obsSLP, +#' time_obsL = time_obsSLP, time_expL = time_expSLP, +#' excludeTime = excludeTime, AnalogsInfo = TRUE) +#'@export +Analogs <- function(expL, obsL, time_obsL, time_expL = NULL, expVar = NULL, + obsVar = NULL, + criteria = "Large_dist",excludeTime = NULL, + lonVar = NULL, latVar = NULL, region = NULL, + nAnalogs = NULL, AnalogsInfo = FALSE, + ncores = 1) { if (!all(c('lon', 'lat') %in% names(dim(expL)))) { stop("Parameter 'expL' must have the dimensions 'lat' and 'lon'.") } - if (!all(c('lat', 'lon') %in% 
names(dim(obsL)))) { stop("Parameter 'obsL' must have the dimension 'lat' and 'lon'.") } - if (any(is.na(expL))) { warning("Parameter 'exp' contains NA values.") } - if (any(is.na(obsL))) { warning("Parameter 'obs' contains NA values.") } @@ -515,17 +431,40 @@ Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, warning("Parameter 'expVar' and 'obsVar' are NULLs, downscaling/listing same variable as obsL and expL'.") } - if(!is.null(obsVar) & is.null(expVar) & criteria=="Local_cor"){ - stop("parameter 'expVar' cannot be NULL") + if (!is.null(obsVar) & is.null(expVar) & criteria == "Local_cor") { + stop("Parameter 'expVar' cannot be NULL.") } - if(is.null(nAnalogs) & criteria!="Large_dist"){ - nAnalogs=10 - warning("Parameter 'nAnalogs' is NULL and is set to 10 by default") + if (is.null(nAnalogs) & criteria != "Large_dist") { + nAnalogs = length(time_obsL) + warning("Parameter 'nAnalogs' is NULL and is set to the same length of", + "'time_obsL' by default") } - if(is.null(nAnalogs) & criteria=="Large_dist"){ - nAnalogs=1 + if (is.null(nAnalogs) & criteria == "Large_dist") { + nAnalogs <- 1 + } + if (is.null(time_expL)) { + stop("Parameter 'time_expL' cannot be NULL") + } + if(any(class(time_obsL)!="character")){ + warning('imposing time_obsL to be a character') + time_obsL=format(as.Date(time_obsL),'%d-%m-%Y') + } + if(any(class(time_expL)!="character")){ + warning('imposing time_expL to be a character') + time_expL=format(as.Date(time_expL),'%d-%m-%Y') + } + if(!is.null(excludeTime)){ + if(any(class(excludeTime)!="character")){ + warning('imposing excludeTime to be a character') + excludeTime=format(as.Date(excludeTime),'%d-%m-%Y') + } + } + if (is.null(time_obsL)) { + stop("Parameter 'time_obsL' cannot be NULL") + } + if (is.null(expL)) { + stop("Parameter 'expL' cannot be NULL") } - if (any(names(dim(obsL)) %in% 'ftime')) { if (any(names(dim(obsL)) %in% 'time')) { stop("Multiple temporal dimensions ('ftime' and 'time') found", @@ -539,65 +478,348 @@ Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, } } } - if (any(names(dim(obsVar)) %in% 'ftime')) { - if (any(names(dim(obsVar)) %in% 'time')) { - stop("Multiple temporal dimensions ('ftime' and 'time') found", - "in parameter 'obsVar'.") - } else { - time_pos_obsVar <- which(names(dim(obsVar)) == 'ftime') - names(dim(obsVar))[time_pos_obsVar] <- 'time' - if (any(names(dim(expVar)) %in% 'ftime')) { - time_pos_expVar <- which(names(dim(expVar)) == 'ftime') - names(dim(expVar))[time_pos_expVar] <- 'time' + if (!is.null(obsVar)) { + if (any(names(dim(obsVar)) %in% 'ftime')) { + if (any(names(dim(obsVar)) %in% 'time')) { + stop("Multiple temporal dimensions ('ftime' and 'time') found", + "in parameter 'obsVar'.") + } else { + time_pos_obsVar <- which(names(dim(obsVar)) == 'ftime') + names(dim(obsVar))[time_pos_obsVar] <- 'time' + if (any(names(dim(expVar)) %in% 'ftime')) { + time_pos_expVar <- which(names(dim(expVar)) == 'ftime') + names(dim(expVar))[time_pos_expVar] <- 'time' + } } } } - if (any(names(dim(obsL)) %in% 'sdate')) { - if (any(names(dim(obsL)) %in% 'time')) { + if ((any(names(dim(obsL)) %in% 'sdate')) && + (any(names(dim(obsL)) %in% 'time'))) { + dims_obsL <- dim(obsL) + pos_sdate <- which(names(dim(obsL)) == 'sdate') + pos_time <- which(names(dim(obsL)) == 'time') + pos <- 1 : length(dim(obsL)) + pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) + obsL <- aperm(obsL, pos) + dim(obsL) <- c(time = prod(dims_obsL[c(pos_time, pos_sdate)]), + dims_obsL[-c(pos_time, pos_sdate)]) + } 
else { + if (any(names(dim(obsL)) %in% 'sdate')) { dims_obsL <- dim(obsL) pos_sdate <- which(names(dim(obsL)) == 'sdate') - pos_time <- which(names(dim(obsL)) == 'time') pos <- 1 : length(dim(obsL)) - pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) + pos <- c( pos_sdate, pos[-c(pos_sdate)]) obsL <- aperm(obsL, pos) - dim(obsL) <- c(time = prod(dims_obsL[c(pos_time, pos_sdate)]), - dims_obsL[-c(pos_time, pos_sdate)]) + dim(obsL) <- c(time = prod(dims_obsL[c(pos_sdate)]), + dims_obsL[-c( pos_sdate)]) } else { - stop("Parameter 'obsL' must have a temporal dimension.") + if (any(names(dim(obsL)) %in% 'time')) { + dims_obsL <- dim(obsL) + pos_time <- which(names(dim(obsL)) == 'time') + if(length(time_obsL) != dim(obsL)[pos_time]) { + stop(" 'time_obsL' and 'obsL' must have same length in the temporal + dimension.") + } + pos <- 1 : length(dim(obsL)) + pos <- c(pos_time, pos[-c(pos_time)]) + obsL <- aperm(obsL, pos) + dim(obsL) <- c(time = prod(dims_obsL[pos_time]), + dims_obsL[-c(pos_time)]) + } else { + stop("Parameter 'obsL' must have a temporal dimension named 'time'.") + } } } - if (any(names(dim(obsVar)) %in% 'sdate')) { - if (any(names(dim(obsVar)) %in% 'time')) { - dims_obsVar <- dim(obsVar) - pos_sdate <- which(names(dim(obsVar)) == 'sdate') - pos_time <- which(names(dim(obsVar)) == 'time') - pos <- 1 : length(dim(obsVar)) - pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) - obsVar <- aperm(obsVar, pos) - dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_time, pos_sdate)]), - dims_obsVar[-c(pos_time, pos_sdate)]) + if (!is.null(obsVar)) { + if (any(names(dim(obsVar)) %in% 'sdate')) { + if (any(names(dim(obsVar)) %in% 'time')) { + dims_obsVar <- dim(obsVar) + pos_sdate <- which(names(dim(obsVar)) == 'sdate') + pos_time <- which(names(dim(obsVar)) == 'time') + pos <- 1 : length(dim(obsVar)) + pos <- c(pos_time, pos_sdate, pos[-c(pos_sdate,pos_time)]) + obsVar <- aperm(obsVar, pos) + dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_time, pos_sdate)]), + dims_obsVar[-c(pos_time, pos_sdate)]) + } else { + dims_obsVar <- dim(obsVar) + pos_sdate <- which(names(dim(obsVar)) == 'sdate') + pos <- 1 : length(dim(obsVar)) + pos <- c(pos_sdate, pos[-c(pos_sdate)]) + obsVar <- aperm(obsVar, pos) + dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_sdate)]), + dims_obsVar[-c(pos_sdate)]) + } } else { - stop("Parameter 'obsVar' must have a temporal dimension.") + if (any(names(dim(obsVar)) %in% 'time')) { + dims_obsVar <- dim(obsVar) + pos_time <- which(names(dim(obsVar)) == 'time') + if (length(time_obsL) != dim(obsVar)[pos_time]) { + stop(" 'time_obsL' and 'obsVar' must have same length in the temporal + dimension.")} + pos <- 1 : length(dim(obsVar)) + pos <- c(pos_time, pos[-c(pos_time)]) + obsVar <- aperm(obsVar, pos) + dim(obsVar) <- c(time = prod(dims_obsVar[c(pos_time)]), + dims_obsVar[-c(pos_time)]) + } else { + stop("Parameter 'obsVar' must have a temporal dimension named 'time'.") + } } - } - - if (is.null(region) & criteria!="Large_dist") { + } + if (is.null(region) && criteria != 'Large_dist') { if (!is.null(lonVar) & !is.null(latVar)) { region <- c(min(lonVar), max(lonVar), min(latVar), max(latVar)) - }else{ + } else { stop("Parameters 'lonVar' and 'latVar' must be given in criteria 'Local_dist' and 'Local_cor'") } } + if (any(names(dim(expL)) %in% c('ftime', 'leadtime', 'ltime'))) { + if (length(which(names(dim(expL)) %in% + c('ftime', 'leadtime', 'ltime') == TRUE)) > 1) { + stop("Parameter 'expL' cannot have multiple forecast time dimensions") + } else { + 
names(dim(expL))[which(names(dim(expL)) %in% c('ftime','leadtime','ltime'))] <- + 'time' + } + } + # remove dimension length 1 to simplify outputs: + if (any(dim(obsL) == 1)) { + obsL <- adrop(obsL, which(dim(obsL) == 1)) + } + if (any(dim(expL) == 1)) { + expL <- adrop(expL, which(dim(expL) == 1)) + } + if (!is.null(obsVar)) { + if (any(dim(obsVar) == 1)) { + obsVar <- adrop(obsVar, which(dim(obsVar) == 1)) + } + } + if (!is.null(expVar)) { + if (any(dim(expVar) == 1)) { + expVar <- adrop(expVar, which(dim(expVar) == 1)) + } + } + names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), + names(dim(obsL))) + if (!is.null(expVar)) { + names(dim(expVar)) <- replace_repeat_dimnames(names(dim(expVar)), + names(dim(obsVar))) + } + + if (is.null(excludeTime)) { + excludeTime <- vector(mode="character", length=length(time_expL)) + } + if(length(time_expL)==length(excludeTime)){ + if (any(names(dim(expL)) %in% c('sdate_exp'))) { + dim(time_expL) <- c(dim(expL)['sdate_exp'], dim(expL)['time_exp']) + } else if (any(names(dim(expL)) %in% c('sdate'))) { + if (any(names(dim(expL)) %in% c('time_exp'))) { + dim(time_expL) <- c(dim(expL)['sdate'], dim(expL)['time_exp']) + dim(excludeTime) <- c(dim(expL)['sdate'], dim(expL)['time_exp']) + } else if (any(names(dim(expL)) %in% c('time'))) { + dim(time_expL) <- c(dim(expL)['sdate'], dim(expL)['time']) + dim(excludeTime) <- c(dim(expL)['sdate'], dim(expL)['time']) + } else { + dim(time_expL) <- c(dim(expL)['sdate']) + dim(excludeTime) <- c(dim(expL)['sdate']) + } + } else if (any(names(dim(expL)) %in% c('time'))) { + dim(time_expL) <- c(dim(expL)['time']) + dim(excludeTime) <- c(dim(expL)['time']) + } else if (any(names(dim(expL)) %in% c('time_exp'))) { + dim(time_expL) <- c(dim(expL)['time_exp']) + dim(excludeTime) <- c(dim(expL)['time_exp']) + } + } + if (!AnalogsInfo) { + if (is.null(obsVar)) { + res <- Apply(list(expL, obsL), + target_dims = list(c('lat', 'lon'), c('time','lat','lon')), + fun = .analogs, time_obsL, expVar = expVar, + time_expL=time_expL, excludeTime=excludeTime, + obsVar = obsVar, criteria = criteria, + lonVar = lonVar, latVar = latVar, region = region, + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = c('nAnalogs', 'lat', 'lon'), + ncores = ncores)$output1 + + } else if (!is.null(obsVar) && is.null(expVar)) { + res <- Apply(list(expL, obsL, obsVar), + target_dims = list(c('lat', 'lon'), c('time','lat','lon'), + c('time', 'lat', 'lon')), + fun = .analogs,time_obsL, + time_expL=time_expL, excludeTime=excludeTime, + expVar = expVar, criteria = criteria, + lonVar = lonVar, latVar = latVar, region = region, + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = c('nAnalogs', 'lat', 'lon'), + ncores = ncores)$output1 + + } else if (!is.null(obsVar) && !is.null(expVar)) { + res <- Apply(list(expL, obsL, obsVar, expVar), + target_dims = list(c('lat', 'lon'), c('time','lat','lon'), + c('time','lat','lon'), c('lat','lon')), + fun = .analogs, + criteria = criteria,time_obsL, + time_expL=time_expL, excludeTime=excludeTime, + lonVar = lonVar, latVar = latVar, region = region, + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = c('nAnalogs', 'lat', 'lon'), + ncores = ncores)$output1 + } + } else { + if (is.null(obsVar)) { + res <- Apply(list(expL, obsL), + target_dims = list(c('lat', 'lon'), c('time','lat','lon')), + fun = .analogs, time_obsL, expVar = expVar, + time_expL=time_expL, excludeTime=excludeTime, + obsVar = obsVar, criteria = criteria, + lonVar = lonVar, latVar = latVar, region = region, + 
nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = list(fields = c('nAnalogs', 'lat', 'lon'), + analogs = c('nAnalogs'), + metric = c('nAnalogs', 'metric'), + dates = c('nAnalogs')), + ncores = ncores) + } else if (!is.null(obsVar) && is.null(expVar)) { + res <- Apply(list(expL, obsL, obsVar), + target_dims = list(c('lat', 'lon'), c('time','lat','lon'), + c('time', 'lat', 'lon')), + fun = .analogs,time_obsL, + time_expL=time_expL, excludeTime=excludeTime, + expVar = expVar, criteria = criteria, + lonVar = lonVar, latVar = latVar, region = region, + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = list(fields = c('nAnalogs', 'lat', 'lon'), + analogs = c('nAnalogs'), + metric = c('nAnalogs', 'metric'), + dates = c('nAnalogs')), + ncores = ncores) + + } else if (!is.null(obsVar) && !is.null(expVar)) { + res <- Apply(list(expL, obsL, obsVar, expVar), + target_dims = list(c('lat', 'lon'), c('time', 'lat', 'lon'), + c('time', 'lat', 'lon'), c('lat', 'lon')), + fun = .analogs,time_obsL, + criteria = criteria, + time_expL=time_expL, excludeTime=excludeTime, + lonVar = lonVar, latVar = latVar, region = region, + nAnalogs = nAnalogs, AnalogsInfo = AnalogsInfo, + output_dims = list(fields = c('nAnalogs', 'lat', 'lon'), + analogs = c('nAnalogs'), + metric = c('nAnalogs', 'metric'), + dates = c('nAnalogs')), + ncores = ncores) + } + } + return(res) +} +.analogs <- function(expL, obsL, time_expL, excludeTime = NULL, + obsVar = NULL, expVar = NULL, + time_obsL, criteria = "Large_dist", + lonVar = NULL, latVar = NULL, region = NULL, + nAnalogs = NULL, AnalogsInfo = FALSE) { + + if (all(excludeTime=="")) { + excludeTime = NULL + } + + if (!is.null(obsL)) { + #obsL <- replace_time_dimnames(obsL) + if (any(time_expL %in% time_obsL)) { + if (is.null(excludeTime)) { + excludeTime <- time_expL + warning("Parameter 'excludeTime' is NULL, time_obsL contains + time_expL, so, by default, the date of + time_expL will be excluded in the search of analogs") + } else { + `%!in%` = Negate(`%in%`) + if(any(time_expL %!in% excludeTime)) { + excludeTime <- c(excludeTime, time_expL) + warning("Parameter 'excludeTime' is not NULL, time_obsL contains + time_expL, so, by default, the date of + time_expL will be excluded in the search of analogs") + } + } + time_ref <- time_obsL[-c(which(time_obsL %in% excludeTime))] + posdim <- which(names(dim(obsL)) == 'time') + posref <- which(time_obsL %in% time_ref) + obsT <- Subset(obsL, along = posdim, indices = posref) + if (!is.null(obsVar)) { + obsTVar <- Subset(obsVar, along = posdim, indices = posref) + } + time_obsL <- time_ref + obsL <- obsT + if (!is.null(obsVar)) { + obsVar <- obsTVar + } + } else { + if (is.null(excludeTime)) { + if (!is.null(obsVar)) { + warning("Parameter 'excludeTime' is NULL, time_obsL does not contain + time_expL, obsVar not NULL") + } else { + warning("Parameter 'excludeTime' is NULL, time_obsL does not contain + time_expL") + } + } else { + time_ref <- time_obsL[-c(which(time_obsL %in% excludeTime))] + posdim <- which(names(dim(obsL)) == 'time') + posref <- which(time_obsL %in% time_ref) + obsT <- Subset(obsL,along = posdim,indices = posref) + if (!is.null(obsVar)) { + obsTVar <- Subset(obsVar, along = posdim, indices = posref) + } + time_obsL <- time_ref + obsL <- obsT + if (!is.null(obsVar)) { + obsVar <- obsTVar + } + if (!is.null(obsVar)) { + warning("Parameter 'excludeTime' has a value and time_obsL does not + contain time_expL, obsVar not NULL") + } else { + warning("Parameter 'excludeTime' has a value and time_obsL does 
not + contain time_expL") + } + } + } + } else { + stop("parameter 'obsL' cannot be NULL") + } + if(length(time_obsL)==0){ + stop("Parameter 'time_obsL' can not be length 0") + } + Analog_result <- FindAnalog(expL = expL, obsL = obsL, time_obsL = time_obsL, + expVar = expVar, obsVar = obsVar, + criteria = criteria, + AnalogsInfo = AnalogsInfo, + nAnalogs = nAnalogs,lonVar = lonVar, + latVar = latVar, region = region) + if (AnalogsInfo == TRUE) { + return(list(AnalogsFields = Analog_result$AnalogsFields, + AnalogsInfo = Analog_result$Analog, + AnalogsMetric = Analog_result$metric, + AnalogsDates = Analog_result$dates)) + } else { + return(AnalogsFields = Analog_result$AnalogsFields) + } +} +FindAnalog <- function(expL, obsL, time_obsL, expVar, obsVar, criteria, lonVar, + latVar, region, nAnalogs = nAnalogs, + AnalogsInfo = AnalogsInfo) { position <- Select(expL = expL, obsL = obsL, expVar = expVar, obsVar = obsVar, criteria = criteria, lonVar = lonVar, latVar = latVar, region = region)$position metrics<- Select(expL = expL, obsL = obsL, expVar = expVar, - obsVar = obsVar, criteria = criteria, lonVar = lonVar, - latVar = latVar, region = region)$metric.original + obsVar = obsVar, criteria = criteria, lonVar = lonVar, + latVar = latVar, region = region)$metric.original best <- Apply(list(position), target_dims = c('time', 'pos'), fun = BestAnalog, criteria = criteria, - return_list = return_list, nAnalogs = nAnalogs)$output1 + AnalogsInfo = AnalogsInfo, nAnalogs = nAnalogs)$output1 + Analogs_dates <- time_obsL[best] dim(Analogs_dates) <- dim(best) if (all(!is.null(region), !is.null(lonVar), !is.null(latVar))) { @@ -607,13 +829,13 @@ Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, Analogs_fields <- Subset(obsVar, along = which(names(dim(obsVar)) == 'time'), indices = best) - warning("obsVar is NULL, - obsVar will be computed from obsL (same variable)") + warning("Parameter 'obsVar' is NULL and the returned field", + "will be computed from 'obsL' (same variable).") } else { obslocal <- SelBox(obsVar, lon = lonVar, lat = latVar, - region = region)$data - Analogs_fields <- Subset(obslocal, + region = region) + Analogs_fields <- Subset(obslocal$data, along = which(names(dim(obslocal)) == 'time'), indices = best) } @@ -632,29 +854,21 @@ Analogs <- function(expL, obsL, time_obsL, expVar = NULL, obsVar = NULL, lon_dim <- which(names(dim(Analogs_fields)) == 'lon') lat_dim <- which(names(dim(Analogs_fields)) == 'lat') - if (lon_dim < lat_dim) { - dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lon_dim, lat_dim)], dim(best)) - } else if (lon_dim > lat_dim) { - dim(Analogs_fields) <- c(dim(Analogs_fields)[c(lat_dim, lon_dim)], dim(best)) - } else { - stop("Dimensions 'lat' and 'lon' not found.") - } + Analogs_metrics <- Subset(metrics, along = which(names(dim(metrics)) == 'time'), - indices = best) - DistCorr <- data.frame(as.numeric(1:nrow(Analogs_metrics)),(Analogs_metrics), - Analogs_dates) - if(dim(DistCorr)[2]==3){names(DistCorr) <- c("Analog","LargeDist","Dates")} - if(dim(DistCorr)[2]==4){names(DistCorr) <- c("Analog","LargeDist", - "LocalDist","Dates")} - if(dim(DistCorr)[2]==5){names(DistCorr) <- c("Analog","LargeDist", - "LocalDist","LocalCorr","Dates")} - return(list(AnalogsFields = Analogs_fields, - AnalogsInfo=DistCorr)) - } + indices = best) + analog_number <- as.numeric(1:nrow(Analogs_metrics)) + dim(analog_number) <- c(nAnalog = length(analog_number)) + dim(Analogs_dates) <- c(nAnalog = length(Analogs_dates)) + return(list(AnalogsFields = Analogs_fields, + Analog = 
analog_number, + metric = Analogs_metrics, + dates = Analogs_dates)) +} -BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, - nAnalogs = nAnalogs) { +BestAnalog <- function(position, nAnalogs = nAnalogs, AnalogsInfo = FALSE, + criteria = 'Large_dist') { pos_dim <- which(names(dim(position)) == 'pos') if (dim(position)[pos_dim] == 1) { pos1 <- Subset(position, along = pos_dim, indices = 1) @@ -683,7 +897,7 @@ BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, "length than expected (from 1 to 3).") } if (criteria == 'Large_dist') { - if (return_list == FALSE) { + if (AnalogsInfo == FALSE) { pos <- pos1[1] } else { pos <- pos1[1 : nAnalogs] @@ -696,26 +910,43 @@ BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, warning("Just 1 best analog matching Large_dist and ", "Local_dist criteria") } - if(length(best)==1 & is.na(best[1])==TRUE){ - stop("no best analogs matching Large_dist and Local_dist criterias") + if(length(best)<1 | is.na(best[1])==TRUE){ + stop("no best analogs matching Large_dist and Local_dist criterias, + please increase nAnalogs") } pos <- pos2[as.logical(best)] pos <- pos[which(!is.na(pos))] - if (return_list == FALSE) { + if (AnalogsInfo == FALSE) { pos <- pos[1] }else { - pos <- pos} + pos <- pos} } else if (criteria == 'Local_cor') { pos1 <- pos1[1 : nAnalogs] pos2 <- pos2[1 : nAnalogs] best <- match(pos1, pos2) + if(length(best)==1){ + warning("Just 1 best analog matching Large_dist and ", + "Local_dist criteria") + } + if(length(best)<1 | is.na(best[1])==TRUE){ + stop("no best analogs matching Large_dist and Local_dist criterias, + please increase nAnalogs") + } pos <- pos1[as.logical(best)] pos <- pos[which(!is.na(pos))] pos3 <- pos3[1 : nAnalogs] best <- match(pos, pos3) + if(length(best)==1){ + warning("Just 1 best analog matching Large_dist, Local_dist and ", + "Local_cor criteria") + } + if(length(best)<1 | is.na(best[1])==TRUE){ + stop("no best analogs matching Large_dist, Local_dist and Local_cor + criterias, please increase nAnalogs") + } pos <- pos[order(best, decreasing = F)] pos <- pos[which(!is.na(pos))] - if (return_list == FALSE) { + if (AnalogsInfo == FALSE) { pos <- pos[1] } else{ pos <- pos @@ -726,7 +957,8 @@ BestAnalog <- function(position, criteria = 'Large_dist', return_list = FALSE, Select <- function(expL, obsL, expVar = NULL, obsVar = NULL, criteria = "Large_dist", lonVar = NULL, latVar = NULL, region = NULL) { -names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) + names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), + names(dim(obsL))) metric1 <- Apply(list(obsL), target_dims = list(c('lat', 'lon')), fun = .select, expL, metric = "dist")$output1 metric1.original=metric1 @@ -753,7 +985,8 @@ names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) dim(metric1) <- c(dim(metric1), metric = 1) dim(pos1) <- c(dim(pos1), pos = 1) dim(metric1.original)=dim(metric1) - return(list(metric = metric1, metric.original=metric1.original,position = pos1)) + return(list(metric = metric1, metric.original=metric1.original, + position = pos1)) } if (criteria == "Local_dist" | criteria == "Local_cor") { obs <- SelBox(obsL, lon = lonVar, lat = latVar, region = region)$data @@ -770,12 +1003,14 @@ names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) names(dim(metric2))[1] <- 'time' if (criteria == "Local_dist") { metric <- abind(metric1, metric2, along = length(dim(metric1))+1) - metric.original <- 
abind(metric1.original,metric2.original,along=length(dim(metric1))+1) + metric.original <- abind(metric1.original,metric2.original, + along=length(dim(metric1))+1) position <- abind(pos1, pos2, along = length(dim(pos1))+1) names(dim(metric)) <- c(names(dim(pos1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') names(dim(metric.original)) = names(dim(metric)) - return(list(metric = metric, metric.original=metric.original,position = position)) + return(list(metric = metric, metric.original=metric.original, + position = position)) } } if (criteria == "Local_cor") { @@ -788,19 +1023,20 @@ names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) margins <- c(1 : (length(dim(metric3))))[-dim_time_obs] pos3 <- apply(abs(metric3), margins, order, decreasing = TRUE) names(dim(pos3))[1] <- 'time' - #metric3 <- apply(abs(metric3), margins, sort) metricsort <- metric3[pos3] dim(metricsort)=dim(metric3) names(dim(metricsort))[1] <- 'time' metric <- abind(metric1, metric2, metricsort, along = length(dim(metric1)) + 1) - metric.original <- abind(metric1.original, metric2.original, metric3.original, - along = length(dim(metric1)) + 1) + metric.original <- abind(metric1.original, metric2.original, + metric3.original, + along = length(dim(metric1)) + 1) position <- abind(pos1, pos2, pos3, along = length(dim(pos1)) + 1) names(dim(metric)) <- c(names(dim(metric1)), 'metric') names(dim(position)) <- c(names(dim(pos1)), 'pos') names(dim(metric.original)) = names(dim(metric)) - return(list(metric = metric, metric.original=metric.original,position = position)) + return(list(metric = metric, metric.original=metric.original, + position = position)) } else { stop("Parameter 'criteria' must to be one of the: 'Large_dist', ", @@ -810,14 +1046,22 @@ names(dim(expL)) <- replace_repeat_dimnames(names(dim(expL)), names(dim(obsL))) .select <- function(exp, obs, metric = "dist") { if (metric == "dist") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), - fun = function(x) {sum((x - exp) ^ 2)})$output1 + fun = function(x) {sqrt(sum((x - exp) ^ 2, na.rm = TRUE))})$output1 } else if (metric == "cor") { result <- Apply(list(obs), target_dims = list(c('lat', 'lon')), fun = function(x) {cor(as.vector(x), - as.vector(exp))})$output1 + as.vector(exp), + method="spearman")})$output1 } result } +.time_ref<- function(time_obsL,time_expL,excludeTime){ + sameTime=which(time_obsL %in% time_expL) + result<- c(time_obsL[1:(sameTime-excludeTime-1)], + time_obsL[(sameTime+excludeTime+1):length(time_obsL)]) + result +} + replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', lon_name = 'lon') { if (!is.character(names_exp)) { @@ -836,5 +1080,24 @@ replace_repeat_dimnames <- function(names_exp, names_obs, lat_name = 'lat', names_exp[original_pos] <- paste0(names_exp[original_pos], "_exp") } return(names_exp) - ## Improvements: other dimensions to avoid replacement for more flexibility. 
+} + +replace_time_dimnames <- function(dataL, time_name = 'time', + stdate_name='stdate', ftime_name='ftime') { + names_obs=names(dim(dataL)) + if (!is.character(names_obs)) { + stop("Parameter 'names_obs' must be a vector of characters.") + } + time_dim_obs <- which(names_obs == time_name | + names_obs == stdate_name | names_obs == ftime_name) + if(length(time_dim_obs) >1){ + stop ("more than 1 time dimension, please give just 1") + } + if(length(time_dim_obs) == 0){ + warning ("name of time dimension is not 'ftime' or 'time' or 'stdate' + or time dimension is null") + } + if(length(time_dim_obs)!=0){ names_obs[time_dim_obs]= time_name} + names(dim(dataL))=names_obs + return(dataL) } diff --git a/R/CST_AnalogsPredictors.R b/R/CST_AnalogsPredictors.R new file mode 100644 index 0000000000000000000000000000000000000000..a15a4c0cab79b383293dcbd4c34de41b95a8a19c --- /dev/null +++ b/R/CST_AnalogsPredictors.R @@ -0,0 +1,847 @@ +#' AEMET Downscaling +#' Precipitation and maximum and minimum temperature downscaling method +#' based on analogs: synoptic situations and significant predictors. +#' +#'@author Marta Dominguez Alonso - AEMET, \email{mdomingueza@aemet.es} +#'@author Nuria Perez-Zanon - BSC, \email{nuria.perez@bsc.es} +#' +#'@description This function downscales low resolution precipitation data (e.g. from +#' Seasonal Forecast Models) through the association with an observational high +#' resolution (HR) dataset (AEMET 5 km gridded data of daily precipitation (Peral et al., 2017)) +#' and a collection of predictors and past synoptic situations similar to estimated day. +#' The method uses three domains: +#' - peninsular Spain and Balearic Islands domain (5 km resolution): HR precipitation +#' and the downscaling result domain. +#' - synoptic domain (low resolution, e.g. 1.5º x 1.5º): it should be centered over Iberian Peninsula +#' and cover enough extension to detect as much synoptic situations as possible. +#' - extended domain (low resolution, e.g. 1.5º x 1.5º): it should have the same resolution +#' as synoptic domain. It is used for SLP Seasonal Forecast Models. +#'@param exp List of arrays with downscaled period seasonal forecast data. The list +#' has to contain model atmospheric variables (instantaneous 12h data) that must +#' be indentify by parenthesis name. +#' For precipitation: +#' - u component of wind at 500 hPa (u500_mod) in m/s +#' - v component of wind at 500 hPa (v500_mod) in m/s +#' - temperature at 500 hPa (t500_mod) in K +#' - temperature at 850 hPa (t850_mod) in K +#' - specific humidity at 700 hPa (q700_mod) in g/kg +#' For temperature: +#' - u component of wind at 500 hPa (u500_mod) in m/s +#' - v component of wind at 500 hPa (v500_mod) in m/s +#' - temperature at 500 hPa (t500_mod) in K +#' - temperature at 700 hPa (t700_mod) in K +#' - temperature at 850 hPa (t850_mod) in K +#' - specific humidity at 700 hPa (q700_mod) in g/kg +#' - 2 meters temperature (tm2m_mod) in K +#' The arrays must have at least three dimensions with names 'lon', 'lat' and 'time'. +#' (lon = gridpoints of longitude, lat = gridpoints of latitude, time = number of downscaling days) +#' Seasonal forecast variables must have the same resolution and +#' domain as reanalysis variables ('obs' parameter, below). +#'@param slp Array with atmospheric seasonal forecast model sea level pressure +#' (instantaneous 12h data) that must be indentify as 'slp' (hPa). It has the same +#' resolution as 'exp' and 'obs' paremeters but with an extended domain. 
+#' This domain contains extra degrees (most in the north and west part) compare to +#' synoptic domain. The array must have at least three dimensions +#' with names 'lon', 'lat' and 'time'. +#'@param obs List of arrays with training period reanalysis data. +#' The list has to contain reanalysis atmospheric variables (instantaneous +#' 12h data) that must be indentify by parenthesis name. +#' For precipitation: +#' - u component of wind at 500 hPa (u500) in m/s +#' - v component of wind at 500 hPa (v500) in m/s +#' - temperature at 500 hPa (t500) in K +#' - temperature at 850 hPa (t850) in K +#' - sea level pressure (slp) in hPa +#' - specific humidity at 700 hPa (q700) in g/kg +#' For maximum and minimum temperature: +#' - u component of wind at 500 hPa (u500) in m/s +#' - v component of wind at 500 hPa (v500) in m/s +#' - temperature at 500 hPa (t500) in K +#' - temperature at 700 hPa (t700) in K +#' - temperature at 850 hPa (t850) in K +#' - sea level pressure (slp) in hPa +#' - specific humidity at 700 hPa (q700) in g/kg +#' - 2 meters temperature (tm2m) in K +#' The arrays must have at least three dimensions with names 'lon', 'lat' and 'time'. +#'@param lon Vector of the synoptic longitude (from (-180º) to 180º), +#' The vector must go from west to east. The same as for the training function. +#'@param lat Vector of the synoptic latitude. The vector must go from north to south. +#' The same as for the training function. +#'@param slp_lon Vector of the extended longitude (from (-180º) to 180º), +#' The vector must go from west to east. The same as for the training function. +#'@param slp_lat Vector of the extended latitude. The vector must go from north to south. +#' The same as for the training function. +#'@param var_name Variable name to downscale. There are two options: 'prec' for +#' precipitation and 'temp' for maximum and minimum temperature. +#'@param hr_obs Local path of HR observational files (maestro and pcp/tmx-tmn). +#' For precipitation can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/v2/Serie_AEMET_v2_pcp_1951a202006_txt.tar.gz +#' For maximum and minimum temperature can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmax/Serie_AEMET_v1_tmax_1951a202006_txt.tar.gz and http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmin/Serie_AEMET_v1_tmin_1951a202006_txt.tar.gz respetively. +#' Maestro file (maestro_red_hr_SPAIN.txt) has gridpoint (nptos), longitude (lon), latitude (lat) and +#' altitude (alt) in columns (vector structure). +#' Data file (pcp/tmx/tmn_red_SPAIN_1951-201903.txt) includes 5km resolution spanish daily data +#' (precipitation or maximum and minimum temperature from january 1951 to june 2020. See README +#' file for more information. +#' IMPORTANT!: HR observational period must be the same as for reanalysis variables. +#' It is assumed that the training period is smaller than the HR original one (1951-2019), so it is +#' needed to make a new ascii file with the new period and the same structure as original, +#' specifying the training dates in the name (e.g. 'pcp_red_SPAIN_19810101-19961231.txt' for +#' '19810101-19961231' period). +#'@param tdates Training period dates in format YYYYMMDD(start)-YYYYMMDD(end) +#' (e.g. 19810101-20181231). 
+#'@param ddates Downscaling period dates in format YYYYMMDD(start)-YYYYMMDD(end) (e.g. 20191001-20200331). +#'@param restrain Output (list of matrix) obtained from 'training_analogs' function. +#' For precipitation, 'restrain' object must contains um, vm, nger, gu92, gv92, +#' gu52, gv52, neni, vdmin, vref, ccm, lab_pred and cor_pred variables. +#' For maximum and minimum temperature, 'restrain' object must contains um, vm, +#' insol, neni, vdmin y vref. See 'AnalogsPred_train.R' for more information. +#'@param dim_name_longitude A character string indicating the name of the longitude +#'dimension, by default 'longitude'. +#'@param dim_name_latitude A character string indicating the name of the latitude +#'dimension, by default 'latitude'. +#'@param dim_name_time A character string indicating the name of the time +#'dimension, by default 'time'. +#'@return Matrix with seasonal forecast precipitation (mm) or +#' maximum and minimum temperature (dozens of ºC) in a 5km x 5km regular grid +#' over peninsular Spain and Balearic Islands. The resulted matrices have two +#' dimensions ('ddates' x 'nptos').(ddates = number of downscaling days +#' and nptos = number of 'hr_obs' gridpoints). +#' +#'@useDynLib CSTools +#' +#'@export +#' +CST_AnalogsPredictors <- function(exp, + slp, + obs, + lon, + lat, + slp_lon, + slp_lat, + var_name, + hr_obs, + tdates, + ddates, + restrain, + dim_name_longitude = "lon", + dim_name_latitude = "lat", + dim_name_time = "time") { + +if (!is.list(exp)) { + stop("Parameter 'exp' must be a list of 'array' objects") + } + +if (!(all(sapply(exp, inherits, 'array')))) { + stop("Elements of the list in parameter 'exp' must be of the class ", + "'array'.") + } + +if (!is.array(slp)) { + stop("Parameter 'slp' must be of the class 'array'.") + } + +if (!is.list(obs)) { + stop("Parameter 'obs' must be a list of 'array' objects") + } + +if (!(all(sapply(obs, inherits, 'array')))) { + stop("Elements of the list in parameter 'obs' must be of the class ", + "'array'.") + } + +if (var_name == "prec") { + if (length(exp) != 5) { + stop("Parameter 'exp' must be a length of 5.") + } else { + if (!(any(names(exp) %in% "u500_mod"))) { + stop("Variable 'u500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "v500_mod"))) { + stop("Variable 'v500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "t500_mod"))) { + stop("Variable 't500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "t850_mod"))) { + stop("Variable 't850_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "q700_mod"))) { + stop("Variable 'q700_mod' in 'exp' parameter is missed.") + } + } + if (length(obs) != 6) { + stop("Parameter 'obs' must be a length of 6.") + } else { + if (!(any(names(obs) %in% "u500"))) { + stop("Variable 'u500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "v500"))) { + stop("Variable 'v500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "t500"))) { + stop("Variable 't500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "t850"))) { + stop("Variable 't850' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "slp"))) { + stop("Variable 'slp' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "q700"))) { + stop("Variable 'q700' in 'obs' parameter is missed.") + } + } +} else { + if (length(exp) != 7) { + stop("Parameter 'exp' must be a length of 7.") + } else { + if (!(any(names(exp) %in% "u500_mod"))) { + stop("Variable 
'u500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "v500_mod"))) { + stop("Variable 'v500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "t500_mod"))) { + stop("Variable 't500_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "t700_mod"))) { + stop("Variable 't700_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "t850_mod"))) { + stop("Variable 't850_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "q700_mod"))) { + stop("Variable 'q700_mod' in 'exp' parameter is missed.") + } else if (!(any(names(exp) %in% "tm2m_mod"))) { + stop("Variable 'tm2m_mod' in 'exp' parameter is missed.") + } + } + if (length(obs) != 8) { + stop("Parameter 'obs' must be a length of 8.") + } else { + if (!(any(names(obs) %in% "u500"))) { + stop("Variable 'u500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "v500"))) { + stop("Variable 'v500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "t500"))) { + stop("Variable 't500' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "t700"))) { + stop("Variable 't700' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "t850"))) { + stop("Variable 't850' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "slp"))) { + stop("Variable 'slp' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "q700"))) { + stop("Variable 'q700' in 'obs' parameter is missed.") + } else if (!(any(names(obs) %in% "tm2m"))) { + stop("Variable 'tm2m' in 'obs' parameter is missed.") + } + } +} + +if (all((sapply(exp,dim))==dim(exp[[1]]))) { + dim_exp <- dim(exp[[1]]) + if (!(any(names(dim_exp) %in% dim_name_longitude))) { + stop("Dimension 'lon' in exp parameter is missed.") + } + if (!(any(names(dim_exp) %in% dim_name_latitude))) { + stop("Dimension 'lat' in exp parameter is missed.") + } + if (!(any(names(dim_exp) %in% dim_name_time))) { + stop("Dimension 'time' in exp parameter is missed.") + } +} else { + stop("All 'exp' variables must have the same dimensions.") +} + +dim_slp <- dim(slp) +if (!(any(names(dim_slp) %in% dim_name_longitude))) { + stop("Dimension 'lon' in slp parameter is missed.") +} +if (!(any(names(dim_slp) %in% dim_name_latitude))) { + stop("Dimension 'lat' in slp parameter is missed.") +} +if (!(any(names(dim_slp) %in% dim_name_time))) { + stop("Dimension 'time' in slp parameter is missed.") +} + +if (all((sapply(obs,dim))==dim(obs[[1]]))) { + dim_obs <- dim(obs[[1]]) + if (!(any(names(dim_obs) %in% dim_name_longitude))) { + stop("Dimension 'lon' in obs parameter is missed.") + } + if (!(any(names(dim_obs) %in% dim_name_latitude))) { + stop("Dimension 'lat' in obs parameter is missed.") + } + if (!(any(names(dim_obs) %in% dim_name_time))) { + stop("Dimension 'time' in obs parameter is missed.") + } +} else { + stop("All 'obs' variables must have the same dimensions.") +} + +if (!is.vector(lon) || !is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector") +} else { + if (is.unsorted(lon)) { + lon <- sort(lon) + warning("'lon' vector has been sorted in increasing order") + } +} + +if (!is.vector(lat) || !is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector") +} else { + if (!is.unsorted(lat)) { + lat <- sort(lat, decreasing = TRUE) + warning("'lat' vector has been sorted in decreasing order") + } +} + +if (!is.vector(slp_lon) || !is.numeric(slp_lon)) { + stop("Parameter 'slp_lon' must be a numeric vector") +} else { + if 
(is.unsorted(slp_lon)) {
+    slp_lon <- sort(slp_lon)
+    warning("'slp_lon' vector has been sorted in increasing order")
+  }
+}
+
+if (!is.vector(slp_lat) || !is.numeric(slp_lat)) {
+  stop("Parameter 'slp_lat' must be a numeric vector")
+} else {
+  if (!is.unsorted(slp_lat)) {
+    slp_lat <- sort(slp_lat, decreasing = TRUE)
+    warning("'slp_lat' vector has been sorted in decreasing order")
+  }
+}
+
+if (!is.character(hr_obs)){
+  stop("Parameter 'hr_obs' must be a character.")
+} else {
+  if (!dir.exists(hr_obs)) {
+    stop("'hr_obs' directory does not exist")
+  }
+}
+
+if (!is.character(tdates)) {
+  stop("Parameter 'tdates' must be a character.")
+} else {
+  if (nchar(tdates) != 17) {
+    stop("Parameter 'tdates' must be a string with 17 characters.")
+  } else {
+    dateini <- as.Date(substr(tdates,start=1,stop=8),format="%Y%m%d")
+    dateend <- as.Date(substr(tdates,start=10,stop=18),format="%Y%m%d")
+    if (dateend <= dateini) {
+      stop("Parameter 'tdates' must cover a period of at least one day")
+    }
+  }
+}
+
+if (!is.character(ddates)) {
+  stop("Parameter 'ddates' must be a character.")
+} else {
+  if (nchar(ddates) != 17) {
+    stop("Parameter 'ddates' must be a string with 17 characters.")
+  } else {
+    dateini <- as.Date(substr(ddates,start=1,stop=8),format="%Y%m%d")
+    dateend <- as.Date(substr(ddates,start=10,stop=18),format="%Y%m%d")
+    if (dateend <= dateini) {
+      stop("Parameter 'ddates' must cover a period of at least one day")
+    }
+  }
+}
+
+#
+
+if (names(dim(exp[[1]]))[1] == "lon" & names(dim(exp[[1]]))[2] == "lat"
+     || names(dim(exp[[1]]))[2] == "lon" & names(dim(exp[[1]]))[3] == "lat") {
+  texp2D <- lapply(exp, MergeDims, merge_dims = c('lon', 'lat'),
+                   rename_dim = 'gridpoint')
+} else if (names(dim(exp[[1]]))[1] == "lat" & names(dim(exp[[1]]))[2] == "lon"
+     || names(dim(exp[[1]]))[2] == "lat" & names(dim(exp[[1]]))[3] == "lon") {
+  texp2D <- lapply(exp, MergeDims, merge_dims = c('lat', 'lon'),
+                   rename_dim = 'gridpoint')
+}
+
+if (names(dim(slp))[1] == "lon" & names(dim(slp))[2] == "lat"
+     || names(dim(slp))[2] == "lon" & names(dim(slp))[3] == "lat") {
+  tslp2D <- MergeDims(slp,merge_dims = c('lon', 'lat'),
+                      rename_dim = 'gridpoint')
+} else if (names(dim(slp))[1] == "lat" & names(dim(slp))[2] == "lon"
+     || names(dim(slp))[2] == "lat" & names(dim(slp))[3] == "lon") {
+  tslp2D <- MergeDims(slp,merge_dims = c('lat', 'lon'),
+                      rename_dim = 'gridpoint')
+}
+
+if (names(dim(obs[[1]]))[1] == "lon" & names(dim(obs[[1]]))[2] == "lat"
+     || names(dim(obs[[1]]))[2] == "lon" & names(dim(obs[[1]]))[3] == "lat") {
+  tobs2D <- lapply(obs, MergeDims, merge_dims = c('lon', 'lat'),
+                   rename_dim = 'gridpoint')
+} else if (names(dim(obs[[1]]))[1] == "lat" & names(dim(obs[[1]]))[2] == "lon"
+     || names(dim(obs[[1]]))[2] == "lat" & names(dim(obs[[1]]))[3] == "lon") {
+  tobs2D <- lapply(obs, MergeDims, merge_dims = c('lat', 'lon'),
+                   rename_dim = 'gridpoint')
+}
+
+if (names(dim(texp2D[[1]]))[1] == "gridpoint") {
+  exp2D <- lapply(texp2D,aperm)
+} else {
+  exp2D <- texp2D
+}
+
+if (names(dim(tslp2D))[1] == "gridpoint") {
+  slp2D <- aperm(tslp2D)
+} else {
+  slp2D <- tslp2D
+}
+
+if (names(dim(tobs2D[[1]]))[1] == "gridpoint") {
+  obs2D <- lapply(tobs2D,aperm)
+} else {
+  obs2D <- tobs2D
+}
+
+  downres <- .analogspred(exp2D,
+                          slp2D,
+                          obs2D,
+                          lon,
+                          lat,
+                          slp_lon,
+                          slp_lat,
+                          var_name,
+                          hr_obs,
+                          tdates,
+                          ddates,
+                          restrain)
+
+}
+
+#' Atomic .analogspred function
+#'
+#'@author Marta Dom\'inguez Alonso - AEMET, \email{mdomingueza@aemet.es}
+#'
+#' This function works with lists of matrices from reanalysis and seasonal
+#' forecast data and uses a Fortran interface (.Fortran) to run an
+#' analogs method developed in AEMET.
+#'@param pred_mod List of matrices with downscaled period seasonal forecast data. The list
+#' has to contain model atmospheric variables (instantaneous 12h data) that must
+#' be identified by the names given in parentheses.
+#' For precipitation:
+#' - u component of wind at 500 hPa (u500_mod) in m/s
+#' - v component of wind at 500 hPa (v500_mod) in m/s
+#' - temperature at 500 hPa (t500_mod) in K
+#' - temperature at 850 hPa (t850_mod) in K
+#' - specific humidity at 700 hPa (q700_mod) in g/kg
+#' For temperature:
+#' - u component of wind at 500 hPa (u500_mod) in m/s
+#' - v component of wind at 500 hPa (v500_mod) in m/s
+#' - temperature at 500 hPa (t500_mod) in K
+#' - temperature at 700 hPa (t700_mod) in K
+#' - temperature at 850 hPa (t850_mod) in K
+#' - specific humidity at 700 hPa (q700_mod) in g/kg
+#' - 2 meters temperature (tm2m_mod) in K
+#' Seasonal forecast variables must have the same resolution and
+#' domain as the 'pred_rea' parameter.
+#' All matrices must have two dimensions with names 'time' and 'gridpoint'.
+#'@param pred_slp Matrix with atmospheric seasonal forecast model sea level pressure
+#' (instantaneous 12h data) that must be identified as 'slp'. It has the same
+#' resolution as the 'pred_mod' parameter but with an extended domain. This domain contains
+#' extra degrees (most in the north and west part) compared to the synoptic domain.
+#' The matrix must have two dimensions with names 'time' and 'gridpoint'.
+#'@param pred_rea List of matrices with training period reanalysis data.
+#' The list has to contain reanalysis atmospheric variables (instantaneous
+#' 12h data) that must be identified by the names given in parentheses.
+#' For precipitation:
+#' - u component of wind at 500 hPa (u500) in m/s
+#' - v component of wind at 500 hPa (v500) in m/s
+#' - temperature at 500 hPa (t500) in K
+#' - temperature at 850 hPa (t850) in K
+#' - sea level pressure (slp) in hPa
+#' - specific humidity at 700 hPa (q700) in g/kg
+#' For maximum and minimum temperature:
+#' - u component of wind at 500 hPa (u500) in m/s
+#' - v component of wind at 500 hPa (v500) in m/s
+#' - temperature at 500 hPa (t500) in K
+#' - temperature at 700 hPa (t700) in K
+#' - temperature at 850 hPa (t850) in K
+#' - sea level pressure (slp) in hPa
+#' - specific humidity at 700 hPa (q700) in g/kg
+#' - 2 meters temperature (tm2m) in K
+#' All matrices must have two dimensions with names 'ddates' and 'gridpoint'.
+#'@param lon Vector of the synoptic longitude (from (-180º) to 180º).
+#' The vector must go from west to east.
+#'@param lat Vector of the synoptic latitude. The vector must go from north to south.
+#'@param slp_lon Vector of the extended longitude (from (-180º) to 180º).
+#' The vector must go from west to east.
+#'@param slp_lat Vector of the extended latitude. The vector must go from north to south.
+#'@param var Variable name to downscale. There are two options: 'prec' for
+#' precipitation and 'temp' for maximum and minimum temperature.
+#'@param HR_path Local path of HR observational files (maestro and pcp/tmx-tmn).
+#' For precipitation can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/v2/Serie_AEMET_v2_pcp_1951a201903_txt.tar.gz +#' For maximum and minimum temperature can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmax/Serie_AEMET_v1_tmax_1951a202006_txt.tar.gz and http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmin/Serie_AEMET_v1_tmin_1951a202006_txt.tar.gz respetively. +#' Maestro file (maestro_red_hr_SPAIN.txt) has gridpoint (nptos), longitude (lon), latitude (lat) and +#' altitude (alt) in columns (vector structure). +#' Data file (pcp/tmx/tmn_red_SPAIN_1951-201903.txt) includes 5km resolution spanish daily data +#' (precipitation or maximum and minimum temperature from january 1951 to march 2019. See README +#' file for more information. +#' IMPORTANT!: HR observational period must be the same as for reanalysis variables +#' ('pred_rea' parameter). +#' It is assumed that the training period is smaller than the HR original one (1951-2019), so it is +#' needed to make a new ascii file with the new period and the same structure as original, +#' specifying the training dates in the name (e.g. 'pcp_red_SPAIN_19810101-19961231.txt' for +#' '19810101-19961231' period). +#'@param tdates Training period dates in format YYYYMMDD(start)-YYYYMMDD(end) +#' (e.g. 19810101-20181231). The same as for the training function. +#'@param ddates Downscaling period dates in format YYYYMMDD(start)-YYYYMMDD(end) (e.g. 20191001-20200331). +#'@param restrain Output (list of matrix) obtained from 'training_analogs' function. +#' For precipitation, 'restrain' object must contains um, vm, nger, gu92, gv92, +#' gu52, gv52, neni, vdmin, vref, ccm, lab_pred and cor_pred variables. +#' For maximum and minimum temperature, 'restrain' object must contains um, vm, +#' insol, neni, vdmin y vref. See 'AnalogsPred_train.R' for more information. +#'@return .analogspred returns seasonal forecast precipitation (mm) or +#' maximum and minimum temperature (dozens of ºC) in a 5km x 5km regular grid over +#' peninsular Spain and Balearic Islands. Each matrix of the list has two dimensions +#' ('ddates' x 'nptos'). 
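As an orientation aside (not part of the package sources): the 'time' x 'gridpoint' matrices described above are exactly what the exported wrapper builds from lon/lat/time arrays before calling this atomic function. A minimal sketch of that reshaping with CSTools::MergeDims on a synthetic array; the array contents and sizes here are illustrative assumptions only.

library(CSTools)                                        # for MergeDims
u500 <- array(rnorm(10 * 8 * 31),
              dim = c(lon = 10, lat = 8, time = 31))    # synthetic 12h field
u500_2d <- MergeDims(u500, merge_dims = c('lon', 'lat'),
                     rename_dim = 'gridpoint')          # collapse lon/lat into 'gridpoint'
if (names(dim(u500_2d))[1] == "gridpoint") {
  u500_2d <- aperm(u500_2d)                             # put 'time' first, as expected here
}
dim(u500_2d)                                            # time x gridpoint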
+#' +#'@importFrom utils read.table +#' +#'@useDynLib CSTools +#'@noRd + + +.analogspred <- function(pred_mod, + pred_slp, + pred_rea, + lon, + lat, + slp_lon, + slp_lat, + var, + HR_path, + tdates, + ddates, + restrain) { + + +if (!is.list(pred_mod)) { + stop("Parameter 'pred_mod' must be a list of 'matrix' objects") + } + +if (!(all(sapply(pred_mod, inherits, 'matrix')))) { + stop("Elements of the list in parameter 'pred_mod' must be of the class ", + "'matrix'.") + } + +if (!is.matrix(pred_slp)) { + stop("Parameter 'pred_slp' must be of the class 'matrix'.") + } + +if (!is.list(pred_rea)) { + stop("Parameter 'pred_rea' must be a list of 'matrix' objects") + } + +if (!(all(sapply(pred_rea, inherits, 'matrix')))) { + stop("Elements of the list in parameter 'pred_rea' must be of the class ", + "'matrix'.") + } + +if (var == "prec") { + if (length(pred_rea) != 6) { + stop("Parameter 'pred_rea' must be a length of 6.") + } + if (length(pred_mod) != 5) { + stop("Parameter 'pred_mod' must be a length of 5.") + } +} else { + if (length(pred_rea) != 8) { + stop("Parameter 'pred_rea' must be a length of 8.") + } + if (length(pred_mod) != 7) { + stop("Parameter 'pred_mod' must be a length of 7.") + } +} + +if (!is.vector(lon) || !is.numeric(lon)) { + stop("Parameter 'lon' must be a numeric vector") + } + +if (!is.vector(lat) || !is.numeric(lat)) { + stop("Parameter 'lat' must be a numeric vector") + } + +if (!is.vector(slp_lon) || !is.numeric(slp_lon)) { + stop("Parameter 'slp_lon' must be a numeric vector") + } + +if (!is.vector(slp_lat) || !is.numeric(slp_lat)) { + stop("Parameter 'slp_lat' must be a numeric vector") + } + +if (!is.character(HR_path)){ + stop("Parameter 'HR_path' must be a character.") + } + +if (!is.character(tdates)) { + stop("Parameter 'tdates' must be a character.") + } + +if (!is.character(ddates)) { + stop("Parameter 'ddates' must be a character.") + } + +if (!is.list(restrain)) { + stop("Parameter 'restrain' must be a list of 'matrix' and 'parameter' objects") + } + +#! 
REANALYSIS GRID PARAMETERS + + rlon <- c(lon, NA) - c(NA, lon) + rlon <- rlon[!is.na(rlon)] + if (!all(rlon == rlon[1])) { + stop("Parameter 'lon' must be in regular grid.") + } else { + rlon <- rlon[1] + } + + rlat <- c(lat, NA) - c(NA, lat) + rlat <- rlat[!is.na(rlat)] + if (!all(rlat == rlat[1])) { + stop("Parameter 'lat' must be in regular grid.") + } else { + rlat <- rlat[1] + } + + if (rlon != (-rlat)) { + stop("Parameters 'lon' and 'lat' must have the same resolution.") + } else { + res <- rlon + } + + nlat <- ((lat[length(lat)] - lat[1]) / rlat) + 1 + nlon <- ((lon[length(lon)] - lon[1]) / rlon) + 1 + + ic <- nlat * nlon +# + slp_rlon <- c(slp_lon, NA) - c(NA, slp_lon) + slp_rlon <- slp_rlon[!is.na(slp_rlon)] + if (!all(slp_rlon == slp_rlon[1])) { + stop("Parameter 'slp_lon' must be in regular grid.") + } else { + slp_rlon <- slp_rlon[1] + } + + slp_rlat <- c(slp_lat, NA) - c(NA, slp_lat) + slp_rlat <- slp_rlat[!is.na(slp_rlat)] + if (!all(slp_rlat == slp_rlat[1])) { + stop("Parameter 'slp_lat' must be in regular grid.") + } else { + slp_rlat <- slp_rlat[1] + } + + if (slp_rlon != (-slp_rlat)) { + stop("Parameters 'slp_lon' and 'slp_lat' must have the same resolution.") + } else { + slp_res <- slp_rlon + } + + nlatt <- ((slp_lat[length(slp_lat)] - slp_lat[1]) / slp_rlat) + 1 + nlont <- ((slp_lon[length(slp_lon)] - slp_lon[1]) / slp_rlon) + 1 + + id <- nlatt * nlont + + slat <- max(lat) + slon <- min(c(lon[which(lon > 180)] - 360, + lon[which(lon <= 180)])) + + slatt <- max(slp_lat) + slont <- min(c(slp_lon[which(slp_lon > 180)] - 360, + slp_lon[which(slp_lon <= 180)])) + + ngridd <- ((2*nlatt)-1)*((2*nlont)-1) + + if (all((sapply(pred_rea,nrow))==nrow(pred_rea[[1]]))){ + nd <- nrow(pred_rea[[1]]) + } else { + stop("All 'pred_rea' variables must have the same period.") + } + + if (all((sapply(pred_mod,nrow))==nrow(pred_mod[[1]]))){ + nm <- nrow(pred_mod[[1]]) + } else { + stop("All 'pred_mod' variables must have the same period.") + } + + seqdates <- seq(as.Date(substr(ddates,start=1,stop=8),format="%Y%m%d"),as.Date(substr(ddates,start=10,stop=18),format="%Y%m%d"),by="days") + month <- format(seqdates,format="%m") + day <- format(seqdates,format="%d") + +#! TRAINING REANALYSIS VARIABLES +u500 <- pred_rea[['u500']] +v500 <- pred_rea[['v500']] +t500 <- pred_rea[['t500']] +t850 <- pred_rea[['t850']] +msl_si <- pred_rea[['slp']] +q700 <- pred_rea[['q700']] + +if (var == "temp") { +t700 <- pred_rea[['t700']] +tm2m <- pred_rea[['tm2m']] +} + +#! SEASONAL FORECAST MODEL VARIABLES +u500_mod <- pred_mod[['u500_mod']] +v500_mod <- pred_mod[['v500_mod']] +t500_mod <- pred_mod[['t500_mod']] +t850_mod <- pred_mod[['t850_mod']] +msl_lr_mod <- pred_slp +q700_mod <- pred_mod[['q700_mod']] + +if (var == "temp") { +t700_mod <- pred_mod[['t700_mod']] +tm2m_mod <- pred_mod[['tm2m_mod']] +} + +#! 
HIGH-RESOLUTION (HR) OBSERVATIONAL DATASET +maestro_hr_file <- paste(HR_path, "maestro_red_hr_SPAIN.txt",sep="") +if (!file.exists(maestro_hr_file)) { + stop("'maestro_red_hr_SPAIN.txt' does not exist.") +} else { + maestro <- read.table(maestro_hr_file) + lon_hr <- unlist(maestro[2]) + lat_hr <- unlist(maestro[3]) + nptos <- length(readLines(maestro_hr_file)) +} + +if (var == "prec") { + prec_hr_file <- paste(HR_path, "pcp_red_SPAIN_",tdates,".txt",sep="") + if (!file.exists(prec_hr_file)) { + stop(sprintf("precipitation HR file for %s does not exist.",tdates)) + } else { + nd_hr <- length(readLines(prec_hr_file)) + preprec_hr <- matrix(scan(prec_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + prec_hr <- preprec_hr[1:nd_hr,-c(1)] + } +} else { + tmx_hr_file <- paste(HR_path, "tmx_red_SPAIN_",tdates,".txt",sep="") + tmn_hr_file <- paste(HR_path, "tmn_red_SPAIN_",tdates,".txt",sep="") + if (!file.exists(tmx_hr_file)) { + stop(sprintf("maximum temperature HR file for %s does not exist.",tdates)) + } else if (!file.exists(tmn_hr_file)) { + stop(sprintf("minimum temperature HR file for %s does not exist.",tdates)) + } else if (length(readLines(tmx_hr_file)) != length(readLines(tmn_hr_file))) { + stop("maximum and minimum temperature HR observation files must have the same period.") + } else { + nd_hr <- length(readLines(tmx_hr_file)) + pretmx_hr <- matrix(scan(tmx_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + tmx_hr <- pretmx_hr[1:nd_hr,-c(1)] + pretmn_hr <- matrix(scan(tmn_hr_file), nrow=nd_hr ,ncol= nptos+1, byrow=TRUE) + tmn_hr <- pretmn_hr[1:nd_hr,-c(1)] + } +} + + if (nd_hr != nd) { + stop("Reanalysis variables and HR observations must have the same period.") + } + +#! OTHER PARAMETERS that should not be changed +#! Number of analog situations to consider +nanx <- 155 +#! Number of temperature predictors +nvar <- 7 + +#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
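A side note on the grid bookkeeping earlier in this function: the regular-grid checks and gridpoint counts are easy to lose among the Fortran-interface plumbing. The standalone sketch below reproduces the same logic on a hypothetical 1.5º synoptic domain; the lon/lat values are illustrative assumptions, not package data.

lon <- seq(-11.25, 12.75, by = 1.5)     # synoptic longitudes, west to east
lat <- seq(46.5, 33.0, by = -1.5)       # synoptic latitudes, north to south
rlon <- diff(lon)                       # same result as the c(lon, NA) - c(NA, lon) trick
if (!all(rlon == rlon[1])) stop("'lon' must be on a regular grid")
rlon <- rlon[1]
rlat <- diff(lat)
if (!all(rlat == rlat[1])) stop("'lat' must be on a regular grid")
rlat <- rlat[1]
stopifnot(rlon == -rlat)                # same resolution zonally and meridionally
nlon <- (lon[length(lon)] - lon[1]) / rlon + 1
nlat <- (lat[length(lat)] - lat[1]) / rlat + 1
ic   <- nlat * nlon                     # number of synoptic gridpoints handed to Fortran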
+ +if (var == "prec") { + + downs <- .Fortran("down_prec", + ic = as.integer(ic), + id = as.integer(id), + nd = as.integer(nd), + nm = as.integer(nm), + nlat = as.integer(nlat), + nlon = as.integer(nlon), + nlatt = as.integer(nlatt), + nlont = as.integer(nlont), + slat = as.numeric(slat), + slon = as.numeric(slon), + rlat = as.numeric(rlat), + rlon = as.numeric(rlon), + slatt = as.numeric(slatt), + slont = as.numeric(slont), + ngridd = as.integer(ngridd), + u500 = as.numeric(u500), + v500 = as.numeric(v500), + t500 = as.numeric(t500), + t850 = as.numeric(t850), + msl_si = as.numeric(msl_si), + q700 = as.numeric(q700), + prec_hr = as.numeric(prec_hr), + nanx = as.integer(nanx), + restrain$um, + restrain$vm, + restrain$nger, + restrain$gu92, + restrain$gv92, + restrain$gu52, + restrain$gv52, + restrain$neni, + restrain$vdmin, + restrain$vref, + restrain$ccm, + restrain$indices[,,,1],#lab_pred + restrain$indices[,,,2],#cor_pred + u500_mod = as.numeric(u500_mod), + v500_mod = as.numeric(v500_mod), + t500_mod = as.numeric(t500_mod), + t850_mod = as.numeric(t850_mod), + msl_lr_mod = as.numeric(msl_lr_mod), + q700_mod = as.numeric(q700_mod), + pp=matrix(as.double(seq(1,nm*nptos)),c(nm,nptos)), + PACKAGE = 'CSTools') + + output <- downs$pp + +} else { + + downs <- .Fortran("down_temp", + ic = as.integer(ic), + id = as.integer(id), + nd = as.integer(nd), + nm = as.integer(nm), + nlat = as.integer(nlat), + nlon = as.integer(nlon), + nlatt = as.integer(nlatt), + nlont = as.integer(nlont), + slat = as.numeric(slat), + slon = as.numeric(slon), + rlat = as.numeric(rlat), + rlon = as.numeric(rlon), + slatt = as.numeric(slatt), + slont = as.numeric(slont), + ngridd = as.integer(ngridd), + u500 = as.numeric(u500), + v500 = as.numeric(v500), + t500 = as.numeric(t500), + t850 = as.numeric(t850), + msl_si = as.numeric(msl_si), + q700 = as.numeric(q700), + t700 = as.numeric(t700), + tm2m = as.numeric(tm2m), + tmx_hr = as.numeric(tmx_hr), + tmn_hr = as.numeric(tmn_hr), + nanx = as.integer(nanx), + nvar = as.integer(nvar), + day = as.integer(day), + month = as.integer(month), + restrain$um, + restrain$vm, + restrain$insol, + restrain$neni, + restrain$vdmin, + restrain$vref, + u500_mod = as.numeric(u500_mod), + v500_mod = as.numeric(v500_mod), + t500_mod = as.numeric(t500_mod), + t850_mod = as.numeric(t850_mod), + msl_lr_mod = as.numeric(msl_lr_mod), + q700_mod = as.numeric(q700_mod), + t700_mod = as.numeric(t700_mod), + tm2m_mod = as.numeric(tm2m_mod), + tmx=matrix(as.double(seq(1,nm*nptos)),c(nm,nptos)), + tmn=matrix(as.double(seq(1,nm*nptos)),c(nm,nptos)), + PACKAGE = 'CSTools') + + output <- list("tmax" = downs$tmx, + "tmin" = downs$tmn) + +} + return(output) +} + + diff --git a/R/CST_Anomaly.R b/R/CST_Anomaly.R index 6e33c3411d1000abb0c15e4994aa9d7fbd473b23..52a786fb577d5a4f397a9a205abe65e1ece46f41 100644 --- a/R/CST_Anomaly.R +++ b/R/CST_Anomaly.R @@ -18,7 +18,8 @@ #' #' @return A list with two S3 objects, 'exp' and 'obs', of the class 's2dv_cube', containing experimental and date-corresponding observational anomalies, respectively. These 's2dv_cube's can be ingested by other functions in CSTools. #' -#'@import s2dverification +#'@importFrom s2dverification Clim Ano_CrossValid +#'@importFrom s2dv InsertDim #' #'@examples #'# Example 1: diff --git a/R/CST_BiasCorrection.R b/R/CST_BiasCorrection.R index 1da7fb5be8a8679d022a7d5179dcbdc2830acd26..9baf897bb680eff59d5b3bd4b3169e4a880ee65b 100644 --- a/R/CST_BiasCorrection.R +++ b/R/CST_BiasCorrection.R @@ -11,7 +11,6 @@ #' #'@references Torralba, V., F.J. 
Doblas-Reyes, D. MacLeod, I. Christel and M. Davis (2017). Seasonal climate prediction: a new source of information for the management of wind energy resources. Journal of Applied Meteorology and Climatology, 56, 1231-1247, doi:10.1175/JAMC-D-16-0204.1. (CLIM4ENERGY, EUPORIAS, NEWA, RESILIENCE, SPECS) #' -#'@import s2dverification #'@import multiApply #'@examples #' diff --git a/R/CST_Calibration.R b/R/CST_Calibration.R index ca29039764d5cbe108f1c340cf9188963972141e..58ef720777bd5ebb58cb5a3916b48ccbe71a6414 100644 --- a/R/CST_Calibration.R +++ b/R/CST_Calibration.R @@ -6,14 +6,19 @@ #' #'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the seasonal forecast experiment data in the element named \code{$data}. #'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #'@return an object of class \code{s2dv_cube} containing the calibrated forecasts in the element \code{$data} with the same dimensions as the one in the exp object. 
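For orientation, a minimal usage sketch of the new 'rpc-based' option of CST_Calibration. This is not taken from the package examples: the toy s2dv_cube objects are built by hand and the dimension sizes and seed are arbitrary assumptions.

library(CSTools)
set.seed(1)
mod <- array(rnorm(1 * 6 * 10 * 2 * 4 * 5),
             dim = c(dataset = 1, member = 6, sdate = 10, ftime = 2, lat = 4, lon = 5))
obs <- array(rnorm(1 * 1 * 10 * 2 * 4 * 5),
             dim = c(dataset = 1, member = 1, sdate = 10, ftime = 2, lat = 4, lon = 5))
lon <- seq(0, 20, 5); lat <- seq(0, 15, 5)
exp_cube <- list(data = mod, lat = lat, lon = lon)
obs_cube <- list(data = obs, lat = lat, lon = lon)
attr(exp_cube, 'class') <- 's2dv_cube'
attr(obs_cube, 'class') <- 's2dv_cube'
cal <- CST_Calibration(exp = exp_cube, obs = obs_cube,
                       cal.method = "rpc-based", eval.method = "leave-one-out",
                       apply_to = "sign", alpha = 0.1)
dim(cal$data)   # same dimensions as exp_cube$data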
#' -#'@import s2dverification +#'@importFrom s2dv InsertDim #'@import abind #' #'@seealso \code{\link{CST_Load}} @@ -36,16 +41,12 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out", multi.model = FALSE, - na.fill = TRUE, ncores = 1) { + na.fill = TRUE, na.rm = TRUE, apply_to = NULL, alpha = NULL, + memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) { if (!inherits(exp, "s2dv_cube") || !inherits(obs, "s2dv_cube")) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - - if (dim(obs$data)["member"] != 1) { - stop("The length of the dimension 'member' in the component 'data' ", - "of the parameter 'obs' must be equal to 1.") - } if(!missing(multi.model) & !(cal.method == "mse_min")){ warning(paste0("The multi.model parameter is ignored when using the calibration method ", cal.method)) @@ -54,7 +55,9 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, - na.fill = na.fill, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, + memb_dim = memb_dim, sdate_dim = sdate_dim, ncores = ncores) exp$Datasets <- c(exp$Datasets, obs$Datasets) @@ -69,26 +72,37 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min", #' #'@author Verónica Torralba, \email{veronica.torralba@bsc.es} #'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} -#'@description Four types of member-by-member bias correction can be performed. The \code{bias} method corrects the bias only, the \code{evmos} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). +#'@description Four types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014). #'@description Both in-sample or our out-of-sample (leave-one-out cross validation) calibration are possible. #'@references Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. 
doi:10.1111/j.1600-0870.2005.00104.x +#'@references Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the read world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146 #'@references Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011 #'@references Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818. doi:10.1002/qj.2397 #' #'@param exp an array containing the seasonal forecast experiment data. #'@param obs an array containing the observed data. -#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}. +#'@param cal.method is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}. #'@param eval.method is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation. #'@param multi.model is a boolean that is used only for the \code{mse_min} method. If multi-model ensembles or ensembles of different sizes are used, it must be set to \code{TRUE}. By default it is \code{FALSE}. Differences between the two approaches are generally small but may become large when using small ensemble sizes. Using multi.model when the calibration method is \code{bias}, \code{evmos} or \code{crps_min} will not affect the result. #'@param na.fill is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned. +#'@param na.rm is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. +#'@param apply_to is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}. +#'@param alpha is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}. +#'@param memb_dim is a character string indicating the name of the member dimension. By default, it is set to 'member'. +#'@param sdate_dim is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'. #'@param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #'@return an array containing the calibrated forecasts with the same dimensions as the \code{exp} array. 
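To make the 'rpc-based' option more concrete, here is a standalone restatement of the adjustment that the internal .cal / .CalibrationMembersRPC code further down in this file applies, on toy data, in-sample and without cross-validation; it is a sketch for orientation, not the package implementation.

set.seed(1)
fc  <- matrix(rnorm(6 * 20), nrow = 6)                  # member x sdate
obs <- rnorm(20)
ens_mean   <- colMeans(fc)                              # ensemble mean per start date
clim       <- mean(fc)                                  # forecast climatology
r          <- cor(ens_mean, obs)                        # correlation ensemble mean vs obs
var_signal <- var(ens_mean)                             # predictable (signal) variance
var_noise  <- var(as.vector(sweep(fc, 2, ens_mean)))    # spread about the ensemble mean
var_obs    <- var(obs)
# Rescale the ensemble-mean (signal) standard deviation to r * sd(obs),
# the condition imposed by RPC = 1 (Eade et al., 2014):
ens_mean_cal <- (ens_mean - clim) * r * sqrt(var_obs) / sqrt(var_signal) + clim
# Re-centre each member on the calibrated mean, rescaling the residual spread:
fc_cal <- sweep(fc, 2, ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise)
fc_cal <- sweep(fc_cal, 2, ens_mean_cal, FUN = "+")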
#'
-#'@import s2dverification
+#'@importFrom s2dv InsertDim MeanDims Reorder
 #'@import abind
+#'@import multiApply
+#'@importFrom s2dverification Subset
 #'
 #'@seealso \code{\link{CST_Load}}
 #'
+#'@details
+#'Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observation pairs to perform the computation. In case there are three or fewer pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == FALSE}). On the other hand, \code{na.rm} indicates whether the function should remove missing values during the computation of the parameters needed to perform the calibration.
+#'
 #'@examples
 #'mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
 #'dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
@@ -97,8 +111,11 @@ CST_Calibration <- function(exp, obs, cal.method = "mse_min",
 #'a <- Calibration(exp = mod1, obs = obs1)
 #'str(a)
 #'@export
-Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-one-out",
-  multi.model = FALSE, na.fill = TRUE, ncores = 1) {
+Calibration <- function(exp, obs, cal.method = "mse_min",
+                        eval.method = "leave-one-out",
+                        multi.model = FALSE, na.fill = TRUE,
+                        na.rm = TRUE, apply_to = NULL, alpha = NULL,
+                        memb_dim = 'member', sdate_dim = 'sdate', ncores = 1) {
   dim.exp <- dim(exp)
   amt.dims.exp <- length(dim.exp)
@@ -106,16 +123,36 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o
   amt.dims.obs <- length(dim.obs)
   dim.names.exp <- names(dim.exp)
   dim.names.obs <- names(dim.obs)
+  if (is.null(memb_dim) || !is.character(memb_dim)) {
+    stop("Parameter 'memb_dim' should be a character string indicating the ",
+         "name of the dimension where members are stored in 'exp'.")
+  }
+  if (length(memb_dim) > 1) {
+    memb_dim <- memb_dim[1]
+    warning("Parameter 'memb_dim' has length greater than 1 and only",
+            " the first element will be used.")
+  }
-  target.dim.names.exp <- c("member", "sdate")
-  target.dim.names.obs <- c("sdate")
+  if (is.null(sdate_dim) || !is.character(sdate_dim)) {
+    stop("Parameter 'sdate_dim' should be a character string indicating the ",
+         "name of the dimension where start dates are stored in 'exp'.")
+  }
+  if (length(sdate_dim) > 1) {
+    sdate_dim <- sdate_dim[1]
+    warning("Parameter 'sdate_dim' has length greater than 1 and only",
+            " the first element will be used.")
+  }
+  target.dim.names.exp <- c(memb_dim, sdate_dim)
+  target.dim.names.obs <- sdate_dim
   if (!all(target.dim.names.exp %in% dim.names.exp)) {
-    stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.")
+    stop("Parameter 'exp' must have the dimensions defined in memb_dim ",
+         "and sdate_dim.")
   }
-  if (!all(c("sdate") %in% dim.names.obs)) {
-    stop("Parameter 'obs' must have the dimension 'sdate'.")
+  if (!all(c(sdate_dim) %in% dim.names.obs)) {
+    stop("Parameter 'obs' must have the dimension defined in sdate_dim ",
+         "parameter.")
   }
   if (any(is.na(exp))) {
@@ -126,12 +163,9 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o
   warning("Parameter 'obs' contains NA values.")
   }
-  if (dim(obs)['member']!=1){
-    stop("Parameter 'obs' must have a member dimension with length=1")
+  if (memb_dim %in% names(dim(obs))) {
+    obs <- Subset(obs, along = memb_dim, indices = 1, drop = "selected")
   }
-
-  obs <-
Subset(obs, along = "member", indices = 1, drop = "selected") - data.set.sufficiently.large.out <- Apply(data = list(exp = exp, obs = obs), target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), @@ -139,22 +173,44 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o fun = .data.set.sufficiently.large)$output1 if(!all(data.set.sufficiently.large.out)){ - if(na.fill){ - warning("Some forecast data could not be corrected due to data lack and is replaced with NA values") - } else { - warning("Some forecast data could not be corrected due to data lack and is replaced with uncorrected values") - } + if(na.fill){ + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with NA values") + } else { + warning("Some forecast data could not be corrected due to data lack", + " and is replaced with uncorrected values") + } } - + + if (!na.rm %in% c(TRUE,FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE.") + } + if (cal.method == 'rpc-based') { + if (is.null(apply_to)) { + apply_to <- 'sign' + warning("'apply_to' cannot be NULL for 'rpc-based' method so it has been set to 'sign', as in Eade et al. (2014).") + } else if (!apply_to %in% c('all','sign')) { + stop("'apply_to' must be either 'all' or 'sign' when 'rpc-based' method is used.") + } + if (apply_to == 'sign') { + if (is.null(alpha)) { + alpha <- 0.1 + warning("'alpha' cannot be NULL for 'rpc-based' method so it has been set to 0.1, as in Eade et al. (2014).") + } else if (!is.numeric(alpha) | alpha <= 0 | alpha >= 1) { + stop("'alpha' must be a number between 0 and 1.") + } + } + } + calibrated <- Apply(data = list(exp = exp, obs = obs), cal.method = cal.method, eval.method = eval.method, multi.model = multi.model, - na.fill = na.fill, + na.fill = na.fill, na.rm = na.rm, + apply_to = apply_to, alpha = alpha, target_dims = list(exp = target.dim.names.exp, obs = target.dim.names.obs), - ncores = ncores, + ncores = ncores, output_dims = target.dim.names.exp, fun = .cal)$output1 - dexes <- match(names(dim(exp)), names(dim(calibrated))) calibrated <- aperm(calibrated, dexes) dimnames(calibrated) <- dimnames(exp)[dexes] @@ -181,22 +237,22 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o return(dexes.lst) } -.cal <- function(exp, obs, cal.method, eval.method, multi.model, na.fill) { +.cal <- function(exp, obs, cal.method, eval.method, multi.model, na.fill, na.rm, apply_to, alpha) { obs <- as.vector(obs) dims.fc <- dim(exp) amt.mbr <- dims.fc[1] amt.sdate <- dims.fc[2] var.cor.fc <- NA * exp - names(dim(var.cor.fc)) <- c("member", "sdate") + names(dim(var.cor.fc)) <- dims.fc if(!.data.set.sufficiently.large(exp = exp, obs = obs)){ - if(na.fill){ - return(var.cor.fc) - } else { - var.cor.fc[] <- exp[] - return(var.cor.fc) - } + if(na.fill){ + return(var.cor.fc) + } else { + var.cor.fc[] <- exp[] + return(var.cor.fc) + } } eval.train.dexeses <- .make.eval.train.dexes(eval.method, amt.points = amt.sdate) @@ -211,54 +267,72 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o obs.tr <- obs[train.dexes , drop = FALSE] if(cal.method == "bias"){ - var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = TRUE) - mean(fc.tr, na.rm = TRUE) - } else if(cal.method == "evmos"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + var.cor.fc[ , eval.dexes] <- fc.ev + mean(obs.tr, na.rm = na.rm) - mean(fc.tr, na.rm = na.rm) + } else 
if(cal.method == "evmos"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate value for regression parameters - init.par <- c(.calc.evmos.par(quant.obs.fc.tr)) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par) - } else if (cal.method == "mse_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr) + init.par <- c(.calc.evmos.par(quant.obs.fc.tr, na.rm = na.rm)) + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.evmos.fc(fc.ev , init.par, na.rm = na.rm) + } else if (cal.method == "mse_min"){ + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate value for regression parameters - init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model) - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par) + init.par <- .calc.mse.min.par(quant.obs.fc.tr, multi.model, na.rm = na.rm) + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.mse.min.fc(fc.ev , init.par, na.rm = na.rm) } else if (cal.method == "crps_min"){ - #calculate ensemble and observational characteristics - quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr) + #calculate ensemble and observational characteristics + quant.obs.fc.tr <- .calc.obs.fc.quant.ext(obs = obs.tr, fc = fc.tr, na.rm = na.rm) #calculate initial value for regression parameters - init.par <- c(.calc.mse.min.par(quant.obs.fc.tr), 0.001) + init.par <- c(.calc.mse.min.par(quant.obs.fc.tr, na.rm = na.rm), 0.001) init.par[3] <- sqrt(init.par[3]) #calculate regression parameters on training dataset optim.tmp <- optim(par = init.par, fn = .calc.crps.opt, gr = .calc.crps.grad.opt, - quant.obs.fc = quant.obs.fc.tr, + quant.obs.fc = quant.obs.fc.tr, + na.rm = na.rm, method = "BFGS") mbm.par <- optim.tmp$par - #correct evaluation subset - var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par) + #correct evaluation subset + var.cor.fc[ , eval.dexes] <- .correct.crps.min.fc(fc.ev , mbm.par, na.rm = na.rm) + } else if (cal.method == 'rpc-based') { + ens_mean.ev <- s2dv::MeanDims(data = fc.ev, dims = names(amt.mbr), na.rm = na.rm) + ens_mean.tr <- s2dv::MeanDims(data = fc.tr, dims = names(amt.mbr), na.rm = na.rm) ## Ensemble mean + ens_spread.tr <- multiApply::Apply(data = list(fc.tr, ens_mean.tr), target_dims = names(amt.sdate), fun = "-")$output1 ## Ensemble spread + exp_mean.tr <- mean(fc.tr, na.rm = na.rm) ## Mean (climatology) + var_signal.tr <- var(ens_mean.tr, na.rm = na.rm) ## Ensemble mean variance + var_noise.tr <- var(as.vector(ens_spread.tr), na.rm = na.rm) ## Variance of ensemble members about ensemble mean (= spread) + var_obs.tr <- var(obs.tr, na.rm = na.rm) ## Variance in the observations + r.tr <- cor(x = ens_mean.tr, y = obs.tr, method = 'pearson', use = ifelse(test = isTRUE(na.rm), yes = "pairwise.complete.obs", no = "everything")) ## Correlation between observations and the ensemble mean + if ((apply_to == 'all') || (apply_to == 'sign' && cor.test(ens_mean.tr, obs.tr, method = 'pearson', alternative = 'greater')$p.value < alpha)) { + ens_mean_cal <- (ens_mean.ev - exp_mean.tr) * r.tr * sqrt(var_obs.tr) / sqrt(var_signal.tr) + exp_mean.tr + var.cor.fc[ , eval.dexes] <- s2dv::Reorder(data = multiApply::Apply(data = list(exp = fc.ev, ens_mean = ens_mean.ev, 
ens_mean_cal = ens_mean_cal), target_dims = names(amt.sdate), fun = .CalibrationMembersRPC, var_obs = var_obs.tr, var_noise = var_noise.tr, r = r.tr)$output1, + order = names(dims.fc)) + dim(var.cor.fc) <- dims.fc + } else { ## no significant -> replacing with observed climatology + var.cor.fc[ , eval.dexes] <- array(data = mean(obs.tr, na.rm = na.rm), dim = dim(fc.tr)) + } } else { - stop("unknown calibration method: ",cal.method) + stop("unknown calibration method: ",cal.method) } } return(var.cor.fc) } -.calc.obs.fc.quant <- function(obs, fc){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant <- function(obs, fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(var = obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = TRUE) - obs.sd <- sd(obs, na.rm = TRUE) + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) return( append( - .calc.fc.quant(fc = fc), + .calc.fc.quant(fc = fc, na.rm = na.rm), list( obs.per.ens = obs.per.ens, cor.obs.fc = cor.obs.fc, @@ -269,17 +343,17 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o ) } -.calc.obs.fc.quant.ext <- function(obs, fc){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations +.calc.obs.fc.quant.ext <- function(obs, fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts and corresponding observations amt.mbr <- dim(fc)[1] - obs.per.ens <- InsertDim(var = obs, posdim = 1, lendim = amt.mbr) - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) + obs.per.ens <- InsertDim(obs, posdim = 1, lendim = amt.mbr) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) cor.obs.fc <- cor(fc.ens.av, obs, use = "complete.obs") - obs.av <- mean(obs, na.rm = TRUE) - obs.sd <- sd(obs, na.rm = TRUE) + obs.av <- mean(obs, na.rm = na.rm) + obs.sd <- sd(obs, na.rm = na.rm) return( append( - .calc.fc.quant.ext(fc = fc), + .calc.fc.quant.ext(fc = fc, na.rm = na.rm), list( obs.per.ens = obs.per.ens, cor.obs.fc = cor.obs.fc, @@ -291,18 +365,18 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o } -.calc.fc.quant <- function(fc){ #function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant <- function(fc, na.rm){ #function to calculate different quantities of a series of ensemble forecasts amt.mbr <- dim(fc)[1] - fc.ens.av <- apply(fc, c(2), mean, na.rm = TRUE) - fc.ens.av.av <- mean(fc.ens.av, na.rm = TRUE) - fc.ens.av.sd <- sd(fc.ens.av, na.rm = TRUE) - fc.ens.av.per.ens <- InsertDim(var = fc.ens.av, posdim = 1, lendim = amt.mbr) - fc.ens.sd <- apply(fc, c(2), sd, na.rm = TRUE) - fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = TRUE)) + fc.ens.av <- apply(fc, c(2), mean, na.rm = na.rm) + fc.ens.av.av <- mean(fc.ens.av, na.rm = na.rm) + fc.ens.av.sd <- sd(fc.ens.av, na.rm = na.rm) + fc.ens.av.per.ens <- InsertDim(fc.ens.av, posdim = 1, lendim = amt.mbr) + fc.ens.sd <- apply(fc, c(2), sd, na.rm = na.rm) + fc.ens.var.av.sqrt <- sqrt(mean(fc.ens.sd^2, na.rm = na.rm)) fc.dev <- fc - fc.ens.av.per.ens - fc.dev.sd <- sd(fc.dev, na.rm 
= TRUE) - fc.av <- mean(fc, na.rm = TRUE) - fc.sd <- sd(fc, na.rm = TRUE) + fc.dev.sd <- sd(fc.dev, na.rm = na.rm) + fc.av <- mean(fc, na.rm = na.rm) + fc.sd <- sd(fc, na.rm = na.rm) return( list( fc.ens.av = fc.ens.av, @@ -319,22 +393,22 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o ) } -.calc.fc.quant.ext <- function(fc){ #extended function to calculate different quantities of a series of ensemble forecasts +.calc.fc.quant.ext <- function(fc, na.rm){ #extended function to calculate different quantities of a series of ensemble forecasts amt.mbr <- dim(fc)[1] - repmat1.tmp <- InsertDim(var = fc, posdim = 1, lendim = amt.mbr) + repmat1.tmp <- InsertDim(fc, posdim = 1, lendim = amt.mbr) repmat2.tmp <- aperm(repmat1.tmp, c(2, 1, 3)) - spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = TRUE) - spr.abs.per.ens <- InsertDim(var = spr.abs, posdim = 1, lendim = amt.mbr) + spr.abs <- apply(abs(repmat1.tmp - repmat2.tmp), c(3), mean, na.rm = na.rm) + spr.abs.per.ens <- InsertDim(spr.abs, posdim = 1, lendim = amt.mbr) return( - append(.calc.fc.quant(fc), + append(.calc.fc.quant(fc, na.rm = na.rm), list(spr.abs = spr.abs, spr.abs.per.ens = spr.abs.per.ens)) ) } #Below are the core or elementary functions to calculate the regression parameters for the different methods -.calc.mse.min.par <- function(quant.obs.fc, multi.model = F){ +.calc.mse.min.par <- function(quant.obs.fc, multi.model = F, na.rm){ par.out <- rep(NA, 3) if(multi.model){ @@ -343,54 +417,60 @@ Calibration <- function(exp, obs, cal.method = "mse_min", eval.method = "leave-o par.out[3] <- with(quant.obs.fc, obs.sd * sqrt(1. - cor.obs.fc^2) / fc.dev.sd) } par.out[2] <- with(quant.obs.fc, cor.obs.fc * obs.sd / fc.ens.av.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } -.calc.evmos.par <- function(quant.obs.fc){ +.calc.evmos.par <- function(quant.obs.fc, na.rm){ par.out <- rep(NA, 2) par.out[2] <- with(quant.obs.fc, obs.sd / fc.sd) - par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = TRUE) + par.out[1] <- with(quant.obs.fc, obs.av - par.out[2] * fc.ens.av.av, na.rm = na.rm) return(par.out) } #Below are the core or elementary functions to calculate the functions necessary for the minimization of crps -.calc.crps.opt <- function(par, quant.obs.fc){ +.calc.crps.opt <- function(par, quant.obs.fc, na.rm){ return( with(quant.obs.fc, mean(abs(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + - ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = TRUE) - - mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = TRUE) + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev)), na.rm = na.rm) - + mean(abs((par[3])^2 * spr.abs + par[4]) / 2., na.rm = na.rm) ) ) } -.calc.crps.grad.opt <- function(par, quant.obs.fc){ +.calc.crps.grad.opt <- function(par, quant.obs.fc, na.rm){ sgn1 <- with(quant.obs.fc,sign(obs.per.ens - (par[1] + par[2] * fc.ens.av.per.ens + ((par[3])^2 + par[4] / spr.abs.per.ens) * fc.dev))) sgn2 <- with(quant.obs.fc, sign((par[3])^2 + par[4] / spr.abs.per.ens)) sgn3 <- with(quant.obs.fc,sign((par[3])^2 * spr.abs + par[4])) - deriv.par1 <- mean(sgn1, na.rm = TRUE) - deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = TRUE)) + deriv.par1 <- mean(sgn1, na.rm = na.rm) + deriv.par2 <- with(quant.obs.fc, mean(sgn1 * fc.dev, na.rm = na.rm)) deriv.par3 <- with(quant.obs.fc, - mean(2* par[3] * sgn1 * sgn2 * 
fc.ens.av.per.ens, na.rm = TRUE) - - mean(spr.abs * sgn3, na.rm = TRUE) / 2.) + mean(2* par[3] * sgn1 * sgn2 * fc.ens.av.per.ens, na.rm = na.rm) - + mean(spr.abs * sgn3, na.rm = na.rm) / 2.) deriv.par4 <- with(quant.obs.fc, - mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = TRUE) - - mean(sgn3, na.rm = TRUE) / 2.) + mean(sgn1 * sgn2 * fc.ens.av.per.ens / spr.abs.per.ens, na.rm = na.rm) - + mean(sgn3, na.rm = na.rm) / 2.) return(c(deriv.par1, deriv.par2, deriv.par3, deriv.par4)) } #Below are the core or elementary functions to correct the evaluation set based on the regression parameters -.correct.evmos.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant(fc = fc) +.correct.evmos.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc)) } -.correct.mse.min.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant(fc = fc) +.correct.mse.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * par[3])) } -.correct.crps.min.fc <- function(fc, par){ - quant.fc.mp <- .calc.fc.quant.ext(fc = fc) +.correct.crps.min.fc <- function(fc, par, na.rm){ + quant.fc.mp <- .calc.fc.quant.ext(fc = fc, na.rm = na.rm) return(with(quant.fc.mp, par[1] + par[2] * fc.ens.av.per.ens + fc.dev * abs((par[3])^2 + par[4] / spr.abs))) } + +# Function to calibrate the individual members with the RPC-based method +.CalibrationMembersRPC <- function(exp, ens_mean, ens_mean_cal, var_obs, var_noise, r){ + member_cal <- (exp - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) / sqrt(var_noise) + ens_mean_cal + return(member_cal) +} \ No newline at end of file diff --git a/R/CST_CategoricalEnsCombination.R b/R/CST_CategoricalEnsCombination.R index 5b70d068449d58a4c1f56b00484d7ce06b604a1f..909792ee593606a4c2e3b9e5c5afb551cadff206 100644 --- a/R/CST_CategoricalEnsCombination.R +++ b/R/CST_CategoricalEnsCombination.R @@ -55,7 +55,7 @@ #'@references Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combination of multiple atmospheric GCM ensembles for seasonal prediction. Monthly Weather Review, 132(12), 2732-2744. #'@references Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). #' -#'@import s2dverification +#'@importFrom s2dv InsertDim #'@import abind #'@examples #' @@ -86,19 +86,45 @@ CST_CategoricalEnsCombination <- function(exp, obs, cat.method = "pool", eval.me "of the parameter 'obs' must be equal to 1.") } names.dim.tmp <- names(dim(exp$data)) - exp$data <- .CategoricalEnsCombination.wrap(fc = exp$data, obs = obs$data, cat.method = cat.method, eval.method = eval.method, amt.cat = amt.cat, ...) + exp$data <- CategoricalEnsCombination(fc = exp$data, obs = obs$data, cat.method = cat.method, + eval.method = eval.method, amt.cat = amt.cat, ...) 
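The RPC-based member rescaling added above is a single closed formula, so a toy numeric sketch may help; all values below are invented for illustration and are not package data.
# Illustrative only: how .CalibrationMembersRPC rescales one ensemble member.
exp_member   <- 26.4   # raw value of one member
ens_mean     <- 25.9   # raw ensemble mean for that start date
ens_mean_cal <- 25.2   # calibrated ensemble mean
var_obs      <- 1.8    # observed variance
var_noise    <- 0.9    # variance of the ensemble deviations (noise term)
r            <- 0.6    # correlation between ensemble mean and observations
member_cal <- (exp_member - ens_mean) * sqrt(var_obs) * sqrt(1 - r^2) /
  sqrt(var_noise) + ens_mean_cal
member_cal   # the member deviation is rescaled and re-centred on ens_mean_cal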
names.dim.tmp[which(names.dim.tmp == "member")] <- "category" names(dim(exp$data)) <- names.dim.tmp - exp$data <- InsertDim(var = exp$data, lendim = 1, posdim = 2) + exp$data <- suppressWarnings(InsertDim(exp$data, lendim = 1, posdim = 2)) names(dim(exp$data))[2] <- "member" exp$Datasets <- c(exp$Datasets, obs$Datasets) exp$source_files <- c(exp$source_files, obs$source_files) return(exp) } +#' Make categorical forecast based on a multi-model forecast with potential for calibration +#' +#'@author Bert Van Schaeybroeck, \email{bertvs@meteo.be} +#'@description This function converts a multi-model ensemble forecast +#' into a categorical forecast by giving the probability +#' for each category. Different methods are available to combine +#' the different ensemble forecasting models into +#' probabilistic categorical forecasts. +#' +#' See details in ?CST_CategoricalEnsCombination +#'@param fc a multi-dimensional array with named dimensions containing the seasonal forecast experiment data. The amount of forecasting models is equal to the size of the \code{dataset} dimension of the data array. The amount of members per model may be different. The size of the \code{member} dimension of the data array is equal to the maximum of the ensemble members among the models. Models with smaller ensemble sizes have residual indices of the \code{member} dimension in the data array filled with NA values. +#'@param obs a multidimensional array with named dimensions containing the observed data. +#'@param amt.cat is the amount of categories. Equally-sized quantiles will be calculated based on the amount of categories. +#'@param cat.method method used to produce the categorical forecast, can be either \code{pool}, \code{comb}, \code{mmw} or \code{obs}. The method pool assumes equal weight for all ensemble members while the method comb assumes equal weight for each model. The weighting method is described in Rajagopalan et al. (2002), Robertson et al. (2004) and Van Schaeybroeck and Vannitsem (2019). Finally, the \code{obs} method classifies the observations into the different categories and therefore contains only 0 and 1 values. +#'@param eval.method is the sampling method used, can be either \code{"in-sample"} or \code{"leave-one-out"}. Default value is the \code{"leave-one-out"} cross validation. +#'@param ... other parameters to be passed on to the calibration procedure. +#' +#'@return an array containing the categorical forecasts. The first two dimensions of the returned object are named dataset and member and are both of size one. An additional dimension named category is introduced and is of size amt.cat. +#' +#'@references Rajagopalan, B., Lall, U., & Zebiak, S. E. (2002). Categorical climate forecasts through regularization and optimal combination of multiple GCM ensembles. Monthly Weather Review, 130(7), 1792-1811. +#'@references Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combination of multiple atmospheric GCM ensembles for seasonal prediction. Monthly Weather Review, 132(12), 2732-2744. +#'@references Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). +#' +#'@importFrom s2dv InsertDim +#'@import abind +#'@export - -.CategoricalEnsCombination.wrap <- function (fc, obs, cat.method, eval.method, amt.cat, ...)
{ +CategoricalEnsCombination <- function (fc, obs, cat.method, eval.method, amt.cat, ...) { if (!all(c("member", "sdate") %in% names(dim(fc)))) { stop("Parameter 'exp' must have the dimensions 'member' and 'sdate'.") @@ -276,10 +302,10 @@ comb.dims <- function(arr.in, dims.to.combine){ optim.tmp <- constrOptim(theta = init.par, f = .funct.optim, grad = .funct.optim.grad, ui = constr.mtrx, ci = constr.vec, freq.per.mdl.at.obs = freq.per.mdl.at.obs) - init.par <- optim.tmp$par * (1 - abs(rnorm(amt.coeff, 0, 0.01))) - var.cat.fc[ , eval.dexes] <- apply(InsertDim(var = - InsertDim(var = optim.tmp$par, lendim = amt.cat, posdim = 2), - lendim = amt.sdate.ev, posdim = 3) * + init.par <- optim.tmp$par * (1 - abs(rnorm(amt.coeff, 0, 0.01))) + var.cat.fc[ , eval.dexes] <- apply(suppressWarnings(InsertDim( + InsertDim(optim.tmp$par, lendim = amt.cat, posdim = 2), + lendim = amt.sdate.ev, posdim = 3)) * freq.per.mdl.ev[ , , , drop = FALSE], c(2,3), sum, na.rm = TRUE) } else if (cat.method == "comb") { freq.per.mdl.ev <- .calc.freq.per.mdl(cat.fc = cat.fc.ev, mdl.feat = mdl.feat, amt.cat = amt.cat) @@ -363,8 +389,12 @@ comb.dims <- function(arr.in, dims.to.combine){ .funct.optim.grad <- function(par, freq.per.mdl.at.obs){ amt.model <- dim(freq.per.mdl.at.obs)[1] - return(-apply(freq.per.mdl.at.obs/InsertDim(var = drop(par %*% freq.per.mdl.at.obs), - lendim = amt.model, posdim = 1), c(1), mean, na.rm = TRUE)) + preprocess <- drop(par %*% freq.per.mdl.at.obs) + if (is.null(dim(preprocess))) { + dim(preprocess) <- c(dim = length(preprocess)) + } + return(-apply(freq.per.mdl.at.obs/suppressWarnings(InsertDim(preprocess, + lendim = as.numeric(amt.model), posdim = 1)), c(1), mean, na.rm = TRUE)) } .calc.freq.per.mdl.at.obs <- function(cat.obs, cat.fc, amt.cat, mdl.feat){ @@ -373,7 +403,7 @@ comb.dims <- function(arr.in, dims.to.combine){ amt.mdl <- mdl.feat$amt.mdl mdl.msk.tmp <- mdl.feat$mdl.msk amt.coeff <- amt.mdl + 1 - msk.fc.obs <- (cat.fc == InsertDim(var = cat.obs, posdim = 1, lendim = amt.mbr)) + msk.fc.obs <- (cat.fc == InsertDim(cat.obs, posdim = 1, lendim = amt.mbr)) freq.per.mdl.at.obs <- array(NA, c(amt.coeff, amt.sdate)) for (i.mdl in seq(1, amt.mdl)){ freq.per.mdl.at.obs[i.mdl, ] <- apply(msk.fc.obs[mdl.msk.tmp[i.mdl, ], , drop = FALSE], diff --git a/R/CST_Load.R b/R/CST_Load.R index 76d5deb3335941151a227f174122027828eb7eee..65b695cdcf11a3bf0e5ecb06a7a36c24c4cec1d2 100644 --- a/R/CST_Load.R +++ b/R/CST_Load.R @@ -9,7 +9,7 @@ #' @param ... Parameters that are automatically forwarded to the `s2dverification::Load` function. See details in `?s2dverification::Load`. #' @return A list with one or two S3 objects, named 'exp' and 'obs', of the class 's2dv_cube', containing experimental and date-corresponding observational data, respectively. These 's2dv_cube's can be ingested by other functions in CSTools. If the parameter `exp` in the call to `CST_Load` is set to `NULL`, then only the 'obs' component is returned, and viceversa. #' @author Nicolau Manubens, \email{nicolau.manubens@bsc.es} -#' @import s2dverification +#' @importFrom s2dverification Load #' @importFrom utils glob2rx #' @export #' @examples diff --git a/R/CST_MergeDims.R b/R/CST_MergeDims.R index a9923c581b95e6c48c37d2efa2e32d7588a947f0..720448c7fc02d4fe7fea20655d761473654275d6 100644 --- a/R/CST_MergeDims.R +++ b/R/CST_MergeDims.R @@ -10,7 +10,7 @@ #'@param na.rm a logical indicating if the NA values should be removed or not. 
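Several hunks above swap s2dverification::InsertDim(var = x, posdim, lendim) for the s2dv version, which takes the array as an unnamed first argument and a 'name' for the new dimension. A minimal standalone sketch of that call pattern; the array and dimension names are invented for illustration.
library(s2dv)
x <- array(1:6, dim = c(sdate = 3, lon = 2))
# add a singleton 'member' dimension in front, as the updated code does
y <- InsertDim(x, posdim = 1, lendim = 1, name = 'member')
dim(y)   # member sdate lon -> 1 3 2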
#' #'@import abind -#'@import s2dverification +#'@importFrom s2dverification Subset #'@examples #' #'data <- 1 : c(2 * 3 * 4 * 5 * 6 * 7) @@ -49,7 +49,7 @@ CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'), rename_dim = #'@param na.rm a logical indicating if the NA values should be removed or not. #' #'@import abind -#'@import s2dverification +#'@importFrom s2dverification Subset #'@examples #' #'data <- 1 : 20 diff --git a/R/CST_MultiMetric.R b/R/CST_MultiMetric.R index e85c8b68d7b205e492dac752d8186f7a5ec85215..2390b490847bf9ed596fec25f15aa6b9cfe83df4 100644 --- a/R/CST_MultiMetric.R +++ b/R/CST_MultiMetric.R @@ -4,18 +4,24 @@ #'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} #'@description This function calculates correlation (Anomaly Correlation Coefficient; ACC), root mean square error (RMS) and the root mean square error skill score (RMSSS) of individual anomaly models and multi-models mean (if desired) with the observations. #' -#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of the seasonal forecast experiment data in the element named \code{$data}. +#'@param exp an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of the seasonal forecast experiments data in the element named \code{$data}. #'@param obs an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of observed data in the element named \code{$data}. -#'@param metric a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms' or 'rmsss. +#'@param metric a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms', 'rmsss' or 'rpss'. If 'rpss' is chosen, the tercile probabilities are evaluated. #'@param multimodel a logical value indicating whether a Multi-Model Mean should be computed. #' -#'@return an object of class \code{s2dv_cube} containing the statistics of the selected metric in the element \code{$data} which is an array with two datset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the greatest first dimension correspons to the Multi-Model Mean. The third dimension contains the statistics selected. For metric \code{correlation}, the third dimension is of length four and they corresponds to the lower limit of the 95\% confidence interval, the statistics itselfs, the upper limit of the 95\% confidence interval and the 95\% significance level. For metric \code{rms}, the third dimension is length three and they corresponds to the lower limit of the 95\% confidence interval, the RMSE and the upper limit of the 95\% confidence interval. For metric \code{rmsss}, the third dimension is length two and they corresponds to the statistics itselfs and the p-value of the one-sided Fisher test with Ho: RMSSS = 0. -#'@seealso \code{\link[s2dverification]{Corr}}, \code{\link[s2dverification]{RMS}}, \code{\link[s2dverification]{RMSSS}} and \code{\link{CST_Load}} +#'@param time_dim name of the temporal dimension where a mean will be applied. It can be NULL; the default value is 'ftime'. +#'@param memb_dim name of the member dimension. It can be NULL; the default value is 'member'. +#'@param sdate_dim name of the start date dimension or a dimension name identifying the different forecasts. It can be NULL; the default value is 'sdate'.
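The new 'rpss' option documented above is built on easyVerification's fair RPSS. A small self-contained sketch of the underlying call, mirroring the veriApply() usage that appears later in this hunk; the toy forecast and observations are invented.
library(easyVerification)
set.seed(1)
fcst <- array(rnorm(30 * 5), dim = c(30, 5))   # 30 start dates x 5 members
obs  <- rnorm(30)                              # matching observations
# Fair RPSS for tercile categories, against the climatological reference
veriApply("FairRpss", fcst = fcst, obs = obs, ensdim = 2, tdim = 1,
          prob = c(1/3, 2/3))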
+#'@return an object of class \code{s2dv_cube} containing the statistics of the selected metric in the element \code{$data}, which is a list of arrays: one for the requested metric and others for statistics about its significance. The arrays have two dataset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the first position in the first 'nexp' dimension corresponds to the Multi-Model Mean. +#'@seealso \code{\link[s2dv]{Corr}}, \code{\link[s2dv]{RMS}}, \code{\link[s2dv]{RMSSS}} and \code{\link{CST_Load}} #'@references -#'Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{http://link.springer.com/10.1007/s00382-018-4404-z} +#'Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{https://link.springer.com/10.1007/s00382-018-4404-z} #' -#'@import s2dverification +#'@importFrom s2dv MeanDims Reorder Corr RMS RMSSS InsertDim +#'@import abind +#'@importFrom easyVerification climFairRpss veriApply #'@import stats +#'@import multiApply #'@examples #'library(zeallot) #'mod <- 1 : (2 * 3 * 4 * 5 * 6 * 7) @@ -31,21 +37,59 @@ #'c(ano_exp, ano_obs) %<-% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = TRUE) #'a <- CST_MultiMetric(exp = ano_exp, obs = ano_obs) #'str(a) +#'\donttest{ +#'exp <- lonlat_data$exp +#'obs <- lonlat_data$obs +#'a <- CST_MultiMetric(exp, obs, metric = 'rpss', multimodel = FALSE) +#'a <- CST_MultiMetric(exp, obs, metric = 'correlation') +#'a <- CST_MultiMetric(exp, obs, metric = 'rms') +#'a <- CST_MultiMetric(exp, obs, metric = 'rmsss') +#'} #'@export -CST_MultiMetric <- function(exp, obs, metric = "correlation", multimodel = TRUE) { +CST_MultiMetric <- function(exp, obs, metric = "correlation", multimodel = TRUE, + time_dim = 'ftime', memb_dim = 'member', + sdate_dim = 'sdate') { if (!inherits(exp, 's2dv_cube') || !inherits(obs, 's2dv_cube')) { stop("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", "as output by CSTools::CST_Load.") } - - if (dim(obs$data)['member'] != 1) { - stop("The length of the dimension 'member' in the component 'data' ", - "of the parameter 'obs' must be equal to 1.") - } - - if (!is.null(names(dim(exp$data))) & !is.null(names(dim(obs$data)))) { - if (all(names(dim(exp$data)) %in% names(dim(obs$data)))) { - dimnames <- names(dim(exp$data)) + result <- MultiMetric(exp$data, obs$data, metric = metric, multimodel = multimodel, + time_dim = time_dim, memb_dim = memb_dim, sdate_dim = sdate_dim) + exp$data <- result + return(exp) +} +#'Multiple Metrics applied in Multiple Model Anomalies +#' +#'@author Mishra Niti, \email{niti.mishra@bsc.es} +#'@author Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +#'@description This function calculates correlation (Anomaly Correlation Coefficient; ACC), root mean square error (RMS) and the root mean square error skill score (RMSSS) of individual anomaly models and the multi-model mean (if desired) with the observations on arrays with named dimensions. +#' +#'@param exp a multidimensional array with named dimensions. +#'@param obs a multidimensional array with named dimensions. +#'@param metric a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms' or 'rmsss'. +#'@param multimodel a logical value indicating whether a Multi-Model Mean should be computed.
+#' +#'@param time_dim name of the temporal dimension where a mean will be applied. It can be NULL, the default value is 'ftime'. +#'@param memb_dim name of the member dimension. It can be NULL, the default value is 'member'. +#'@param sdate_dim name of the start date dimension or a dimension name identifiying the different forecast. It can be NULL, the default value is 'sdate'. +#'@return a list of arrays containing the statistics of the selected metric in the element \code{$data} which is a list of arrays: for the metric requested and others for statistics about its signeificance. The arrays have two dataset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the greatest position in the first dimension correspons to the Multi-Model Mean. +#'@seealso \code{\link[s2dv]{Corr}}, \code{\link[s2dv]{RMS}}, \code{\link[s2dv]{RMSSS}} and \code{\link{CST_Load}} +#'@references +#'Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{https://link.springer.com/10.1007/s00382-018-4404-z} +#' +#'@importFrom s2dv MeanDims Reorder Corr RMS RMSSS InsertDim +#'@import abind +#'@importFrom easyVerification climFairRpss veriApply +#'@import stats +#'@import multiApply +#'@examples +#'res <- MultiMetric(lonlat_data$exp$data, lonlat_data$obs$data) +#'@export +MultiMetric <- function(exp, obs, metric = "correlation", multimodel = TRUE, + time_dim = 'ftime', memb_dim = 'member', sdate_dim = 'sdate') { + if (!is.null(names(dim(exp))) & !is.null(names(dim(obs)))) { + if (all(names(dim(exp)) %in% names(dim(obs)))) { + dimnames <- names(dim(exp)) } else { stop("Dimension names of element 'data' from parameters 'exp'", " and 'obs' should have the same name dimmension.") @@ -54,7 +98,6 @@ CST_MultiMetric <- function(exp, obs, metric = "correlation", multimodel = TRUE) stop("Element 'data' from parameters 'exp' and 'obs'", " should have dimmension names.") } - if (!is.logical(multimodel)) { stop("Parameter 'multimodel' must be a logical value.") } @@ -68,49 +111,110 @@ CST_MultiMetric <- function(exp, obs, metric = "correlation", multimodel = TRUE) warning("Parameter 'multimodel' has length > 1 and only the first ", "element will be used.") } - - # seasonal average of anomalies per model - AvgExp <- MeanListDim(exp$data, narm = T, c(2, 4)) - AvgObs <- MeanListDim(obs$data, narm = T, c(2, 4)) - - # indv model correlation - if (metric == 'correlation') { - corr <- Corr(AvgExp, AvgObs, posloop = 1, poscor = 2) - } else if (metric == 'rms') { - corr <- RMS(AvgExp, AvgObs, posloop = 1, posRMS = 2) - } else if (metric == 'rmsss') { - corr <- RMSSS(AvgExp, AvgObs, posloop = 1, posRMS = 2) + if (is.null(time_dim) | !is.character(time_dim)) { + time_dim <- 'time' + } + if (is.null(memb_dim) | !is.character(memb_dim)) { + memb_dim <- 'memb' + } + if( is.null(sdate_dim) | !is.character(sdate_dim)) { + sdate_dim <- 'sdate' + } + exp_dims <- dim(exp) + obs_dims <- dim(obs) + if (!is.null(names(exp_dims)) & !is.null(names(obs_dims))) { + if (all(names(exp_dims) == names(obs_dims))) { + if (!(time_dim %in% names(exp_dims))) { + warning("Parameter 'time_dim' does not match with a dimension name in 'exp'", + " and 'obs'. 
A 'time_dim' of length 1 is added.") + dim(exp) <- c(exp_dims, time_dim = 1) + names(dim(exp))[length(dim(exp))] <- time_dim + dim(obs) <- c(obs_dims, time_dim = 1) + names(dim(obs))[length(dim(obs))] <- time_dim + exp_dims <- dim(exp) + obs_dims <- dim(obs) + } + if (!(memb_dim %in% names(exp_dims))) { + warning("Parameter 'memb_dim' does not match with a dimension name in ", + "'exp' and 'obs'. A 'memb_dim' of length 1 is added.") + dim(exp) <- c(exp_dims, memb_dim = 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + dim(obs) <- c(obs_dims, memb_dim = 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + exp_dims <- dim(exp) + obs_dims <- dim(obs) + } + if (!(sdate_dim %in% names(exp_dims))) { + warning("Parameter 'sdate_dim' does not match with a dimension name in ", + "'exp' and 'obs'. A 'sdate_dim' of length 1 is added.") + dim(exp) <- c(exp_dims, sdate_dim = 1) + names(dim(exp))[length(dim(exp))] <- sdate_dim + dim(obs) <- c(obs_dims, sdate_dim = 1) + names(dim(obs))[length(dim(obs))] <- sdate_dim + exp_dims <- dim(exp) + obs_dims <- dim(obs) + } + } else { + stop("Dimension names of element 'data' from parameters 'exp'", + " and 'obs' should be the same and in the same order.") + } } else { - stop("Parameter 'metric' must be a character string indicating ", - "one of the options: 'correlation', 'rms' or 'rmse'.") + stop("Element 'data' from parameters 'exp' and 'obs'", + " should have dimmension names.") } - if (multimodel == TRUE) { - # seasonal avg of anomalies for multi-model - AvgExp_MMM <- MeanListDim(AvgExp, narm = TRUE, 1) - AvgObs_MMM <- MeanListDim(AvgObs, narm = TRUE, 1) - # multi model correlation + if (metric == 'rpss') { + if (multimodel == TRUE) { + warning("A probabilistic metric cannot be use to evaluate a multimodel mean.") + } + AvgExp <- MeanDims(exp, time_dim, na.rm = TRUE) + AvgObs <- MeanDims(obs, time_dim, na.rm = TRUE) + dif_dims <- which(dim(AvgExp) != dim(AvgObs)) + dif_dims <- names(dif_dims[-which(names(dif_dims) == memb_dim)]) + lapply(dif_dims, function(x) { + names(dim(AvgExp))[which(names(dim(AvgExp)) == x)] <<- paste0(dif_dims, '_exp')}) + + pos_memb <- which(names(dim(AvgExp)) == memb_dim) + dim(AvgObs) <- dim(AvgObs)[-pos_memb] + AvgExp <- Reorder(AvgExp, c(names(dim(AvgExp))[-pos_memb], memb_dim)) + pos_memb <- which(names(dim(AvgExp)) == memb_dim) + pos_sdate <- which(names(dim(AvgExp)) == sdate_dim) + corr <- Apply(list(AvgExp, AvgObs), + target_dims = list(c(sdate_dim, 'lat', 'lon', memb_dim), c(sdate_dim, 'lat', 'lon')), + fun = function(x, y) { + veriApply('FairRpss', fcst = x, obs = y, + ensdim = which(names(dim(x)) == 'member'), + tdim = which(names(dim(x)) == 'sdate'), + prob = c(1/3, 2/3))}) + + } else if (metric %in% c('correlation', 'rms', 'rmsss')) { + AvgExp <- MeanDims(exp, c(memb_dim, time_dim), na.rm = TRUE) + AvgObs <- MeanDims(obs, c(memb_dim, time_dim), na.rm = TRUE) + dataset_dim <- c('data', 'dataset', 'datsets', 'models') + if (any(dataset_dim %in% names(exp_dims))) { + dataset_dim <- dataset_dim[dataset_dim %in% names(dim(AvgExp))] + } else { + warning("Parameter 'exp' and 'obs' does not have a dimension 'dataset'.") + } + if (multimodel == TRUE) { + # seasonal avg of anomalies for multi-model + AvgExp_MMM <- MeanDims(AvgExp, c(dataset_dim), na.rm = TRUE) + pos_dataset <- which(names(dim(AvgExp)) == dataset_dim) + AvgExp_MMM <- s2dv::InsertDim(AvgExp_MMM, posdim = pos_dataset, lendim = 1, + name = dataset_dim) + AvgExp <- abind(AvgExp_MMM, AvgExp, along = pos_dataset) + names(dim(AvgExp)) <- names(dim(AvgExp_MMM)) + } if 
(metric == 'correlation') { - corr_MMM <- Corr(var_exp = InsertDim(AvgExp_MMM, 1, 1), - var_obs = InsertDim(AvgObs_MMM, 1, 1), - posloop = 1, poscor = 2) + corr <- s2dv::Corr(AvgExp, AvgObs, dat_dim = dataset_dim, time_dim = sdate_dim) } else if (metric == 'rms') { - corr_MMM <- RMS(var_exp = InsertDim(AvgExp_MMM, 1, 1), - var_obs = InsertDim(AvgObs_MMM, 1, 1), - posloop = 1, posRMS = 2) + corr <- s2dv::RMS(AvgExp, AvgObs, dat_dim = dataset_dim, time_dim = sdate_dim) } else if (metric == 'rmsss') { - corr_MMM <- RMSSS(var_exp = InsertDim(AvgExp_MMM, 1, 1), - var_obs = InsertDim(AvgObs_MMM, 1, 1), - posloop = 1, posRMS = 2) - } - corr <- abind::abind(corr, corr_MMM, along = 1) + corr <- s2dv::RMSSS(AvgExp, AvgObs, dat_dim = dataset_dim, time_dim = sdate_dim) + } + } else { + stop("Parameter 'metric' must be a character string indicating ", + "one of the options: 'correlation', 'rms', 'rmsss' or 'rpss'.") } - names(dim(corr)) <- c(dimnames[1], dimnames[1], 'statistics', dimnames[5 : 6]) - - #exp$data <- ano$ano_exp - #obs$data <- ano$ano_obs - exp$data <- corr - exp$Datasets <- c(exp$Datasets, obs$Datasets) - exp$source_files <- c(exp$source_files, obs$source_files) - return(exp) + return(corr) } diff --git a/R/CST_MultivarRMSE.R b/R/CST_MultivarRMSE.R index c5ab53b5291a690d50a139b317db39b77b3ccad7..70c88c09c8d88c6ed785e4e60eb2b6fd35d56080 100644 --- a/R/CST_MultivarRMSE.R +++ b/R/CST_MultivarRMSE.R @@ -10,8 +10,8 @@ #' #'@return an object of class \code{s2dv_cube} containing the RMSE in the element \code{$data} which is an array with two datset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. An array with dimensions: c(number of exp, number of obs, 1 (the multivariate RMSE value), number of lat, number of lon) #' -#'@seealso \code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}} -#'@import s2dverification +#'@seealso \code{\link[s2dv]{RMS}} and \code{\link{CST_Load}} +#'@importFrom s2dv RMS MeanDims #'@examples #'# Creation of sample s2dverification objects. These are not complete #'# s2dverification objects though. The Load function returns complete objects. 
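Both MultiMetric and CST_MultivarRMSE now lean on the s2dv metric functions with explicit dat_dim and time_dim arguments. A minimal sketch of that call pattern on invented toy anomalies, assuming only a 'dataset' and an 'sdate' dimension:
library(s2dv)
set.seed(2)
exp <- array(rnorm(2 * 6), dim = c(dataset = 2, sdate = 6))   # 2 models
obs <- array(rnorm(1 * 6), dim = c(dataset = 1, sdate = 6))   # 1 reference
rms <- RMS(exp, obs, dat_dim = 'dataset', time_dim = 'sdate', conf = FALSE)$rms
dim(rms)   # nexp x nobs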
@@ -108,17 +108,18 @@ CST_MultivarRMSE <- function(exp, obs, weight = NULL) { sumweights <- 0 for (j in 1 : nvar) { # seasonal average of anomalies - AvgExp <- MeanListDim(exp[[j]]$data, narm = TRUE, c(2, 4)) - AvgObs <- MeanListDim(obs[[j]]$data, narm = TRUE, c(2, 4)) + AvgExp <- MeanDims(exp[[j]]$data, c('member', 'ftime'), na.rm = TRUE) + AvgObs <- MeanDims(obs[[j]]$data, c('member', 'ftime'), na.rm = TRUE) # multivariate RMSE (weighted) - rmse <- RMS(AvgExp, AvgObs, posloop = 1, posRMS = 2, conf = FALSE) + rmse <- s2dv::RMS(AvgExp, AvgObs, dat_dim = 'dataset', time_dim = 'sdate', + conf = FALSE)$rms stdev <- sd(AvgObs) mvrmse <- mvrmse + (rmse / stdev * as.numeric(weight[j])) sumweights <- sumweights + as.numeric(weight[j]) } mvrmse <- mvrmse / sumweights - names(dim(mvrmse)) <- c(dimnames[1], dimnames[1], 'statistics', dimnames[5 : 6]) + # names(dim(mvrmse)) <- c(dimnames[1], dimnames[1], 'statistics', dimnames[5 : 6]) exp_Datasets <- unlist(lapply(exp, function(x) { x[[which(names(x) == 'Datasets')]]})) exp_source_files <- unlist(lapply(exp, function(x) { diff --git a/R/CST_QuantileMapping.R b/R/CST_QuantileMapping.R index 3923569ae3ea59355fc2ba36dbd2d11f05d99d4a..b27cbd4cd49bc92d20bbbcc954d679b271706557 100644 --- a/R/CST_QuantileMapping.R +++ b/R/CST_QuantileMapping.R @@ -23,6 +23,8 @@ #'\itemize{ #'\item{wet.day} {logical indicating whether to perform wet day correction or not.(Not available in 'DIS' method)} #'\item{qstep} {NULL or a numeric value between 0 and 1.}} +#' When providing a forecast to be corrected through the parameter \code{exp_cor}, some inputs might need to be modified. The quantile correction is computed by comparing the objects passed through the 'exp' and 'obs' parameters; this correction is later applied to the forecast provided in 'exp_cor'. Imagine the case of 'exp' and 'obs' having several start dates, stored using a dimension e.g. 'sdate', where 'sample_dims' includes this dimension 'sdate' and 'exp_cor' has forecasts for several start dates different from the ones in 'exp'. In this case, the correction computed with 'exp' and 'obs' would be applied to each 'sdate' of 'exp_cor' separately. This example corresponds to splitting a dataset into a training set and a validation set. +#' #'@return an oject of class \code{s2dv_cube} containing the experimental data after applyingthe quantile mapping correction.
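The exp_cor behaviour described above boils down to fitting the correction on one pair of series and applying it to another. A minimal sketch with the underlying qmap calls on synthetic data; all values and the gamma parameters are invented.
library(qmap)
set.seed(3)
hind <- rgamma(300, shape = 2, rate = 0.5)   # hindcast ('exp')
obs  <- rgamma(300, shape = 2, rate = 0.4)   # reference ('obs')
fcst <- rgamma(50,  shape = 2, rate = 0.5)   # independent forecast ('exp_cor')
fit <- fitQmap(obs, hind, method = "QUANT", wet.day = FALSE, qstep = 0.01)
fcst_corrected <- doQmap(fcst, fit)          # correction trained on exp/obs, applied to exp_cor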
#') <- c(dataset = 1, member = 10, sdate = 20, ftime = 60 , #'@import qmap @@ -48,6 +50,14 @@ #'obs <- lonlat_data$obs #'res <- CST_QuantileMapping(exp, obs) #' +#'exp_cor <- exp +#'exp_cor$data <- exp_cor$data[,,1,,,] +#'dim(exp_cor$data) <- c(dataset = 1, member = 15, sdate = 1, ftime = 3, +#' lat = 22, lon = 53) +#'res <- CST_QuantileMapping(exp, obs, exp_cor, +#' sample_dims = c('sdate', 'ftime', 'member')) +#'res <- CST_QuantileMapping(exp, obs, exp_cor, +#' sample_dims = c('ftime', 'member')) #'data(obsprecip) #'data(modprecip) #'exp <- modprecip$MOSS[1:10000] @@ -60,6 +70,27 @@ #'class(obs) <- 's2dv_cube' #'res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', #' method = 'DIST') +#'# Example using different lenght of members and sdates: +#'exp <- lonlat_data$exp +#'exp$data <- exp$data[,,1:4,,,] +#'dim(exp$data) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, +#' lat = 22, lon = 53) +#'obs <- lonlat_data$obs +#'obs$data <- obs$data[,,1:4, ,,] +#'dim(obs$data) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, +#' lat = 22, lon = 53) +#'exp_cor <- lonlat_data$exp +#'exp_cor$data <- exp_cor$data[,1:5,5:6,,,] +#'dim(exp_cor$data) <- c(dataset = 1, member = 5, sdate = 2, ftime = 3, +#' lat = 22, lon = 53) +#'res <- CST_QuantileMapping(exp, obs, exp_cor, +#' sample_dims = c('sdate', 'ftime', 'member')) +#'exp_cor <- lonlat_data$exp +#'exp_cor$data <- exp_cor$data[,,5:6,,,] +#'dim(exp_cor$data) <- c(dataset = 1, member = 15, sdate = 2, ftime = 3, +#' lat = 22, lon = 53) +#'res <- CST_QuantileMapping(exp, obs, exp_cor, +#' sample_dims = c('sdate', 'ftime', 'member')) #'} #'@export CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, @@ -79,18 +110,52 @@ CST_QuantileMapping <- function(exp, obs, exp_cor = NULL, stop("Parameter 'method' must be one of the following methods: ", "'PTF','DIST','RQUANT','QUANT','SSPLIN'.") } - dimnames <- names(dim(exp$data)) QMapped <- QuantileMapping(exp = exp$data, obs = obs$data, exp_cor = exp_cor$data, sample_dims = sample_dims, sample_length = sample_length, method = method, ncores = ncores, ...) - pos <- match(dimnames, names(dim(QMapped))) - QMapped <- aperm(QMapped, pos) - names(dim(QMapped)) <- dimnames - exp$data <- QMapped - exp$Datasets <- c(exp$Datasets, obs$Datasets) - exp$source_files <- c(exp$source_files, obs$source_files) + if (is.null(exp_cor)) { + exp$data <- QMapped + exp$source_files <- c(exp$source_files, obs$source_files) + } else { + exp_cor$data <- QMapped + exp_cor$source_files <- c(exp$source_files, obs$source_files, exp_cor$source_files) + exp <- exp_cor + } return(exp) } +#'Quantiles Mapping for seasonal or decadal forecast data +#' +#'@description This function is a wrapper from fitQmap and doQmap from package 'qmap'to be applied in CSTools objects of class 's2dv_cube'. The quantile mapping adjustment between an experiment, tipically a hindcast, and observations is applied to the experiment itself or to a provided forecast. +#' +#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} +#'@param exp a multi-dimensional array with named dimensions containing the hindcast. +#'@param obs a multi-dimensional array with named dimensions (the same as the provided in 'exp') containing the reference dataset. +#'@param exp_cor a multi-dimensional array with named dimensions in which the quantile mapping correction will be applied. If it is not specified, the correction is applied in object \code{exp}. 
+#'@param sample_dims a character vector indicating the dimensions that can be used as a sample for the same distribution. +#'@param sample_length a numeric value indicating the length of the timeseries window to be used as sample for the sample distribution and correction. By default (NULL), the total length of the timeseries is used. +#'@param method a character string indicating the method to be used: 'PTF','DIST','RQUANT','QUANT','SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used. +#'@param ncores an integer indicating the number of parallel processes to spawn for parallel computation in multiple cores. +#'@param ... additional arguments passed to the method specified by \code{method}. +#' +#'@details The different methods are: +#'\itemize{ +#'\item{'PTF'} {fits a parametric transformation to the quantile-quantile relation of observed and modelled values. See \code{?qmap::fitQmapPTF}.} +#' \item{'DIST'} {fits a theoretical distribution to observed and to modelled time series. See \code{?qmap::fitQmapDIST}.} +#'\item{'RQUANT'} {estimates the values of the quantile-quantile relation of observed and modelled time series for regularly spaced quantiles using local linear least square regression. See \code{?qmap::fitQmapRQUANT}.} +#'\item{'QUANT'} {estimates values of the empirical cumulative distribution function of observed and modelled time series for regularly spaced quantiles. See \code{?qmap::fitQmapQUANT}.} +#'\item{'SSPLIN'} {fits a smoothing spline to the quantile-quantile plot of observed and modelled time series. See \code{?qmap::fitQmapSSPLIN}.}} +#'All methods accept some common arguments: +#'\itemize{ +#'\item{wet.day} {logical indicating whether to perform wet day correction or not (not available in the 'DIST' method).} +#'\item{qstep} {NULL or a numeric value between 0 and 1.}} +#'@return a multi-dimensional array containing the experimental data after applying the quantile mapping correction. +#'@import qmap +#'@import multiApply +#'@import abind +#' +#'@seealso \code{qmap::fitQmap} and \code{qmap::doQmap} +#'@export QuantileMapping <- function(exp, obs, exp_cor = NULL, sample_dims = 'ftime', sample_length = NULL, method = 'QUANT', ncores = NULL, ...)
{ obsdims <- names(dim(obs)) @@ -134,12 +199,41 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sample_dims = 'ftime', obs <- adrop(obs, drop = todrop) } } - if (!all(sample_dims %in% obsdims)) { newobsdims <- sample_dims[!sample_dims %in% obsdims] dim(obs) <- c(dim(obs), 1 : length(newobsdims)) names(dim(obs))[-c(1:length(obsdims))] <- newobsdims + } + + if (!is.null(exp_cor)) { + commondims <- exp_cordims[exp_cordims %in% expdims] + commondims <- names(which(unlist(lapply(commondims, function(x) { + dim(exp_cor)[exp_cordims == x] != dim(exp)[expdims == x]})))) + if (any(commondims %in% sample_dims)) { + todrop <- commondims[(commondims %in% sample_dims)] + todroppos <- match(todrop, sample_dims) + if (all(dim(exp_cor)[todrop] != 1)) { + warning(paste("The sample_dims", paste(todrop, collapse = " "), + "are not used when applying the", + "correction to 'exp_cor'")) + sample_dims <- list(sample_dims, sample_dims, sample_dims[-todroppos]) + } else { + exp_cor <- adrop(exp_cor, drop = todroppos) + } + } else { + todrop <- commondims[!(commondims %in% sample_dims)] + todrop <- match(todrop, obsdims) + if (all(dim(exp_cor)[todrop] != 1)) { + stop("Review parameter 'sample_dims' or the data dimensions ", + "since multiple dimensions with different length have ", + "being found in the data inputs that don't match with ", + "'sample_dims' parameter.") + } else { + exp_cor <- adrop(exp_cor, drop = todrop) + } + } } + if (!is.null(sample_length) & !is.numeric(sample_length)) { warning("Parameter 'sample_length' has not been correctly defined and ", "the whole length of the timeseries will be used.") @@ -174,8 +268,16 @@ QuantileMapping <- function(exp, obs, exp_cor = NULL, sample_dims = 'ftime', method = method, ncores = ncores)$output1 } pos <- match(expdims, names(dim(qmaped))) + out_names <- names(dim(exp)) + if (length(pos) < length(dim(qmaped))) { + toadd <- length(dim(qmaped)) - length(pos) + toadd <- seq(max(pos) + 1, max(pos) + toadd, 1) + pos <- c(pos, toadd) + new <- names(dim(qmaped))[names(dim(qmaped)) %in% out_names == FALSE] + out_names <- c(out_names, new) + } qmaped <- aperm(qmaped, pos) - dim(qmaped) <- dim(exp) + names(dim(qmaped)) <- out_names return(qmaped) } qmapcor <- function(exp, obs, exp_cor = NULL, sample_length = NULL, method = 'QUANT', diff --git a/R/CST_RFSlope.R b/R/CST_RFSlope.R index 6bc3dc2d989d373b5f59912b72f7abf7af03fb2c..759820377b10fd520ac8f09c74321698ae5e3c08 100644 --- a/R/CST_RFSlope.R +++ b/R/CST_RFSlope.R @@ -15,6 +15,7 @@ #' over which to compute spectral slopes. If a character array of dimension names is provided, the spectral slopes #' will be computed as an average over all elements belonging to those dimensions. #' If omitted one of c("ftime", "sdate", "time") is searched and the first one with more than one element is chosen. +#' @param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. #' @return CST_RFSlope() returns spectral slopes using the RainFARM convention #' (the logarithmic slope of k*|A(k)|^2 where A(k) are the spectral amplitudes). 
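The spectral slope mentioned above is the log-log slope of k*|A(k)|^2, and it can be reproduced with the rainfarmr helpers that RFSlope uses internally. A small sketch on a random field, purely illustrative:
library(rainfarmr)
set.seed(4)
pr <- array(rexp(32 * 32 * 10), dim = c(32, 32, 10))  # lon x lat x time, square and even
fxp <- fft2d(pr)            # spatial power spectrum, averaged over the time slices
fitslope(fxp, kmin = 1)     # logarithmic slope of k*|A(k)|^2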
#' The returned array has the same dimensions as the \code{exp} element of the input object, @@ -38,7 +39,7 @@ #' #[1,] 1.893503 1.893503 1.893503 #' #[2,] 1.893503 1.893503 1.893503 #' @export -CST_RFSlope <- function(data, kmin = 1, time_dim = NULL) { +CST_RFSlope <- function(data, kmin = 1, time_dim = NULL, ncores = 1) { slopes <- RFSlope(data$data, kmin, time_dim, lon_dim = "lon", lat_dim = "lat") @@ -68,11 +69,15 @@ CST_RFSlope <- function(data, kmin = 1, time_dim = NULL) { #' with more than one element is chosen. #' @param lon_dim Name of lon dimension ("lon" by default). #' @param lat_dim Name of lat dimension ("lat" by default). +#' @param ncores is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. +#' #' @return RFSlope() returns spectral slopes using the RainFARM convention #' (the logarithmic slope of k*|A(k)|^2 where A(k) are the spectral amplitudes). #' The returned array has the same dimensions as the input array, #' minus the dimensions specified by \code{lon_dim}, \code{lat_dim} and \code{time_dim}. #' @import multiApply +#' @import rainfarmr +#' @importFrom s2dverification Subset #' @export #' @examples #' # Example for the 'reduced' RFSlope function @@ -94,8 +99,17 @@ CST_RFSlope <- function(data, kmin = 1, time_dim = NULL) { #' #[3,] 1.893503 1.893503 1.893503 #' #[4,] 1.893503 1.893503 1.893503 RFSlope <- function(data, kmin = 1, time_dim = NULL, - lon_dim = "lon", lat_dim = "lat") { - + lon_dim = "lon", lat_dim = "lat", ncores = 1) { + if (length(ncores) > 1) { + ncores = ncores[1] + warning("Parameter 'ncores' has length > 1 and only the first element will be used.") + } + if (!is.null(ncores)) { + ncores <- round(ncores) + if (ncores == 0) { + ncores = NULL + } + } # Ensure input grid is square and with even dimensions if ( (dim(data)[lon_dim] != dim(data)[lat_dim]) | (dim(data)[lon_dim] %% 2 == 1)) { @@ -139,7 +153,7 @@ RFSlope <- function(data, kmin = 1, time_dim = NULL, # Repeatedly apply .RFSlope result <- Apply(data, c(lon_dim, lat_dim, "rainfarm_samples"), - .RFSlope, kmin)$output1 + .RFSlope, kmin, ncores = ncores)$output1 return(slopes = result) } @@ -152,7 +166,11 @@ RFSlope <- function(data, kmin = 1, time_dim = NULL, #' @noRd .RFSlope <- function(pr, kmin) { - + if (any(is.na(pr))) { + posna <- unlist(lapply(1:dim(pr)['rainfarm_samples'], + function(x){!is.na(pr[1, 1, x])})) + pr <- Subset(pr, 'rainfarm_samples', posna) + } fxp <- fft2d(pr) sx <- fitslope(fxp, kmin = kmin) return(sx) diff --git a/R/CST_RFWeights.R b/R/CST_RFWeights.R index 16415a572c4c84d3220fd740bae003f2c672f7fa..fa7eec75045717fdd08918dcc1b1d0b19cbd77f6 100644 --- a/R/CST_RFWeights.R +++ b/R/CST_RFWeights.R @@ -17,16 +17,22 @@ #' high-resolution gridded climatology from observations, or a reconstruction such as those which #' can be downloaded from the WORLDCLIM (http://www.worldclim.org) or CHELSA (http://chelsa-climate.org) #' websites. The latter data will need to be converted to NetCDF format before being used (see for example the GDAL tools (https://www.gdal.org). +#' It could also be a 's2dv_cube' object. #' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). -#' @param lon Vector of longitudes. +#' @param lon Vector of longitudes. #' @param lat Vector of latitudes. #' The number of longitudes and latitudes is expected to be even and the same. If not #' the function will perform a subsetting to ensure this condition. 
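The NA handling added to .RFSlope (and later to .RainFARM) simply drops the missing slices along 'rainfarm_samples' before the spectral fit. A standalone sketch of that pattern with an invented array; numeric indices are used instead of the logical vector for safety:
library(s2dverification)
pr <- array(rnorm(4 * 4 * 6), dim = c(lon = 4, lat = 4, rainfarm_samples = 6))
pr[, , 3] <- NA                      # one fully missing sample
keep <- !is.na(pr[1, 1, ])           # flag valid slices, as .RFSlope does
pr_clean <- Subset(pr, 'rainfarm_samples', which(keep))
dim(pr_clean)                        # 4 4 5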
#' @param varname Name of the variable to be read from \code{climfile}. -#' @param fsmooth Logical to use smooth conservation (default) or large-scale box-average conservation. -#' @return A matrix containing the weights with dimensions (lon, lat). +#' @param fsmooth Logical to use smooth conservation (default) or large-scale box-average conservation. +#' @param lonname a character string indicating the name of the longitudinal dimension set as 'lon' by default. +#' @param latname a character string indicating the name of the latitudinal dimension set as 'lat' by default. +#' @param ncores an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. +#' +#' @return An object of class 's2dv_cube' containing in matrix \code{data} the weights with dimensions (lon, lat). #' @import ncdf4 #' @import rainfarmr +#' @import multiApply #' @importFrom utils tail #' @importFrom utils head #' @examples @@ -42,8 +48,15 @@ #' } #' @export -CST_RFWeights <- function(climfile, nf, lon, lat, varname = "", fsmooth=TRUE) { - +CST_RFWeights <- function(climfile, nf, lon, lat, varname = NULL, + fsmooth = TRUE, + lonname = 'lon', latname = 'lat', ncores = NULL) { + if (!inherits(climfile, "s2dv_cube")) { + if (!is.null(varname) & !is.character(varname)) { + stop("Parameter 'varname' must be a character string indicating the name", + " of the variable to be read from the file.") + } + } # Ensure input grid is square and with even dimensions if ((length(lat) != length(lon)) | (length(lon) %% 2 == 1)) { warning("Input data are expected to be on a square grid", @@ -57,17 +70,86 @@ CST_RFWeights <- function(climfile, nf, lon, lat, varname = "", fsmooth=TRUE) { " lat: [", lat[1], ", ", lat[length(lat)], "]")) } - ncin <- nc_open(climfile) - latin <- ncvar_get(ncin, grep("lat", attributes(ncin$dim)$names, - value = TRUE)) - lonin <- ncvar_get(ncin, grep("lon", attributes(ncin$dim)$names, - value = TRUE)) - if (varname == "") { - varname <- grep("bnds", attributes(ncin$var)$names, - invert = TRUE, value = TRUE)[1] + if (is.character(climfile)) { + ncin <- nc_open(climfile) + latin <- ncvar_get(ncin, grep(latname, attributes(ncin$dim)$names, + value = TRUE)) + lonin <- ncvar_get(ncin, grep(lonname, attributes(ncin$dim)$names, + value = TRUE)) + if (varname == "") { + varname <- grep("bnds", attributes(ncin$var)$names, + invert = TRUE, value = TRUE)[1] + } + zclim <- ncvar_get(ncin, varname) + nc_close(ncin) + } else if (inherits(climfile, "s2dv_cube")) { + zclim <- climfile$data + latin <- climfile$lat + lonin <- climfile$lon + } else { + stop("Parameter 'climfile' is expected to be a character string indicating", + " the path to the files or an object of class 's2dv_cube'.") + } + # Check dim names and order + if (length(names(dim(zclim))) < 1) { + stop("The dataset provided in 'climfile' requires dimension names.") + } + + result <- RF_Weights(zclim, latin, lonin, nf, lat, lon, fsmooth = fsmooth, + lonname = lonname, latname = latname, ncores = ncores) + if (inherits(climfile, "s2dv_cube")) { + climfile$data <- result$data + climfile$lon <- result$lon + climfile$lat <- result$lat + } else { + climfile <- s2dv_cube(data = result, lon = result$lon, lat = result$lat) } - zclim <- ncvar_get(ncin, varname) + return(climfile) +} +#' Compute climatological weights for RainFARM stochastic precipitation downscaling +#' +#' @author Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +#' +#' @description Compute climatological ("orographic") weights 
from a fine-scale precipitation climatology file. +#' @references Terzago, S., Palazzi, E., & von Hardenberg, J. (2018). +#' Stochastic downscaling of precipitation in complex orography: +#' A simple method to reproduce a realistic fine-scale climatology. +#' Natural Hazards and Earth System Sciences, 18(11), +#' 2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 . +#' @param zclim a multi-dimensional array with named dimension containing at least one precipiation field with spatial dimensions. +#' @param lonin a vector indicating the longitudinal coordinates corresponding to the \code{zclim} parameter. +#' @param latin a vector indicating the latitudinal coordinates corresponding to the \code{zclim} parameter. +#' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). +#' @param lon Vector of longitudes. +#' @param lat Vector of latitudes. +#' The number of longitudes and latitudes is expected to be even and the same. If not +#' the function will perform a subsetting to ensure this condition. +#' @param fsmooth Logical to use smooth conservation (default) or large-scale box-average conservation. +#' @param lonname a character string indicating the name of the longitudinal dimension set as 'lon' by default. +#' @param latname a character string indicating the name of the latitudinal dimension set as 'lat' by default. +#' @param ncores an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one. +#' +#' @return An object of class 's2dv_cube' containing in matrix \code{data} the weights with dimensions (lon, lat). +#' @import ncdf4 +#' @import rainfarmr +#' @import multiApply +#' @importFrom utils tail +#' @importFrom utils head +#' @examples +#' a <- array(1:2500, c(lat = 50, lon = 50)) +#' res <- RF_Weights(a, seq(0.1 ,5, 0.1), seq(0.1 ,5, 0.1), +#' nf = 5, lat = 1:5, lon = 1:5) +#' @export +RF_Weights <- function(zclim, latin, lonin, nf, lat, lon, fsmooth = TRUE, + lonname = 'lon', latname = 'lat', ncores = NULL) { + x <- Apply(list(zclim), target_dims = c(lonname, latname), fun = rf_weights, + latin = latin, lonin = lonin, nf = nf, lat = lat, lon = lon, + fsmooth = fsmooth, ncores = ncores)$output1 + grid <- lon_lat_fine(lon, lat, nf) + return(list(data = x, lon = grid$lon, lat = grid$lat)) +} +rf_weights <- function(zclim, latin, lonin, nf, lat, lon, fsmooth = TRUE) { # Check if lon and lat need to be reversed if (lat[1] > lat[2]) { lat <- rev(lat) diff --git a/R/CST_RainFARM.R b/R/CST_RainFARM.R index 49065a974711aa9732778ee46e8ca9717ee667b9..0c94650f9e7ccdd925229794e438cea541b37f1d 100644 --- a/R/CST_RainFARM.R +++ b/R/CST_RainFARM.R @@ -21,11 +21,10 @@ #' the function will perform a subsetting to ensure this condition. #' @param weights Matrix with climatological weights which can be obtained using #' the \code{CST_RFWeights} function. If \code{weights=1.} (default) no weights are used. -#' The matrix should have dimensions (lon, lat) in this order. -#' The names of these dimensions are not checked. +#' The names of these dimensions must be at least 'lon' and 'lat'. #' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). #' @param slope Prescribed spectral slope. The default is \code{slope=0.} -#' meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. +#' meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. 
A 1D array with named dimension can be provided (see details and examples) #' @param kmin First wavenumber for spectral slope (default: \code{kmin=1}). #' @param nens Number of ensemble members to produce (default: \code{nens=1}). #' @param fglob Logical to conserve global precipitation over the domain (default: FALSE). @@ -51,10 +50,11 @@ #' #' @return CST_RainFARM() returns a downscaled CSTools object (i.e., of the #' class 's2dv_cube'). -#' If \code{nens>1} an additional dimension named "realization" is added to the +#' If \code{nens>1} an additional dimension named "realizatio"n is added to the #' \code{$data} array after the "member" dimension (unless #' \code{drop_realization_dim=TRUE} is specified). #' The ordering of the remaining dimensions in the \code{$data} element of the input object is maintained. +#' @details Wether parameter 'slope' and 'weights' presents seasonality dependency, a dimension name should match between these parameters and the input data in parameter 'data'. See example 2 below where weights and slope vary with 'sdate' dimension. #' @import multiApply #' @import rainfarmr #' @examples @@ -68,8 +68,8 @@ #' dim(lat) <- c(lat = length(lat)) #' data <- list(data = exp, lon = lon, lat = lat) #' # Create a test array of weights -#' ww <- array(1., dim = c(8 * nf, 8 * nf)) -#' res <- CST_RainFARM(data, nf, ww, nens=3) +#' ww <- array(1., dim = c(lon = 8 * nf, lat = 8 * nf)) +#' res <- CST_RainFARM(data, nf = nf, weights = ww, nens=3) #' str(res) #' #List of 3 #' # $ data: num [1, 1:2, 1:3, 1:3, 1:4, 1:64, 1:64] 260 553 281 278 143 ... @@ -79,20 +79,28 @@ #' # dataset member realization sdate ftime lat lon #' # 1 2 3 3 4 64 64 #' +#' # Example 2: +#' slo <- array(c(0.1, 0.5, 0.7), c(sdate= 3)) +#' wei <- array(rnorm(8 * 8 * 3), c(lon = 8, lat = 8, sdate = 3)) +#' res <- CST_RainFARM(lonlat_prec, +#' weights = wei, slope = slo, nf = 2) #' @export -CST_RainFARM <- function(data, nf, weights = 1., slope = 0, kmin = 1, +CST_RainFARM <- function(data, weights = 1., slope = 0, nf, kmin = 1, nens = 1, fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, verbose = FALSE, drop_realization_dim = FALSE) { res <- RainFARM(data$data, data$lon, data$lat, - nf, weights, nens, slope, kmin, fglob, fsmooth, + nf = nf, weights = weights, nens, slope, kmin, fglob, fsmooth, nprocs, time_dim, lon_dim = "lon", lat_dim = "lat", drop_realization_dim, verbose) - + att_lon <- attributes(data$lon)[-1] + att_lat <- attributes(data$lat)[-1] data$data <- res$data data$lon <- res$lon + attributes(data$lon) <- att_lon data$lat <- res$lat + attributes(data$lat) <- att_lat return(data) } @@ -117,13 +125,12 @@ CST_RainFARM <- function(data, nf, weights = 1., slope = 0, kmin = 1, #' the function will perform a subsetting to ensure this condition. #' @param lon Vector or array of longitudes. #' @param lat Vector or array of latitudes. -#' @param weights Matrix with climatological weights which can be obtained using +#' @param weights multi-dimensional array with climatological weights which can be obtained using #' the \code{CST_RFWeights} function. If \code{weights=1.} (default) no weights are used. -#' The matrix should have dimensions (lon, lat) in this order. -#' The names of these dimensions are not checked. -#' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). +#' The names of these dimensions must be at least 'lon' and 'lat'. +#' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). 
#' @param slope Prescribed spectral slope. The default is \code{slope=0.} -#' meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. +#' meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. A 1D array with named dimension can be provided (see details and examples) #' @param kmin First wavenumber for spectral slope (default: \code{kmin=1}). #' @param nens Number of ensemble members to produce (default: \code{nens=1}). #' @param fglob Logical to conseve global precipitation over the domain (default: FALSE) @@ -153,7 +160,10 @@ CST_RainFARM <- function(data, nf, weights = 1., slope = 0, kmin = 1, #' If \code{nens>1} an additional dimension named "realization" is added to the output array #' after the "member" dimension (if it exists and unless \code{drop_realization_dim=TRUE} is specified). #' The ordering of the remaining dimensions in the \code{exp} element of the input object is maintained. +#' @details Wether parameter 'slope' and 'weights' presents seasonality dependency, a dimension name should match between these parameters and the input data in parameter 'data'. See example 2 below where weights and slope vary with 'sdate' dimension. #' @import multiApply +#' @importFrom s2dverification Subset +#' @importFrom abind abind #' @export #' @examples #' # Example for the 'reduced' RainFARM function @@ -166,7 +176,7 @@ CST_RainFARM <- function(data, nf, weights = 1., slope = 0, kmin = 1, #' lon_mat <- seq(10, 13.5, 0.5) # could also be a 2d matrix #' lat_mat <- seq(40, 43.5, 0.5) #' # Create a test array of weights -#' ww <- array(1., dim = c(8 * nf, 8 * nf)) +#' ww <- array(1., dim = c(lon = 8 * nf, lat = 8 * nf)) #' # or create proper weights using an external fine-scale climatology file #' # Specify a weightsfn filename if you wish to save the weights #' \dontrun{ @@ -185,7 +195,11 @@ CST_RainFARM <- function(data, nf, weights = 1., slope = 0, kmin = 1, #' dim(res$data) #' # lon lat ftime realization #' # 64 64 20 2 -#' +#' # Example 2: +#' slo <- array(c(0.1, 0.5, 0.7), c(sdate= 3)) +#' wei <- array(rnorm(8*8*3), c(lon = 8, lat = 8, sdate = 3)) +#' res <- RainFARM(lonlat_prec$data, lon = lonlat_prec$lon, +#' lat = lonlat_prec$lat, weights = wei, slope = slo, nf = 2) RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, slope = 0, kmin = 1, fglob = FALSE, fsmooth = TRUE, nprocs = 1, time_dim = NULL, lon_dim = "lon", lat_dim = "lat", @@ -211,13 +225,28 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, warning(paste0("lon: [", lon[1], ", ", lon[length(lon)], "] ", " lat: [", lat[1], ", ", lat[length(lat)], "]")) } - if (!(length(dim(weights)) == 0) & - !(all(dim(weights) == c(dim(data)[lon_dim] * nf, - dim(data)[lat_dim] * nf)))) { + if (length(dim(weights)) > 0) { + if (length(names(dim(weights))) == 0) { + stop("Parameter 'weights' must have dimension names when it is not a scalar.") + } else { + if (length(which(names(dim(weights)) == 'lon')) > 0 & + length(which(names(dim(weights)) == 'lat')) > 0) { + lonposw <- which(names(dim(weights)) == 'lon') + latposw <- which(names(dim(weights)) == 'lat') + } else { + stop("Parameter 'weights' must have dimension names 'lon' and 'lat' when", + " it is not a scalar.") + } + } + } + if (!(length(dim(weights)) == 0)) { + if (!(dim(weights)[lonposw] == dim(data)[lon_dim] * nf) & + !(dim(weights)[latposw] == dim(data)[lat_dim] * nf)) { stop(paste("The dimensions of the weights matrix (", dim(weights)[1], "x", dim(weights)[2] , ") 
are not consistent with the size of the data (", dim(data)[lon_dim], ") and the refinement factor (", nf, ")")) + } } # Check/detect time_dim if (is.null(time_dim)) { @@ -238,11 +267,11 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, warning(paste("Selected time dim:", time_dim)) } # Check if slope is an array - if (length(slope) > 1) { - warning("Parameter 'slope' has length > 1 and only the first ", - "element will be used.") - slope <- as.numeric(slope[1]) - } + #if (length(slope) > 1) { + # warning("Parameter 'slope' has length > 1 and only the first ", + # "element will be used.") + # slope <- as.numeric(slope[1]) + #} # Perform common calls r <- lon_lat_fine(lon, lat, nf) @@ -259,14 +288,33 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, dim(data) <- c(cdim[ind], rainfarm_samples = prod(cdim[-ind])) # Repeatedly apply .RainFARM - result <- Apply(data, c(lon_dim, lat_dim, "rainfarm_samples"), .RainFARM, - weights, nf, nens, slope, kmin, - fglob, fsmooth, ncores = nprocs, verbose, - split_factor = "greatest")$output1 + if (length(weights) == 1 & length(slope) == 1) { + result <- Apply(data, c(lon_dim, lat_dim, "rainfarm_samples"), .RainFARM, + weights, slope, nf, nens, kmin, + fglob, fsmooth, ncores = nprocs, verbose, + split_factor = "greatest")$output1 + } else if (length(slope) == 1 & length(weights) > 1 ) { + result <- Apply(list(data, weights), + list(c(lon_dim, lat_dim, "rainfarm_samples"), + c(lonposw, latposw)), + .RainFARM, slope = slope, + nf = nf, nens = nens, kmin = kmin, + fglob = fglob, fsmooth = fsmooth, ncores = nprocs, + verbose = verbose, + split_factor = "greatest")$output1 + } else { + result <- Apply(list(data, weights, slope), + list(c(lon_dim, lat_dim, "rainfarm_samples"), + c(lonposw, latposw), NULL), + fun = .RainFARM, + nf = nf, nens = nens, kmin = kmin, + fglob = fglob, fsmooth = fsmooth, ncores = nprocs, + verbose = verbose, + split_factor = "greatest")$output1 + } # result has dims: lon, lat, rainfarm_samples, realization, other dims # Expand back rainfarm_samples to compacted dims dim(result) <- c(dim(result)[1:2], cdim[-ind], dim(result)[-(1:3)]) - # Reorder as it was in original data # + realization dim after member if it exists ienspos <- which(names(cdim0) == "member") @@ -282,7 +330,6 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, if (drop_realization_dim) { cdim <- dim(result) if (nens == 1) { - # just drop it if only one member dim(result) <- cdim[-which(names(cdim) == "realization")[1]] } else if ("member" %in% names(cdim)) { # compact member and realization dimension if member dim exists, @@ -295,7 +342,6 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, names(dim(result))[ind] <- "member" } } - return(list(data = result, lon = lon_f, lat = lat_f)) } @@ -303,8 +349,8 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, #' @param pr Precipitation array to downscale with dimensions (lon, lat, time). #' @param weights Matrix with climatological weights which can be obtained using #' the \code{CST_RFWeights} function (default: \code{weights=1.} i.e. no weights). -#' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). #' @param slope Prescribed spectral slope (default: \code{slope=0.} +#' @param nf Refinement factor for downscaling (the output resolution is increased by this factor). #' meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. 
#' @param kmin First wavenumber for spectral slope (default: \code{kmin=1}). #' @param nens Number of ensemble members to produce (default: \code{nens=1}). @@ -313,24 +359,44 @@ RainFARM <- function(data, lon, lat, nf, weights = 1., nens = 1, #' @param verbose Logical for verbose output (default: FALSE). #' @return .RainFARM returns a downscaled array with dimensions (lon, lat, time, realization) #' @noRd -.RainFARM <- function(pr, weights, nf, nens, slope, kmin, +.RainFARM <- function(pr, weights, slope, nf, nens, kmin, fglob, fsmooth, verbose) { - + posna <- NULL + if (any(is.na(pr))) { + posna <- unlist(lapply(1:dim(pr)['rainfarm_samples'], + function(x){!is.na(pr[1, 1, x])})) + pr <- Subset(pr, 'rainfarm_samples', posna) + } if (slope == 0) { fxp <- fft2d(pr) sx <- fitslope(fxp, kmin = kmin) } else { sx <- slope } - result_dims <- c(dim(pr)[1] * nf, dim(pr)[2] * nf, dim(pr)[3], realization = nens) r <- array(dim = result_dims) - for (i in 1:nens) { r[, , , i] <- rainfarm(pr, sx, nf, weights, fglob = fglob, fsmooth = fsmooth, verbose = verbose) } + # restoring NA values in their position: + if (!is.null(posna)) { + pos <- which(posna == FALSE) + dimdata <- dim(r) + xdim <- which(names(dimdata) == 'rainfarm_samples') + dimdata[xdim] <- dimdata[xdim] + length(pos) + new <- array(NA, dimdata) + posT <- which(posna == TRUE) + i = 1 + invisible(lapply(posT, function(x) { + new[,,x,] <<- r[,,i,] + i <<- i + 1 + })) + #names(dim(r)) <- names(result_dims) + warning("Missing values found in the samples.") + r <- new + } return(r) } diff --git a/R/CST_RegimesAssign.R b/R/CST_RegimesAssign.R index 60f0d7045a622bb70526b7f7248b12130cd87637..8985d3355bb5c048aab525bfb8a9653ec1f182cb 100644 --- a/R/CST_RegimesAssign.R +++ b/R/CST_RegimesAssign.R @@ -27,7 +27,8 @@ #' that accounts for the serial dependence of the data with the same structure as Composite.)(only when composite = 'TRUE'), #' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , #' \code{$frequency} (A vector of integers (from k=1,...k n reference maps) indicating the percentage of assignations corresponding to each map.), -#'@import s2dverification +#'@importFrom s2dverification ACC Mean1Dim +#'@importFrom s2dv InsertDim #'@import multiApply #'@examples #'\dontrun{ @@ -102,7 +103,8 @@ CST_RegimesAssign <- function(data, ref_maps, #' \code{$cluster} (array with the same dimensions as data (except latitude and longitude which are removed) indicating the ref_maps to which each point is allocated.) , #' \code{$frequency} (A vector of integers (from k = 1, ... 
k n reference maps) indicating the percentage of assignations corresponding to each map.), #' -#'@import s2dverification +#'@importFrom s2dverification ACC Mean1Dim Eno +#'@importFrom s2dv InsertDim #'@import multiApply #'@examples #'\dontrun{ @@ -278,12 +280,8 @@ RegimesAssign <- function(data, ref_maps, lat, method = "distance", composite = corr <- rep(NA, nclust) for (i in 1:nclust) { corr[i] <- - ACC(InsertDim(InsertDim( - InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1 - ), 3, 1), - InsertDim(InsertDim( - InsertDim(target * latWeights, 1, 1), 2, 1 - ), 3, 1))$ACC[2] + ACC(InsertDim(InsertDim(InsertDim(ref[i, , ] * latWeights, 1, 1), 2, 1), 3, 1), + InsertDim(InsertDim(InsertDim(target * latWeights, 1, 1), 2, 1), 3, 1))$ACC[2] } assign <- which(corr == max(corr)) } diff --git a/R/CST_SaveExp.R b/R/CST_SaveExp.R index b98f3668b3316260c9e28b9a4658b795566b68c5..9c689ff7156d825d22cdb1bde8d4a6785c854dbc 100644 --- a/R/CST_SaveExp.R +++ b/R/CST_SaveExp.R @@ -17,14 +17,14 @@ #'@seealso \code{\link{CST_Load}}, \code{\link{as.s2dv_cube}} and \code{\link{s2dv_cube}} #' #'@import ncdf4 -#'@importFrom s2dv Reorder +#'@importFrom s2dv Reorder InsertDim #'@import multiApply #' #'@examples #'\dontrun{ #'library(CSTools) #'data <- lonlat_data$exp -#'destination <- "./path/" +#'destination <- "./path2/" #'CST_SaveExp(data = data, destination = destination) #'} #' @@ -40,7 +40,7 @@ CST_SaveExp <- function(data, destination = "./CST_Data") { "as output by CSTools::CST_Load.") } sdates <- lapply(1:length(data$Datasets), function(x) { - data$Datasets[[x]]$InitializationDates[[1]]})[[1]] + unique(data$Datasets[[x]]$InitializationDates)})[[1]] if (!is.character(attributes(data$Variable)$units)) { units <- attributes(data$Variable)$variable$units } else { @@ -78,7 +78,7 @@ CST_SaveExp <- function(data, destination = "./CST_Data") { #' The path will be created with the name of the variable and each Datasets. #' #'@import ncdf4 -#'@importFrom s2dv Reorder +#'@importFrom s2dv Reorder InsertDim #'@import multiApply #' #'@examples @@ -142,7 +142,7 @@ SaveExp <- function(data, lon, lat, Dataset, var_name, units, startdates, Dates, if (length(dataset_pos) == 0) { warning("Element 'data' in parameter 'data' hasn't 'dataset' dimension. ", "All data is stored in the same 'dataset' folder.") - data$data <- InsertDim(var = data, posdim = 1, lendim = 1) + data$data <- InsertDim(data, posdim = 1, lendim = 1) names(dim(data))[1] <- "dataset" dimname <- c("dataset", dimname) dataset_pos = 1 @@ -256,9 +256,10 @@ SaveExp <- function(data, lon, lat, Dataset, var_name, units, startdates, Dates, dim_names <- names(dim(data)) if (any(dim_names != c('longitude', 'latitude', 'member', 'time'))) { data <- Reorder(data, c('longitude', 'latitude', 'member', 'time')) - } - dim_time <- ncdim_def(name = 'time', units = 'days since 1970-01-01', - vals = as.numeric(Dates), + } + differ <- as.numeric((Dates - Dates[1])/3600) + dim_time <- ncdim_def(name = 'time', units = paste('hours since', Dates[1]), + vals = differ, calendar = 'proleptic_gregorian', longname = 'time', unlim = TRUE) list_pos = length(dims_var) + 1 dims_var[[list_pos]] <- dim_time diff --git a/R/CST_SplitDim.R b/R/CST_SplitDim.R index 46cd97cca21aeb1787b6d572f30662fdffb4181e..5e85182d6d23d36b1afd60e204304bf14410bfe2 100644 --- a/R/CST_SplitDim.R +++ b/R/CST_SplitDim.R @@ -8,9 +8,12 @@ #'@param split_dim a character string indicating the name of the dimension to split #'@param indices a vector of numeric indices or dates. 
If left at NULL, the dates provided in the s2dv_cube object (element Dates) will be used.
#'@param freq a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independently of the year they belong to, while 'monthly' differenciates months from different years.
+#'@param new_dim_name a character string indicating the name of the new dimension.
+#'@param insert_ftime an integer indicating the number of time steps to add at the beginning of the time series.
#'
+#'@details Parameter 'insert_ftime' has been included for the case of using daily data when the temporal dimension needs to be split by months (or similar) and the first lead time does not correspond to the 1st day of the month. In this case, 'insert_ftime' can be used to obtain a correctly organized final output. E.g.: lead time 1 is the 2nd of November and the input time series extends to the 31st of December. When requesting a split by month with \code{insert_ftime = 1}, the 'monthly' dimension of length two will indicate the month (position 1 for November and position 2 for December), and dimension 'time' will have length 31. For November, positions 1 and 31 will be NAs, while positions 2 to 30 will be filled with the data provided. This allows selecting the correct days through the 'time' dimension.
#'@import abind
-#'@import s2dverification
+#'@importFrom s2dverification Subset
#'@examples
#'
#'data <- 1 : 20
@@ -34,12 +37,46 @@
#'dim(new_data$data)
#'@export
CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
-                         freq = 'monthly') {
+                         freq = 'monthly', new_dim_name = NULL, insert_ftime = NULL) {
  if (!inherits(data, 's2dv_cube')) {
-    stop("Parameter 'data' must be of the class 's2dv_cube', ",
-         "as output by CSTools::CST_Load.")
+    stop("Parameter 'data' must be of the class 's2dv_cube', ",
+        "as output by CSTools::CST_Load.")
  }
+  if (!is.null(insert_ftime)) {
+    if (!is.numeric(insert_ftime)) {
+      stop("Parameter 'insert_ftime' should be an integer.")
+    } else {
+      if (length(insert_ftime) > 1) {
+        warning("Parameter 'insert_ftime' must be of length 1, and only the",
+                " first element will be used.")
+        insert_ftime <- insert_ftime[1]
+      }
+      # adding NAs at the beginning of the data in ftime dim
+      ftimedim <- which(names(dim(data$data)) == 'ftime')
+      dims <- dim(data$data)
+      dims[ftimedim] <- insert_ftime
+      empty_array <- array(NA, dims)
+      data$data <- abind(empty_array, data$data, along = ftimedim)
+      names(dim(data$data)) <- names(dims)
+      # adding dates to Dates for the new NAs introduced
+      if ((data$Dates[[1]][2] - data$Dates[[1]][1]) == 1) {
+        timefreq <- 'days'
+      } else {
+        timefreq <- 'months'
+        warning("Time frequency of forecast time is considered monthly.")
+      }
+      start <- data$Dates[[1]]
+      dim(start) <- c(ftime = length(start)/dims['sdate'], sdate = dims['sdate'])
+      #new <- array(NA, prod(dim(data$data)[c('ftime', 'sdate')]))
+      # Pending fix transform to UTC when concatenating
+      data$Dates$start <- do.call(c, lapply(1:dim(start)[2], function(x) {
+                                  seq(start[1,x] - as.difftime(insert_ftime,
+                                                               units = timefreq),
+                                      start[dim(start)[1],x], by = timefreq, tz = "UTC")}))
+    }
+  }
  if (is.null(indices)) {
+    if (any(split_dim %in% c('ftime', 'time', 'sdate'))) {
    if (is.list(data$Dates)) {
      indices <- data$Dates[[1]]
    } else {
@@ -53,9 +90,10 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL,
      indices <- indices[1 : dim(data$data)[which(names(dim(data$data)) == split_dim)]]
    }
+    }
  }
  data$data <- SplitDim(data$data, split_dim = split_dim,
indices = indices, - freq = freq) + freq = freq, new_dim_name = new_dim_name) return(data) } #'Function to Split Dimension @@ -67,10 +105,10 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, #'@param data an n-dimensional array with named dimensions #'@param split_dim a character string indicating the name of the dimension to split #'@param indices a vector of numeric indices or dates -#'@param freq a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independetly of the year they belong to, while 'monthly' differenciates months from different years. Parameter 'freq' can also be numeric indicating the length in which to subset the dimension -#' +#'@param freq a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independetly of the year they belong to, while 'monthly' differenciates months from different years. Parameter 'freq' can also be numeric indicating the length in which to subset the dimension. +#'@param new_dim_name a character string indicating the name of the new dimension. #'@import abind -#'@import s2dverification +#'@importFrom s2dverification Subset #'@examples #' #'data <- 1 : 20 @@ -85,7 +123,8 @@ CST_SplitDim <- function(data, split_dim = 'time', indices = NULL, #'new_data <- SplitDim(data, indices = time, freq = 'month') #'new_data <- SplitDim(data, indices = time, freq = 'year') #'@export -SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { +SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly', + new_dim_name = NULL) { # check data if (is.null(data)) { stop("Parameter 'data' cannot be NULL.") @@ -123,6 +162,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { } indices <- rep(1 : (dims[pos_split] / freq), freq) indices <- sort(indices) + repited <- sort(unique(indices)) } } else if (is.numeric(indices)) { if (!is.null(freq)) { @@ -131,6 +171,7 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { "parameter 'indices' is numeric.") } } + repited <- sort(unique(indices)) } else { # Indices should be Dates and freq character if (!is.character(freq)) { @@ -161,19 +202,33 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { if (!is.numeric(indices)) { if (freq == 'day') { indices <- as.numeric(strftime(indices, format = "%d")) + repited <- unique(indices) } else if (freq == 'month') { indices <- as.numeric(strftime(indices, format = "%m")) + repited <- unique(indices) } else if (freq == 'year') { indices <- as.numeric(strftime(indices, format = "%Y")) + repited <- unique(indices) } else if (freq == 'monthly' ) { indices <- as.numeric(strftime(indices, format = "%m%Y")) + repited <- unique(indices) } else { stop("Parameter 'freq' must be numeric or a character: ", "by 'day', 'month', 'year' or 'monthly' (for ", "distinguishable month).") } } - repited <- unique(indices) + # check new_dim_name + if (!is.null(new_dim_name)) { + if (!is.character(new_dim_name)) { + stop("Parameter 'new_dim_name' must be character string") + } + if (length(new_dim_name) > 1) { + new_dim_name <- new_dim_name[1] + warning("Parameter 'new_dim_name' has length greater than 1 ", + "and only the first elemenst is used.") + } + } max_times <- max(unlist(lapply(repited, function(x){sum(indices == x)}))) data <- lapply(repited, function(x) {rebuild(x, data, along = split_dim, @@ -184,6 
+239,9 @@ SplitDim <- function(data, split_dim = 'time', indices, freq = 'monthly') { } else { names(dim(data)) <- c(names(dims), 'index') } + if (!is.null(new_dim_name)) { + names(dim(data)) <- c(names(dims), new_dim_name) + } return(data) } @@ -200,3 +258,4 @@ rebuild <- function(x, data, along, indices, max_times) { } return(a) } + diff --git a/R/CST_WeatherRegimes.R b/R/CST_WeatherRegimes.R index 72ab3987e38e2ab4b7f45bcc2b10016f3eecbb92..e7cc925c3d9bc9d11180c33dbd01ab722aecafdc 100644 --- a/R/CST_WeatherRegimes.R +++ b/R/CST_WeatherRegimes.R @@ -34,7 +34,7 @@ #' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), #' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#'@import s2dverification +#'@importFrom s2dverification EOF #'@import multiApply #'@examples #'\dontrun{ @@ -109,7 +109,7 @@ CST_WeatherRegimes <- function(data, ncenters = NULL, #' \code{cluster} (A matrix or vector with integers (from 1:k) indicating the cluster to which each time step is allocated.), #' \code{persistence} (Percentage of days in a month/season before a cluster is replaced for a new one (only if method=’kmeans’ has been selected.)), #' \code{frequency} (Percentage of days in a month/season belonging to each cluster (only if method=’kmeans’ has been selected).), -#'@import s2dverification +#'@importFrom s2dverification EOF #'@import multiApply #'@examples #'\dontrun{ diff --git a/R/PlotCombinedMap.R b/R/PlotCombinedMap.R index 7f8f5d64e0191ab40cd7e88b25d23fd95f9269e9..2aee71b8ef7a3861a3834e05f0110ea10bc1751d 100644 --- a/R/PlotCombinedMap.R +++ b/R/PlotCombinedMap.R @@ -25,7 +25,7 @@ #'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} #' -#'@import s2dverification +#'@importFrom s2dv PlotEquiMap ColorBar #'@importFrom maps map #'@importFrom graphics box image layout mtext par plot.new #'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff diff --git a/R/PlotForecastPDF.R b/R/PlotForecastPDF.R index 14e6cd3aadca4cda3f6d040e526b8bcc5a0b206a..6311628916972bdd150e2f3682520b1e0fe2433a 100644 --- a/R/PlotForecastPDF.R +++ b/R/PlotForecastPDF.R @@ -23,8 +23,7 @@ #'@importFrom reshape2 melt #'@importFrom plyr . 
#'@importFrom plyr dlply -#'@import s2dverification -#' +#'@importFrom s2dv InsertDim #'@examples #'fcsts <- data.frame(fcst1 = rnorm(10), fcst2 = rnorm(10, 0.5, 1.2), #' fcst3 = rnorm(10, -0.5, 0.9)) @@ -49,23 +48,23 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N #------------------------ color.set <- match.arg(color.set) if (color.set == "s2s4e") { - colorFill <- rev(c("#FF764D", "#b5b5b5", "#33BFD1")) - colorHatch <- c("deepskyblue3", "indianred3") + colorFill <- c("#FF764D", "#b5b5b5", "#33BFD1") # AN, N, BN fill colors + colorHatch <- c("indianred3", "deepskyblue3") # AP90, BP10 line colors colorMember <- c("#ffff7f") colorObs <- "purple" - colorLab <- c("blue", "red") + colorLab <- c("red", "blue") # AP90, BP10 text colors } else if (color.set == "hydro") { - colorFill <- rev(c("#41CBC9", "#b5b5b5", "#FFAB38")) - colorHatch <- c("darkorange1", "deepskyblue3") + colorFill <- c("#41CBC9", "#b5b5b5", "#FFAB38") + colorHatch <- c("deepskyblue3", "darkorange1") colorMember <- c("#ffff7f") colorObs <- "purple" - colorLab <- c("darkorange3", "blue") + colorLab <- c("blue", "darkorange3") } else if (color.set == "ggplot") { - colorFill <- rev(ggColorHue(3)) - colorHatch <- c("deepskyblue3", "indianred1") + colorFill <- ggColorHue(3) + colorHatch <- c("indianred3", "deepskyblue3") colorMember <- c("#ffff7f") colorObs <- "purple" - colorLab <- c("blue", "red") + colorLab <- c("red", "blue") } else { stop("Parameter 'color.set' should be one of ggplot/s2s4e/hydro") } @@ -109,7 +108,7 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N if (length(tercile.limits) != 2) { stop("Provide two tercile limits") } - tercile.limits <- InsertDim(tercile.limits, 1, npanels) + tercile.limits <- InsertDim(tercile.limits, 1, npanels, name = "new") } else if (is.array(tercile.limits)) { if (length(dim(tercile.limits)) == 2) { if (dim(tercile.limits)[2] != 2) { @@ -136,7 +135,7 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N if (length(extreme.limits) != 2) { stop("Provide two extreme limits") } - extreme.limits <- InsertDim(extreme.limits, 1, npanels) + extreme.limits <- InsertDim(extreme.limits, 1, npanels, name = "new") } else if (is.array(extreme.limits)) { if (length(dim(extreme.limits)) == 2) { if (dim(extreme.limits)[2] != 2) { @@ -171,10 +170,11 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N #------------------------ # Produce a first plot with the pdf for each init in a panel #------------------------ - melt.df <- melt(fcst.df, variable.name = "init", id.vars = NULL) - plot <- ggplot(melt.df, aes(x = value)) + geom_density(alpha = 1, na.rm = T) + - coord_flip() + facet_wrap(~init, strip.position = "top", nrow = 1) + xlim(range(c(obs, - density(melt.df$value, na.rm = T)$x))) + melt.df <- reshape2::melt(fcst.df, variable.name = "init", id.vars = NULL) + plot <- ggplot(melt.df, aes(x = value)) + + geom_density(alpha = 1, na.rm = T) + + coord_flip() + facet_wrap(~init, strip.position = "top", nrow = 1) + + xlim(range(c(obs, density(melt.df$value, na.rm = T)$x))) ggp <- ggplot_build(plot) #------------------------ # Gather the coordinates of the plots together with init and corresponding @@ -188,9 +188,10 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N } else { stop("Cannot find PANELS in ggp object") } - tmp.df$tercile <- factor(ifelse(tmp.df$x < tercile.limits[tmp.df$PANEL, 1], "Below normal", - ifelse(tmp.df$x < 
tercile.limits[tmp.df$PANEL, 2], "Normal", "Above normal")), levels = c("Below normal", - "Normal", "Above normal")) + tmp.df$tercile <- factor(ifelse(tmp.df$x < tercile.limits[tmp.df$PANEL, 1], + "Below normal", ifelse(tmp.df$x < tercile.limits[tmp.df$PANEL, 2], + "Normal", "Above normal")), + levels = c("Above normal", "Normal", "Below normal")) #------------------------ # Get the height and width of a panel #------------------------ @@ -201,9 +202,10 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N # Compute hatch coordinates for extremes #------------------------ if (!is.null(extreme.limits)) { - tmp.df$extremes <- factor(ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 1], "Below P10", - ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 2], "Normal", "Above P90")), levels = c("Below P10", - "Normal", "Above P90")) + tmp.df$extremes <- factor(ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 1], + "Below P10", ifelse(tmp.df$x < extreme.limits[tmp.df$PANEL, 2], + "Normal", "Above P90")), + levels = c("Above P90", "Normal", "Below P10")) hatch.ls <- dlply(tmp.df, .(init, extremes), function(x) { # close the polygon tmp.df2 <- data.frame(x = c(x$x, max(x$x), min(x$x)), y = c(x$ymax, 0, @@ -237,10 +239,10 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N # Compute jitter space for ensemble members #------------------------ if (add.ensmemb != "no") { - jitter.df <- melt(data.frame(dlply(melt.df, .(init), function(x) { - .jitter.ensmemb(sort(x$value, na.last = T), pan.width/100) + jitter.df <- reshape2::melt(data.frame(dlply(melt.df, .(init), function(x) { + .jitter.ensmemb(sort(x$value, na.last = T), pan.width / 100) }), check.names = F), value.name = "yjitter", variable.name = "init", id.vars = NULL) - jitter.df$x <- melt(data.frame(dlply(melt.df, .(init), function(x) { + jitter.df$x <- reshape2::melt(data.frame(dlply(melt.df, .(init), function(x) { sort(x$value, na.last = T) })), value.name = "x", id.vars = NULL)$x } @@ -258,8 +260,10 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N #------------------------ # Fill each pdf with different colors for the terciles #------------------------ - plot <- plot + geom_ribbon(data = tmp.df, aes(x = x, ymin = ymin, ymax = ymax, - fill = tercile), alpha = 0.7) + plot <- plot + + geom_ribbon(data = tmp.df, + aes(x = x, ymin = ymin, ymax = ymax, fill = tercile), + alpha = 0.7) #------------------------ # Add hatches for extremes #------------------------ @@ -268,37 +272,55 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N warning("The provided extreme categories are outside the plot bounds. 
The extremes will not be drawn.") extreme.limits <- NULL } else { - plot <- plot + geom_segment(data = hatch.df[hatch.df$extremes != "Normal", - ], aes(x = x, y = y, xend = xend, yend = yend, color = extremes)) + plot <- plot + + geom_segment(data = hatch.df[hatch.df$extremes != "Normal", ], + aes(x = x, y = y, + xend = xend, yend = yend, color = extremes)) } } #------------------------ # Add obs line #------------------------ if (!is.null(obs)) { - plot <- plot + geom_vline(data = obs.dt, aes(xintercept = value), linetype = "dashed", - color = colorObs) + plot <- plot + + geom_vline(data = obs.dt, + aes(xintercept = value), + linetype = "dashed", color = colorObs) } #------------------------ # Add ensemble members #------------------------ if (add.ensmemb == "below") { - plot <- plot + # this adds a grey box for ensmembers - geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = -pan.height/10), - fill = "gray95", color = "black", width = 0.2) + # this adds the ensemble members - geom_point(data = jitter.df, color = "black", fill = colorMember, alpha = 1, - aes(x = x, y = -pan.height/10 - magic.ratio * yjitter, shape = "Ensemble members")) + plot <- plot + + # this adds a grey box for ensmembers + geom_rect(aes(xmin = -Inf, xmax = Inf, + ymin = -Inf, ymax = -pan.height / 10), + fill = "gray95", color = "black", width = 0.2) + + # this adds the ensemble members + geom_point(data = jitter.df, + aes(x = x, + y = -pan.height / 10 - magic.ratio * yjitter, + shape = "Ensemble members"), + color = "black", fill = colorMember, alpha = 1) + } else if (add.ensmemb == "above") { - plot <- plot + geom_point(data = jitter.df, color = "black", fill = colorMember, - alpha = 1, aes(x = x, y = 0.7 * magic.ratio * yjitter, shape = "Ensemble members")) + plot <- plot + + geom_point(data = jitter.df, + aes(x = x, + y = 0.7 * magic.ratio * yjitter, + shape = "Ensemble members"), + color = "black", fill = colorMember, alpha = 1) + } #------------------------ # Add obs diamond #------------------------ if (!is.null(obs)) { - plot <- plot + # this adds the obs diamond - geom_point(data = obs.xy, aes(x = x, y = ymax, size = "Observation"), shape = 23, - color = "black", fill = colorObs) + plot <- plot + + # this adds the obs diamond + geom_point(data = obs.xy, + aes(x = x, y = ymax, size = "Observation"), + shape = 23, color = "black", fill = colorObs) } #------------------------ # Compute probability for each tercile and identify MLT @@ -306,26 +328,36 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N tmp.dt <- data.table(tmp.df) pct <- tmp.dt[, .(pct = integrate(approxfun(x, ymax), lower = min(x), upper = max(x))$value), by = .(init, tercile)] + # include potentially missing groups + pct <- merge(pct, CJ(init = factor(levels(pct$init), levels = levels(pct$init)), + tercile = factor(c("Below normal", "Normal", "Above normal"), + levels = c("Above normal", "Normal", "Below normal"))), + by = c("init", "tercile"), all.y = T) + pct[is.na(pct),"pct"] <- 0 tot <- pct[, .(tot = sum(pct)), by = init] pct <- merge(pct, tot, by = "init") - pct$pct <- round(100 * pct$pct/pct$tot, 0) + pct$pct <- round(100 * pct$pct / pct$tot, 0) pct$MLT <- pct[, .(MLT = pct == max(pct)), by = init]$MLT - pct$lab.pos <- as.vector(apply(tercile.limits, 1, function(x) {c(min(x), mean(x), max(x))})) + pct <- pct[order(init, tercile)] + pct$lab.pos <- as.vector(apply(tercile.limits, 1, function(x) {c(max(x), mean(x), min(x))})) #------------------------ # Compute probability for extremes 
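# A self-contained sketch of the probability computation used in this hunk, assuming
# (as the code above does) that each category probability is the area of the estimated
# forecast density between the tercile limits, later normalised to percentages.
# The forecast values and limits below are illustrative only.
fcst <- rnorm(30)
dens <- density(fcst, na.rm = TRUE)
lims <- quantile(fcst, c(1/3, 2/3))   # stand-ins for 'tercile.limits'
below <- integrate(approxfun(dens$x, dens$y), lower = min(dens$x), upper = lims[1])$value
normal <- integrate(approxfun(dens$x, dens$y), lower = lims[1], upper = lims[2])$value
above <- integrate(approxfun(dens$x, dens$y), lower = lims[2], upper = max(dens$x))$value
round(100 * c(below, normal, above) / (below + normal + above))  # percentages, ~100 in total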
#------------------------ if (!is.null(extreme.limits)) { pct2 <- tmp.dt[, .(pct = integrate(approxfun(x, ymax), lower = min(x), upper = max(x))$value), by = .(init, extremes)] + # include potentially missing groups + pct2 <- merge(pct2, CJ(init = factor(levels(pct2$init), levels = levels(pct2$init)), + extremes = factor(c("Below P10", "Normal", "Above P90"), + levels = c("Above P90", "Normal", "Below P10"))), + by = c("init", "extremes"), all.y=T) + pct2[is.na(pct),"pct"] <- 0 tot2 <- pct2[, .(tot = sum(pct)), by = init] pct2 <- merge(pct2, tot2, by = "init") - pct2$pct <- round(100 * pct2$pct/pct2$tot, 0) - pct2$lab.pos <- as.vector(apply(extreme.limits, 1, function(x) {c(x[1], NA, x[2])})) - pct2 <- merge(pct2, max.df, by = c("init", "extremes")) - # include potentially missing groups - pct2 <- pct2[CJ(factor(levels(pct2$init), levels = levels(pct2$init)), - factor(c("Below P10", "Normal", "Above P90"), - levels = c("Below P10", "Normal", "Above P90"))), ] + pct2$pct <- round(100 * pct2$pct / pct2$tot, 0) + pct2 <- pct2[order(init, extremes)] + pct2$lab.pos <- as.vector(apply(extreme.limits, 1, function(x) {c(x[2], NA, x[1])})) + pct2 <- merge(pct2, max.df, by = c("init", "extremes"), all.x = T) } #------------------------ # Add probability labels for terciles @@ -337,35 +369,53 @@ PlotForecastPDF <- function(fcst, tercile.limits, extreme.limits = NULL, obs = N labpos <- 0 vjust <- -0.5 } - plot <- plot + geom_text(data = pct, aes(x = lab.pos, y = labpos, - label = paste0(pct, "%"), hjust = as.integer(tercile) * 1.5 - 2.5), vjust = vjust, - angle = -90, size = 3.2) + geom_text(data = pct[MLT == T, ], aes(x = lab.pos, - y = labpos, label = "*", hjust = as.integer(tercile) * 3.5 - 5), vjust = 0.1, - angle = -90, size = 7, color = "black") + plot <- plot + + geom_text(data = pct, + aes(x = lab.pos, y = labpos, label = paste0(pct, "%"), + hjust = as.integer(tercile) * -1.5 + 3.5), + vjust = vjust, angle = -90, size = 3.2) + + geom_text(data = pct[MLT == T, ], + aes(x = lab.pos, y = labpos, label = "*", + hjust = as.integer(tercile) * -3.5 + 9), + vjust = 0.1, angle = -90, size = 7, color = "black") #------------------------ # Add probability labels for extremes #------------------------ if (!is.null(extreme.limits)) { - plot <- plot + geom_text(data = pct2[extremes != "Normal", ], aes(x = lab.pos, - y = 0.9 * y, label = paste0(pct, "%"), hjust = as.integer(extremes) * - 1.5 - 2.5), vjust = -0.5, angle = -90, size = 3.2, color = rep(colorLab, - dim(fcst.df)[2])) + plot <- plot + + geom_text(data = pct2[extremes != "Normal", ], + aes(x = lab.pos, y = 0.9 * y, label = paste0(pct, "%"), + hjust = as.integer(extremes) * -1.5 + 3.5), + vjust = -0.5, angle = -90, size = 3.2, + color = rep(colorLab, dim(fcst.df)[2])) } #------------------------ # Finish all theme and legend details #------------------------ - plot <- plot + theme_minimal() + scale_fill_manual(name = "Probability of\nterciles", - breaks = c("Above normal", "Normal", "Below normal"), values = colorFill, - drop = F) + scale_color_manual(name = "Probability of\nextremes", values = colorHatch) + - scale_shape_manual(name = "Ensemble\nmembers", values = c(21)) + scale_size_manual(name = "Observation", - values = c(3)) + labs(x = var.name, y = "Probability density\n(total area=1)", - title = title) + theme(axis.text.x = element_blank(), panel.grid.minor.x = element_blank(), - legend.key.size = unit(0.3, "in"), panel.border = element_rect(fill = NA, - color = "black"), strip.background = element_rect(colour = "black", fill = "gray80"), - 
panel.spacing = unit(0.2, "in"), panel.grid.major.x = element_line(color = "grey93")) + - guides(fill = guide_legend(order = 1), color = guide_legend(order = 2, reverse = T), - shape = guide_legend(order = 3, label = F), size = guide_legend(order = 4, - label = F)) + plot <- plot + + theme_minimal() + + scale_fill_manual(name = "Probability of\nterciles", + values = colorFill, drop = F) + + scale_color_manual(name = "Probability of\nextremes", + values = colorHatch) + + scale_shape_manual(name = "Ensemble\nmembers", + values = c(21)) + + scale_size_manual(name = "Observation", + values = c(3)) + + labs(x = var.name, + y = "Probability density\n(total area=1)", + title = title) + + theme(axis.text.x = element_blank(), + panel.grid.minor.x = element_blank(), + legend.key.size = unit(0.3, "in"), + panel.border = element_rect(fill = NA, color = "black"), + strip.background = element_rect(colour = "black", fill = "gray80"), + panel.spacing = unit(0.2, "in"), + panel.grid.major.x = element_line(color = "grey93")) + + guides(fill = guide_legend(order = 1), + color = guide_legend(order = 2), + shape = guide_legend(order = 3, label = F), + size = guide_legend(order = 4, label = F)) #------------------------ # Save to plotfile if needed, and return plot #------------------------ diff --git a/R/PlotMostLikelyQuantileMap.R b/R/PlotMostLikelyQuantileMap.R index 57be86435ddf526ebe051cb8b2cfd3fd4c394fa6..b4e974a88a70b39fac07e339e1fe7bdae7fe65c6 100644 --- a/R/PlotMostLikelyQuantileMap.R +++ b/R/PlotMostLikelyQuantileMap.R @@ -12,7 +12,6 @@ #'@param ... additional parameters to be sent to \code{PlotCombinedMap} and \code{PlotEquiMap}. #'@seealso \code{PlotCombinedMap} and \code{PlotEquiMap} #' -#'@import s2dverification #'@importFrom maps map #'@importFrom graphics box image layout mtext par plot.new #'@importFrom grDevices adjustcolor bmp colorRampPalette dev.cur dev.new dev.off hcl jpeg pdf png postscript svg tiff diff --git a/R/PlotPDFsOLE.R b/R/PlotPDFsOLE.R index fc4ad76e230b535456f6c6e5aa5930dc42761424..25c669a471bb96392ebfea4cc614764f67e07a1f 100644 --- a/R/PlotPDFsOLE.R +++ b/R/PlotPDFsOLE.R @@ -14,6 +14,10 @@ #' to combining. #' @param nsigma (optional) A numeric value for setting the limits of X axis. #' (Default nsigma = 3). +#' @param legendPos (optional) A character value for setting the position of the +#' legend ("bottom", "top", "right" or "left")(Default 'bottom'). +#' @param legendSize (optional) A numeric value for setting the size of the +#' legend text. (Default 1.0). #' @param plotfile (optional) A filename where the plot will be saved. #' (Default: the plot is not saved). 
#' @param width (optional) A numeric value indicating the plot width in @@ -40,12 +44,26 @@ #' #' PlotPDFsOLE(pdf_1, pdf_2) #' +#' # Example 2 +#' Glosea5PDF <- c(2.25, 0.67) +#' attr(Glosea5PDF, "name") <- "Glosea5" +#' dim(Glosea5PDF) <- c(statistic = 2) +#' ECMWFPDF <- c(2.38, 0.61) +#' attr(ECMWFPDF, "name") <- "ECMWF" +#' dim(ECMWFPDF) <- c(statistic = 2) +#' MFPDF <- c(4.52, 0.34) +#' attr(MFPDF, "name") <- "MF" +#' dim(MFPDF) <- c(statistic = 2) +#' PlotPDFsOLE(pdf_1 = Glosea5PDF, pdf_2 = ECMWFPDF, legendPos = 'left') +#' PlotPDFsOLE(pdf_1 = Glosea5PDF, pdf_2 = MFPDF, legendPos = 'top') +#' PlotPDFsOLE(pdf_1 = ECMWFPDF, pdf_2 = MFPDF, legendSize = 1.2) + #'@export -PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, plotfile = NULL, - width = 30, height = 15, - units = "cm", dpi = 300) { +PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, legendPos = 'bottom', + legendSize = 1.0, plotfile = NULL, width = 30, + height = 15, units = "cm", dpi = 300) { y <- type <- NULL - + if(!is.null(plotfile)){ if (!is.numeric(dpi)) { stop("Parameter 'dpi' must be numeric.") @@ -87,6 +105,15 @@ PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, plotfile = NULL, "indicating the path and name of output png file.") } } + if (!is.character(legendPos)) { + stop("Parameter 'legendPos' must be character") + } + if(!(legendPos %in% c("bottom", "top", "right", "left"))) { + stop("Parameter 'legendPos' must be equal to 'bottom', 'top', 'right' or 'left'.") + } + if (!is.numeric(legendSize)) { + stop("Parameter 'legendSize' must be numeric.") + } if (!is.numeric(nsigma)) { stop("Parameter 'nsigma' must be numeric.") } @@ -196,14 +223,16 @@ PlotPDFsOLE <- function(pdf_1, pdf_2, nsigma = 3, plotfile = NULL, g <- g + scale_colour_manual(values = cols, limits = c(name1, name2, nameBest), labels = c(label1, label2, labelBest)) + g <- g + theme(plot.title=element_text(size=rel(1.1), colour="black", face= "bold"), axis.text.x = element_text(size=rel(1.2)), axis.text.y = element_text(size=rel(1.2)), axis.title.x = element_blank(), legend.title = element_blank(), - legend.position = c(1,1), legend.justification = c(1,1), - legend.text = element_text(face = "bold")) + legend.position = legendPos, + legend.text = element_text(face = "bold", size=rel(legendSize))) + g <- g + ggtitle(graphicTitle) g <- g + labs(y="probability", size=rel(1.9)) g <- g + stat_function(fun = dnorm_limit, args = list(mean=mean1, sd=sigma1), diff --git a/R/PlotTriangles4Categories.R b/R/PlotTriangles4Categories.R index fcfb36bbb68b7b58019634d9417ce1ad0609523a..9e3d995df521f9b0a9f9b8a8a31a8df01272c1ed 100644 --- a/R/PlotTriangles4Categories.R +++ b/R/PlotTriangles4Categories.R @@ -40,8 +40,10 @@ #'@param cex_leg a number to indicate the increase/reductuion of the lab_legend used #' to represent sig_data. #'@param col_leg color of the legend (triangles). +#'@param cex_axis a number to indicate the increase/reduction of the axis labels. #'@param fileout A string of full directory path and file name indicating where #' to save the plot. If not specified (default), a graphics device will pop up. +#'@param mar A numerical vector of the form c(bottom, left, top, right) which gives the number of lines of margin to be specified on the four sides of the plot. #'@param size_units A string indicating the units of the size of the device #' (file or window) to plot in. Set 'px' as default. See ?Devices and the #' creator function of the corresponding device. 
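# A hedged usage sketch for the new 'cex_axis' and 'mar' arguments documented above.
# The input array (named dimensions 'dimx', 'dimy', 'dimcat') and its values are
# illustrative only.
library(CSTools)
dat <- array(runif(4 * 3 * 4, min = -1, max = 1),
             dim = c(dimx = 4, dimy = 3, dimcat = 4))
PlotTriangles4Categories(dat,
                         cex_axis = 1,          # smaller axis labels than the 1.5 default
                         mar = c(6, 4, 1, 1))   # wider bottom margin than the c(5, 4, 0, 0) default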
@@ -72,6 +74,7 @@ #'@importFrom grDevices dev.new dev.off dev.cur #'@importFrom graphics plot points polygon text title axis #'@importFrom RColorBrewer brewer.pal +#'@importFrom s2dv ColorBar #'@export PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, toptitle = NULL, sig_data = NULL, @@ -81,6 +84,7 @@ PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, ylabels = NULL, ytitle = NULL, legend = TRUE, lab_legend = NULL, cex_leg = 1, col_leg = 'black', + cex_axis = 1.5, mar = c(5, 4, 0, 0), fileout = NULL, size_units = 'px', res = 100, figure.width = 1, ...) { # Checking the dimensions @@ -100,7 +104,9 @@ PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, stop("Parameter 'data' should contain 'dimx', 'dimy' and 'dimcat' dimension names. ") } } - + if (!is.vector(mar) & length(mar) != 4) { + stop("Parameter 'mar' must be a vector of length 4.") + } if (!is.null(sig_data)) { if (!is.logical(sig_data)) { stop("Parameter 'sig_data' array must be logical.")} @@ -179,7 +185,7 @@ PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, if(legend){ layout(matrix(c(1, 2, 1, 3), 2, 2, byrow = T), widths = c(10, 3.4), heights = c(10, 3.5)) - par(oma = c(1, 1, 1, 1), mar = c(5, 4, 0, 0)) + par(oma = c(1, 1, 1, 1), mar = mar) if(is.null(lab_legend)) { lab_legend = 1:ncat } @@ -202,10 +208,10 @@ PlotTriangles4Categories <- function(data, brks = NULL, cols = NULL, } if (xlab){ - axis(1, at =(1:ncol) - 0.5, las = 2, labels = xlabels, cex.axis = 1.5) + axis(1, at =(1:ncol) - 0.5, las = 2, labels = xlabels, cex.axis = cex_axis) } if (ylab){ - axis(2, at = (1:nrow) - 0.5, las = 2, labels = ylabels, cex.axis = 1.5) + axis(2, at = (1:nrow) - 0.5, las = 2, labels = ylabels, cex.axis = cex_axis) } diff --git a/R/sample_data.R b/R/sample_data.R index 3a59ec1b546be9aa6212741fb1069ec76a5e8a0d..666c0143b701e2ecd47d53178ddbe9926d8d02a1 100644 --- a/R/sample_data.R +++ b/R/sample_data.R @@ -63,42 +63,3 @@ NULL #' @author Jost von Hardenberg \email{j.vonhardenberg@isac.cnr.it} #' @keywords data NULL - -#' Sample Of Experimental And Observational Climate Data Averaged Over A Region -#' -#' This sample data set contains area-averaged seasonal forecast and corresponding observational data from the Copernicus Climate Change ECMWF-System 5 forecast system, and from the Copernicus Climate Change ERA-5 reconstruction. Specifically, for the 'tas' (2-meter temperature) variable, for the 15 first forecast ensemble members, monthly averaged, for the 3 first forecast time steps (lead months 1 to 4) of the November start dates of 2000 to 2005, for the Mediterranean region (27N-48N, 12W-40E). -#' -#' It is recommended to use the data set as follows: -#'\preformatted{ -#' require(zeallot) -#' c(exp, obs) %<-% CSTools::areave_data -#'} -#' -#' The `CST_Load` call used to generate the data set in the infrastructure of the Earth Sciences Department of the Barcelona Supercomputing Center is shown next. Note that `CST_Load` internally calls `s2dverification::Load`, which would require a configuration file (not provided here) expressing the distribution of the 'system5c3s' and 'era5' NetCDF files in the file system. 
-#'\preformatted{ -#' library(CSTools) -#' require(zeallot) -#' -#' startDates <- c('20001101', '20011101', '20021101', -#' '20031101', '20041101', '20051101') -#' -#' areave_data <- -#' CST_Load( -#' var = 'tas', -#' exp = 'system5c3s', -#' obs = 'era5', -#' nmember = 15, -#' sdates = startDates, -#' leadtimemax = 3, -#' latmin = 27, latmax = 48, -#' lonmin = -12, lonmax = 40, -#' output = 'areave', -#' nprocs = 1 -#' ) -#'} -#' -#' @name areave_data -#' @docType data -#' @author Nicolau Manubens \email{nicolau.manubens@bsc.es} -#' @keywords data -NULL diff --git a/data/areave_data.RData b/data/areave_data.RData deleted file mode 100644 index 426d4e165fada19e79a8a6912cc78124b24fd9f1..0000000000000000000000000000000000000000 Binary files a/data/areave_data.RData and /dev/null differ diff --git a/man/AdamontQQCorr.Rd b/man/AdamontQQCorr.Rd new file mode 100644 index 0000000000000000000000000000000000000000..ec49bad348628476df668ad136d1770197f2f430 --- /dev/null +++ b/man/AdamontQQCorr.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_AdamontQQCorr.R +\name{AdamontQQCorr} +\alias{AdamontQQCorr} +\title{AdamontQQCorr computes quantile-quantile correction of seasonal or decadal +forecast data using weather types} +\usage{ +AdamontQQCorr( + exp, + wt_exp, + obs, + wt_obs, + corrdims = c("member", "sdate", "ftime"), + londim = "lon", + latdim = "lat", + regrid = FALSE, + NN = NULL +) +} +\arguments{ +\item{exp}{array with named dimensions (such as \code{$data} array of +experiment data from an object of class \code{s2dv_cube})} + +\item{wt_exp}{corresponding weather types (same dimensions as \code{exp} but +lat/lon)} + +\item{obs}{array with named dimensions with reference data (can also be +\code{$data} array of class \code{s2dv_cube}). lat/lon dimensions can differ +from \code{exp} if non rectilinear latlon grids are used, in which case +regrid should be set to TRUE and .NearestNeighbors \code{NN} output should be +provided} + +\item{wt_obs}{corresponding weather types (same dimensions as \code{obs} but +lat/lon)} + +\item{corrdims}{list of dimensions in \code{exp} for which quantile mapping +correction is applied} + +\item{londim}{character name of longitude dimension in \code{exp} and +\code{obs}} + +\item{latdim}{character name of latitude dimension in \code{exp} and +\code{obs}} + +\item{regrid}{(optional) boolean indicating whether .NearestNeighbors +regridding is needed} + +\item{NN}{(optional, if regrid=TRUE) list (output from .NearestNeighbors) +maps (nlat, nlon) onto (nlat_o, nlon_o)} +} +\value{ +an array (such as \code{$data} array from an object of class +\code{s2dv_cube}) with named dimensions, containing experiment data on the +lat/lon grid of \code{obs} array, corrected by quantile mapping depending on +the weather types \code{wt_exp} +} +\description{ +This function computes a quantile mapping based on weather types +for experiment data (typically a hindcast) onto reference \code{obs}, +typically provided by reanalysis data. 
+} +\examples{ +\dontrun{ +wt_exp <- sample(1:3, 15*6*3, replace=T) +dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +wt_obs <- sample(1:3, 6*3, replace=T) +dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3) +exp_corr <- AdamontQQCorr(exp=lonlat_data$exp$data, wt_exp=wt_exp, + obs=lonlat_data$obs$data, wt_obs=wt_obs, + corrdims = c('dataset','member','sdate','ftime')) +} +} +\author{ +Paola Marson, \email{paola.marson@meteo.fr} for PROSNOW version + +Lauriane Batté, \email{lauriane.batte@meteo.fr} for CSTools adaptation +} diff --git a/man/Analogs.Rd b/man/Analogs.Rd index 06107c078ff49d5978679daa47625b319bec3926..746ebdd1d8e48fac7a7b21b68a1f029626ec71b8 100644 --- a/man/Analogs.Rd +++ b/man/Analogs.Rd @@ -8,14 +8,17 @@ Analogs( expL, obsL, time_obsL, + time_expL = NULL, expVar = NULL, obsVar = NULL, criteria = "Large_dist", + excludeTime = NULL, lonVar = NULL, latVar = NULL, region = NULL, nAnalogs = NULL, - return_list = FALSE + AnalogsInfo = FALSE, + ncores = 1 ) } \arguments{ @@ -28,11 +31,15 @@ to be already subset for the desired large scale region.} \item{obsL}{an array of N named dimensions containing the observational field on the large scale. The element 'data' in the 's2dv_cube' object must have -the same latitudinal and longitudinal dimensions as parameter 'expL' and a -temporal dimension with the maximum number of available observations.} +the same latitudinal and longitudinal dimensions as parameter 'expL' and a +single temporal dimension with the maximum number of available observations.} \item{time_obsL}{a character string indicating the date of the observations -in the format "dd/mm/yyyy"} +in the format "dd/mm/yyyy". Reference time to search for analogs.} + +\item{time_expL}{an array of N named dimensions (coinciding with time +dimensions in expL) of character string(s) indicating the date(s) of the +experiment in the format "dd/mm/yyyy". Time(s) to find the analogs.} \item{expVar}{an array of N named dimensions containing the experimental field on the local scale, usually a different variable to the parameter @@ -49,8 +56,14 @@ selection of analogs: \item{Local_dist} minimum Euclidean distance in the large scale pattern and minimum Euclidean distance in the local scale pattern; and \item{Local_cor} minimum Euclidean distance in the large scale pattern, -minimum Euclidean distance in the local scale pattern and highest correlation -in the local variable to downscale.}} +minimum Euclidean distance in the local scale pattern and highest +correlation in the local variable to downscale.}} + +\item{excludeTime}{an array of N named dimensions (coinciding with time +dimensions in expL) of character string(s) indicating the date(s) of the +observations in the format "dd/mm/yyyy" to be excluded during the search of +analogs. It can be NULL but if expL is not a forecast (time_expL contained in +time_obsL),by default time_expL will be removed during the search of analogs.} \item{lonVar}{a vector containing the longitude of parameter 'expVar'.} @@ -63,25 +76,29 @@ the maximum longitude, the minimum latitude and the maximum latitude.} 'Local_dist' or 'Local_cor'. This is not the necessary the number of analogs that the user can get, but the number of events with minimum distance in which perform the search of the best Analog. The default value for the -'Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor'criterias must -be selected by the user otherwise the default value will be set as 10.} - -\item{return_list}{TRUE to get a list with the best analogs. 
FALSE -to get a single analog, the best analog. For Downscaling return_list must be -FALSE.} +'Large_dist' criteria is 1, for 'Local_dist' and 'Local_cor' criterias must +be greater than 1 in order to match with the first criteria, if nAnalogs is +NULL for 'Local_dist' and 'Local_cor' the default value will be set at the +length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just +the best analog.} + +\item{AnalogsInfo}{TRUE to get a list with two elements: 1) the downscaled +field and 2) the AnalogsInfo which contains: a) the number of the best +analogs, b) the corresponding value of the metric used in the selected +criteria (distance values for Large_dist and Local_dist,correlation values +for Local_cor), c)dates of the analogs). The analogs are listed in decreasing +order, the first one is the best analog (i.e if the selected criteria is +Local_cor the best analog will be the one with highest correlation, while for +Large_dist criteria the best analog will be the day with minimum Euclidean +distance). Set to FALSE to get a single analog, the best analog, for instance +for downscaling.} + +\item{ncores}{the number of cores to use in parallel computation.} } \value{ AnalogsFields, dowscaled values of the best analogs for the criteria -selected. - -AnalogsInfo, a dataframe with information about the number of the -best analogs, the corresponding value of the metric used in the selected -criteria (distance values for Large_dist and Local_dist,correlation values -for Local_cor), date of the analog). The analogs are listed in decreasing -order, the first one is the best analog (i.e if the selected criteria -is Local_cor the best analog will be the one with highest correlation, while -for Large_dist criteria the best analog will be the day with minimum -Euclidean distance) +selected. If AnalogsInfo is set to TRUE the function also returns a +list with the dowsncaled field and the Analogs Information. } \description{ This function perform a downscaling using Analogs. To compute @@ -96,9 +113,9 @@ The analogs function will find the best analogs based in three criterias: (1) Minimum Euclidean distance in the large scale pattern (i.e. SLP) (2) Minimum Euclidean distance in the large scale pattern (i.e. SLP) and minimum Euclidean distance in the local scale pattern (i.e. SLP). -(3) Minimum Euclidean distance in the large scale pattern (i.e. SLP), minimum -distance in the local scale pattern (i.e. SLP) and highest correlation in the -local variable to downscale (i.e Precipitation). +(3) Minimum Euclidean distance in the large scale pattern (i.e. SLP), +minimum distance in the local scale pattern (i.e. SLP) and highest +correlation in the local variable to downscale (i.e Precipitation). The search of analogs must be done in the longest dataset posible. This is important since it is necessary to have a good representation of the possible states of the field in the past, and therefore, to get better @@ -114,279 +131,102 @@ the large and local scale in based of the observations. The function is an adapted version of the method of Yiou et al 2013. } \examples{ -require(zeallot) - # Example 1:Downscaling using criteria 'Large_dist' and a single variable: -# The best analog is found using a single variable (i.e. Sea level pressure, -# SLP). The downscaling will be done in the same variable used to study the -# minimal distance (i.e. SLP). obsVar and expVar NULLS or equal to obsL and -# expL respectively region, lonVar and latVar not necessary here. 
-# return_list=FALSE - expSLP <- rnorm(1:20) dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*1.2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) +obsSLP <- c(rnorm(1:180), expSLP * 1.2) +dim(obsSLP) <- c(time = 10, lat = 4, lon = 5) time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, time_obsL=time_obsSLP) -str(downscale_field) +downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, + time_obsL = time_obsSLP,time_expL = "01-01-1994") # Example 2: Downscaling using criteria 'Large_dist' and 2 variables: -# The best analog is found using 2 variables (i.e. Sea Level Pressure, SLP -# and precipitation, pr): one variable (i.e. sea level pressure, expL) to -# find the best analog day (defined in criteria 'Large_dist' as the day, in -# obsL, with the minimum Euclidean distance to the day of interest in expL) -# The second variable will be the variable to donwscale (i.e. precipitation, -# obsVar). Parameter obsVar must be different to obsL.The downscaled -# precipitation will be the precipitation that belongs to the best analog day -# in SLP. Region not needed here since will be the same for both variables. - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) -downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, - obsVar=obs.pr, - time_obsL=time_obsSLP) -str(downscale_field) +obs.pr <- c(rnorm(1:200) * 0.001) +dim(obs.pr) <- dim(obsSLP) +downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, + time_obsL = time_obsSLP, time_expL = "01-01-1994") # Example 3:List of best Analogs using criteria 'Large_dist' and a single -# variable: same as Example 1 but getting a list of best analogs (return_list -# =TRUE) with the corresponding downscaled values, instead of only 1 single -# donwscaled value instead of 1 single downscaled value. Imposing nAnalogs -# (number of analogs to do the search of the best Analogs). obsVar and expVar -# NULL or equal to obsL and expL,respectively. - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:1980),expSLP*1.5) +obsSLP <- c(rnorm(1:1980), expSLP * 1.5) dim(obsSLP) <- c(lat = 4, lon = 5, time = 100) time_obsSLP <- paste(rep("01", 100), rep("01", 100), 1920 : 2019, sep = "-") -downscale_field<- Analogs(expL=expSLP, obsL=obsSLP, time_obsSLP, - nAnalogs=5,return_list = TRUE) -str(downscale_field) +downscale_field<- Analogs(expL = expSLP, obsL = obsSLP, time_obsSLP, + nAnalogs = 5, time_expL = "01-01-2003", + AnalogsInfo = TRUE, excludeTime = "01-01-2003") # Example 4:List of best Analogs using criteria 'Large_dist' and 2 variables: -# same as example 2 but getting a list of best analogs (return_list =TRUE) -# with the values instead of only a single downscaled value. Imposing -# nAnalogs (number of analogs to do the search of the best Analogs). obsVar -# and expVar must be different to obsL and expL. 
- -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) +obsSLP <- c(rnorm(1:180), expSLP * 2) dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) -downscale_field <- Analogs(expL=expSLP, obsL=obsSLP, - obsVar=obs.pr, - time_obsL=time_obsSLP,nAnalogs=5, - return_list = TRUE) -str(downscale_field) +downscale_field <- Analogs(expL = expSLP, obsL = obsSLP, obsVar = obs.pr, + time_obsL = time_obsSLP,nAnalogs=5, + time_expL = "01-10-2003", AnalogsInfo = TRUE) # Example 5: Downscaling using criteria 'Local_dist' and 2 variables: -# The best analog is found using 2 variables (i.e. Sea Level Pressure, -# SLP and precipitation, pr). Parameter obsVar must be different to obsL.The -# downscaled precipitation will be the precipitation that belongs to the best -# analog day in SLP. lonVar, latVar and Region must be given here to select -# the local scale - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) # analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr, - criteria="Local_dist",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),region=region, - nAnalogs = 10, return_list = FALSE) -str(Local_scale) +region=c(lonmin = -1 ,lonmax = 2, latmin = 30, latmax = 33) +Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, + obsVar = obs.pr, criteria = "Local_dist", + lonVar = seq(-1, 5, 1.5),latVar = seq(30, 35, 1.5), + region = region,time_expL = "01-10-2000", + nAnalogs = 10, AnalogsInfo = TRUE) # Example 6: list of best analogs using criteria 'Local_dist' and 2 -# variables: -# The best analog is found using 2 variables. Parameter obsVar must be -# different to obsL in this case.The downscaled precipitation will be the -# precipitation that belongs to the best analog day in SLP. lonVar, latVar -# and Region needed. return_list=TRUE - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) -# analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr, - criteria="Local_dist",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),region=region, - nAnalogs = 5, return_list = TRUE) -str(Local_scale) +Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, + criteria = "Local_dist", lonVar = seq(-1, 5, 1.5), + latVar = seq(30, 35, 1.5), region = region, + time_expL = "01-10-2000", nAnalogs = 5, + AnalogsInfo = TRUE) # Example 7: Downscaling using Local_dist criteria -# without parameters obsVar and expVar. 
return list =FALSE - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -# analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - criteria="Local_dist",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),region=region, - nAnalogs = 10, return_list = TRUE) -str(Local_scale) +Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, + criteria = "Local_dist", lonVar = seq(-1, 5, 1.5), + latVar = seq(30, 35, 1.5), region = region, + time_expL = "01-10-2000", + nAnalogs = 10, AnalogsInfo = FALSE) # Example 8: Downscaling using criteria 'Local_cor' and 2 variables: -# The best analog is found using 2 variables. Parameter obsVar and expVar -# are necessary and must be different to obsL and expL, respectively. -# The downscaled precipitation will be the precipitation that belongs to -# the best analog day in SLP large and local scales, and to the day with -# the higher correlation in precipitation. return_list=FALSE. two options -# for nAnalogs - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -exp.pr <- c(rnorm(1:20)*0.001) -dim(exp.pr)=dim(expSLP) -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) -# analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scalecor <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr,expVar=exp.pr, - criteria="Local_cor",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),nAnalogs=8,region=region, - return_list = FALSE) -Local_scalecor$AnalogsInfo -Local_scalecor$DatesAnalogs -# same but without imposing nAnalogs, so nAnalogs will be set by default as 10 -Local_scalecor <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr,expVar=exp.pr, - criteria="Local_cor",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),region=region, - return_list = FALSE) -Local_scalecor$AnalogsInfo -Local_scalecor$DatesAnalogs - -# Example 9: List of best analogs in the three criterias Large_dist, -# Local_dist, and Local_cor return list TRUE same variable - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -# analogs of large scale using criteria 1 -Large_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - criteria="Large_dist", - nAnalogs = 7, return_list = TRUE) -str(Large_scale) -Large_scale$AnalogsInfo -# analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - criteria="Local_dist",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),nAnalogs=7,region=region, - return_list = TRUE) -str(Local_scale) -Local_scale$AnalogsInfo -# analogs of local scale using criteria 3 -Local_scalecor <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obsSLP,expVar=expSLP, - criteria="Local_cor",lonVar=seq(-1,5,1.5), - 
latVar=seq(30,35,1.5),nAnalogs=7,region=region, - return_list = TRUE) -str(Local_scalecor) -Local_scalecor$AnalogsInfo - -# Example 10: Downscaling in the three criteria Large_dist, Local_dist, and -# Local_cor return list FALSE, different variable - -expSLP <- rnorm(1:20) -dim(expSLP) <- c(lat = 4, lon = 5) -obsSLP <- c(rnorm(1:180),expSLP*2) -dim(obsSLP) <- c(lat = 4, lon = 5, time = 10) -time_obsSLP <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-") -exp.pr <- c(rnorm(1:20)*0.001) -dim(exp.pr)=dim(expSLP) -obs.pr <- c(rnorm(1:200)*0.001) -dim(obs.pr)=dim(obsSLP) -# analogs of large scale using criteria 1 -Large_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - criteria="Large_dist", - nAnalogs = 7, return_list = FALSE) -str(Large_scale) -Large_scale$AnalogsInfo -# analogs of local scale using criteria 2 -lonmin=-1 -lonmax=2 -latmin=30 -latmax=33 -region=c(lonmin,lonmax,latmin,latmax) -Local_scale <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr, - criteria="Local_dist",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),nAnalogs=7,region=region, - return_list = FALSE) -str(Local_scale) -Local_scale$AnalogsInfo -# analogs of local scale using criteria 3 -Local_scalecor <- Analogs(expL=expSLP, - obsL=obsSLP, time_obsL=time_obsSLP, - obsVar=obs.pr,expVar=exp.pr, - criteria="Local_cor",lonVar=seq(-1,5,1.5), - latVar=seq(30,35,1.5),nAnalogs=7,region=region, - return_list = FALSE) -str(Local_scalecor) -Local_scalecor$AnalogsInfo - +exp.pr <- c(rnorm(1:20) * 0.001) +dim(exp.pr) <- dim(expSLP) +Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, + obsVar = obs.pr, expVar = exp.pr, + criteria = "Local_cor", lonVar = seq(-1, 5, 1.5), + time_expL = "01-10-2000", latVar = seq(30, 35, 1.5), + nAnalogs = 8, region = region, AnalogsInfo = FALSE) +# same but without imposing nAnalogs,so nAnalogs will be set by default as 10 +Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, + obsVar = obs.pr, expVar = exp.pr, + criteria = "Local_cor", lonVar = seq(-1,5,1.5), + time_expL = "01-10-2000", latVar=seq(30, 35, 1.5), + region = region, AnalogsInfo = TRUE) + +#'Example 9: List of best analogs in the three criterias Large_dist, +Large_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, + criteria = "Large_dist", time_expL = "01-10-2000", + nAnalogs = 7, AnalogsInfo = TRUE) +Local_scale <- Analogs(expL = expSLP, obsL = obsSLP, time_obsL = time_obsSLP, + time_expL = "01-10-2000", criteria = "Local_dist", + lonVar = seq(-1, 5, 1.5), latVar = seq(30, 35, 1.5), + nAnalogs = 7,region = region, AnalogsInfo = TRUE) +Local_scalecor <- Analogs(expL = expSLP, obsL = obsSLP,time_obsL = time_obsSLP, + obsVar = obsSLP, expVar = expSLP, + time_expL = "01-10-2000",criteria = "Local_cor", + lonVar = seq(-1, 5, 1.5), latVar = seq(30, 35, 1.5), + nAnalogs = 7,region = region, + AnalogsInfo = TRUE) +#Example 10: Downscaling using criteria 'Large_dist' and a single variable, +# more than 1 sdate: +expSLP <- rnorm(1:40) +dim(expSLP) <- c(sdate = 2, lat = 4, lon = 5) +obsSLP <- c(rnorm(1:180), expSLP * 1.2) +dim(obsSLP) <- c(time = 11, lat = 4, lon = 5) +time_obsSLP <- paste(rep("01", 11), rep("01", 11), 1993 : 2003, sep = "-") +time_expSLP <- paste(rep("01", 2), rep("01", 2), 1994 : 1995, sep = "-") +excludeTime <- c("01-01-2003", "01-01-2003") +dim(excludeTime) <- c(sdate = 2) +downscale_field_exclude <- Analogs(expL = expSLP, obsL = obsSLP, + time_obsL = time_obsSLP, time_expL = time_expSLP, + 
excludeTime = excludeTime, AnalogsInfo = TRUE) } \references{ Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard, @@ -397,5 +237,9 @@ from surface pressure using analogues. Clim. Dyn., 41, 1419-1437. \author{ M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it} +Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it } + +Veronica Torralba, \email{veronica.torralba@cmcc.it} + Nuria Perez-Zanon \email{nuria.perez@bsc.es} } diff --git a/man/CST_AdamontAnalog.Rd b/man/CST_AdamontAnalog.Rd new file mode 100644 index 0000000000000000000000000000000000000000..91372564305a9a0a77f6279ca393fd72c2014292 --- /dev/null +++ b/man/CST_AdamontAnalog.Rd @@ -0,0 +1,103 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_AdamontAnalog.R +\name{CST_AdamontAnalog} +\alias{CST_AdamontAnalog} +\alias{AdamontAnalog} +\title{CST_AdamontAnalog finds analogous data in the reference dataset to experiment +data based on weather types} +\usage{ +CST_AdamontAnalog( + exp, + obs, + wt_exp, + wt_obs, + nanalogs, + method = "pattcorr", + thres = NULL, + search_obsdims = c("member", "sdate", "ftime"), + londim = "lon", + latdim = "lat" +) + +AdamontAnalog( + exp, + obs, + wt_exp, + wt_obs, + nanalogs = 5, + method = "pattcorr", + thres = NULL, + search_obsdims = c("member", "sdate", "ftime"), + londim = "lon", + latdim = "lat" +) +} +\arguments{ +\item{exp}{\itemize{ +\item\code{CST_AdamontAnalog}{experiment data an object of class \code{s2dv_cube}, can be output +from quantile correction using CST_AdamontQQCorr} +\item\code{AdamontAnalog}{experiment data array with named dimension}}} + +\item{obs}{\itemize{ +\item\code{CST_AdamontAnalog}{reference data, also of class \code{s2dv_cube}.} +\item\code{AdamontAnalog}{reference data array with named dimension.}} +Note that lat/lon dimensions need to be the same as \code{exp}} + +\item{wt_exp}{corresponding weather types (same dimensions as \code{exp$data} +but lat/lon)} + +\item{wt_obs}{corresponding weather types (same dimensions as \code{obs$data} +but lat/lon)} + +\item{nanalogs}{integer defining the number of analog values to return +(default: 5)} + +\item{method}{a character string indicating the method used for analog +definition + Coded are 'pattcorr': pattern correlation + 'rain1' (for precip patterns): rain occurrence consistency + 'rain01' (for precip patterns): rain occurrence/non + occurrence consistency} + +\item{thres}{real number indicating the threshold to define rain +occurrence/non occurrence in rain(0)1} + +\item{search_obsdims}{list of dimensions in \code{obs} along which analogs are +searched for} + +\item{londim}{name of longitude dimension} + +\item{latdim}{name of latitude dimension} +} +\value{ +analog_vals +\itemize{ +\item\code{CST_AdamontAnalog}{an object of class \code{s2dv_cube} containing nanalogs +analog values for each value of \code{exp} input data} +\item\code{AdamontAnalog}{an array containing nanalogs analog values}} +} +\description{ +This function searches for analogs in a reference dataset for +experiment data, based on corresponding weather types. The experiment data is +typically a hindcast, observations are typically provided by reanalysis data. 
+}
+\examples{
+\dontrun{
+wt_exp <- sample(1:3, 15*6*3, replace=T)
+dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3)
+wt_obs <- sample(1:3, 6*3, replace=T)
+dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3)
+analog_vals <- CST_AdamontAnalog(exp = lonlat_data$exp,
+ obs=lonlat_data$obs, wt_exp=wt_exp, wt_obs=wt_obs, nanalogs=2)
+}
+\dontrun{
+wt_exp <- sample(1:3, 15*6*3, replace=T)
+dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3)
+wt_obs <- sample(1:3, 6*3, replace=T)
+dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3)
+analog_vals <- AdamontAnalog(exp = lonlat_data$exp$data,
+ obs=lonlat_data$obs$data, wt_exp=wt_exp, wt_obs=wt_obs, nanalogs=2)
+}
+}
+\author{
+Paola Marson, \email{paola.marson@meteo.fr} for PROSNOW version
+
+Lauriane Batté, \email{lauriane.batte@meteo.fr} for CSTools adaptation
+}
diff --git a/man/CST_AdamontQQCorr.Rd b/man/CST_AdamontQQCorr.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..967ce1710e4ddccdfaed41ab378b2775d331782d
--- /dev/null
+++ b/man/CST_AdamontQQCorr.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CST_AdamontQQCorr.R
+\name{CST_AdamontQQCorr}
+\alias{CST_AdamontQQCorr}
+\title{CST_AdamontQQCorr computes quantile-quantile correction of seasonal or
+decadal forecast data using weather types}
+\usage{
+CST_AdamontQQCorr(
+  exp,
+  wt_exp,
+  obs,
+  wt_obs,
+  corrdims = c("member", "sdate", "ftime"),
+  londim = "lon",
+  latdim = "lat"
+)
+}
+\arguments{
+\item{exp}{experiment data an object of class \code{s2dv_cube}}
+
+\item{wt_exp}{corresponding weather types (same dimensions as \code{exp$data}
+but lat/lon)}
+
+\item{obs}{reference data, also of class \code{s2dv_cube}. lat/lon dimensions
+can differ from \code{exp} if non rectilinear latlon grids are used,
+in which case regrid should be set to TRUE and .NearestNeighbors \code{NN}
+output should be provided}
+
+\item{wt_obs}{corresponding weather types (same dimensions as \code{obs} but
+lat/lon)}
+
+\item{corrdims}{list of dimensions in \code{exp} for which quantile mapping
+correction is applied}
+
+\item{londim}{character name of longitude dimension in \code{exp} and
+\code{obs}}
+
+\item{latdim}{character name of latitude dimension in \code{exp} and
+\code{obs}}
+}
+\value{
+an object of class \code{s2dv_cube} containing experiment data on the
+ lat/lon grid of \code{obs} input data, corrected by quantile mapping
+ depending on the weather types \code{wt_exp}
+}
+\description{
+This function computes a quantile mapping based on weather types
+for experiment data (typically a hindcast) onto reference \code{obs},
+typically provided by reanalysis data.
+} +\examples{ +\dontrun{ +wt_exp <- sample(1:3, 15*6*3, replace=T) +dim(wt_exp) <- c(dataset=1, member=15, sdate=6, ftime=3) +wt_obs <- sample(1:3, 6*3, replace=T) +dim(wt_obs) <- c(dataset=1, member=1, sdate=6, ftime=3) +exp_corr <- CST_AdamontQQCorr(exp=lonlat_data$exp, wt_exp=wt_exp, + obs=lonlat_data$obs, wt_obs=wt_obs, + corrdims = c('dataset','member','sdate','ftime')) +} +} +\author{ +Lauriane Batté, \email{lauriane.batte@meteo.fr} + +Paola Marson, \email{paola.marson@meteo.fr} + +Gildas Dayon, \email{gildas.dayon@meteo.fr} +} diff --git a/man/CST_Analogs.Rd b/man/CST_Analogs.Rd index d7dd5e14b8ff65624d6b91807f60f0333768a227..3c014909a6a433f7c2314804d85c2a2c56fce803 100644 --- a/man/CST_Analogs.Rd +++ b/man/CST_Analogs.Rd @@ -7,11 +7,16 @@ CST_Analogs( expL, obsL, - time_obsL, expVar = NULL, obsVar = NULL, region = NULL, - criteria = "Large_dist" + criteria = "Large_dist", + excludeTime = NULL, + time_expL = NULL, + time_obsL = NULL, + nAnalogs = NULL, + AnalogsInfo = FALSE, + ncores = 1 ) } \arguments{ @@ -27,9 +32,6 @@ large scale. The element 'data' in the 's2dv_cube' object must have the same latitudinal and longitudinal dimensions as parameter 'expL' and a temporal dimension with the maximum number of available observations.} -\item{time_obsL}{a character string indicating the date of the observations -in the format "dd/mm/yyyy"} - \item{expVar}{an 's2dv_cube' object containing the experimental field on the local scale, usually a different variable to the parameter 'expL'. If it is not NULL (by default, NULL), the returned field by this function will be the @@ -38,56 +40,103 @@ analog of parameter 'expVar'.} \item{obsVar}{an 's2dv_cube' containing the field of the same variable as the passed in parameter 'expVar' for the same region.} -\item{region}{a vector of length four indicating the minimum longitude, the -maximum longitude, the minimum latitude and the maximum latitude.} +\item{region}{a vector of length four indicating the minimum longitude, +the maximum longitude, the minimum latitude and the maximum latitude.} -\item{criteria}{a character string indicating the criteria to be used for the +\item{criteria}{a character string indicating the criteria to be used for the selection of analogs: \itemize{ -\item{Large_dist} minimal distance in the large scale pattern; -\item{Local_dist} minimal distance in the large scale pattern and minimal -distance in the local scale pattern; and -\item{Local_cor} minimal distance in the large scale pattern, minimal -distance in the local scale pattern and maxima correlation in the -local variable to downscale.}} +\item{Large_dist} minimum Euclidean distance in the large scale pattern; +\item{Local_dist} minimum Euclidean distance in the large scale pattern +and minimum Euclidean distance in the local scale pattern; and +\item{Local_cor} minimum Euclidean distance in the large scale pattern, +minimum Euclidean distance in the local scale pattern and highest +correlation in the local variable to downscale.} +Criteria 'Large_dist' is recommended for CST_Analogs, for an advanced use of +the criterias 'Local_dist' and 'Local_cor' use 'Analogs' function.} + +\item{excludeTime}{an array of N named dimensions (coinciding with time +dimensions in expL)of character string(s) indicating the date(s) of the +observations in the format "dd/mm/yyyy" to be excluded during the search of +analogs. 
It can be NULL but if expL is not a forecast (time_expL contained in
+time_obsL), by default time_expL will be removed during the search of analogs.}
+
+\item{time_expL}{a character string indicating the date of the experiment
+in the same format as time_obsL (i.e. "yyyy-mm-dd"). By default it is NULL
+and dates are taken from element \code{$Dates$start} from expL.}
+
+\item{time_obsL}{a character string indicating the date of the observations
+in the date format (i.e. "yyyy-mm-dd"). By default it is NULL and dates are
+taken from element \code{$Dates$start} from obsL.}
+
+\item{nAnalogs}{number of Analogs to be selected to apply the criteria
+'Local_dist' or 'Local_cor'. This is not necessarily the number of analogs
+that the user gets, but the number of events with minimum distance over
+which the search for the best Analog is performed. The default value for the
+'Large_dist' criterion is 1; for 'Local_dist' and 'Local_cor' it must
+be greater than 1 in order to match the first criterion. If nAnalogs is
+NULL for 'Local_dist' and 'Local_cor', the default value will be set to the
+length of 'time_obsL'. If AnalogsInfo is FALSE the function returns just
+the best analog.}
+
+\item{AnalogsInfo}{TRUE to get a list with two elements: 1) the downscaled
+field and 2) the AnalogsInfo which contains: a) the number of the best
+analogs, b) the corresponding value of the metric used in the selected
+criteria (distance values for Large_dist and Local_dist, correlation values
+for Local_cor) and c) dates of the analogs. The analogs are listed in decreasing
+order, the first one is the best analog (i.e. if the selected criteria is
+Local_cor the best analog will be the one with highest correlation, while for
+Large_dist criteria the best analog will be the day with minimum Euclidean
+distance). Set to FALSE to get a single analog, the best analog, for instance
+for downscaling.}
+
+\item{ncores}{The number of cores to use in parallel computation}
 }
 \value{
-An 's2dv_cube' object containing the dowscaled values of the best
-analogs in the criteria selected.
+An 'array' object containing the downscaled values of the best
+analogs.
 }
 \description{
 This function perform a downscaling using Analogs. To compute the analogs,
 the function search for days with similar large scale conditions
-to downscaled fields in the local scale. The large scale and the local scale
+to the fields to be downscaled at a local scale. The large scale and the local scale
 regions are defined by the user. The large scale is usually given by
 atmospheric circulation as sea level pressure or geopotential height
 (Yiou et al, 2013) but the function gives the possibility to use another
 field. The local scale will be usually given by precipitation or temperature
 fields, but might be another variable.The analogs function will find the best
-analogs based in three criterias:
-(1) Minimal distance in the large scale pattern (i.e. SLP)
-(2) Minimal distance in the large scale pattern (i.e. SLP) and minimal
-distance in the local scale pattern (i.e. SLP).
-(3) Minimal distance in the large scale pattern (i.e. SLP), minimal
-distance in the local scale pattern (i.e. SLP) and maxima correlation in the
-local variable to downscale (i.e Precipitation).
+analogs based on minimum Euclidean distance in the large scale pattern
+(i.e. SLP).
+ The search of analogs must be done in the longest dataset possible.
 This is important since it is necessary to have a good representation of
 the possible states of the field in the past, and therefore, to get better
-analogs. Once the search of the analogs is complete, and in order to used
-the three criterias the user can select a number of analogs, using parameter
-'nAnalogs' to restrict the selection of the best analogs in a short number
-of posibilities, the best ones.
+analogs.
 This function has not constrains of specific regions, variables to
 downscale, or data to be used (seasonal forecast data, climate projections
 data, reanalyses data). The regrid into a finner scale is done interpolating
 with CST_Load. Then, this interpolation is corrected selecting the analogs in
 the large and local scale in based of the observations. The function is an
-adapted version of the method of Yiou et al 2013.
+adapted version of the method of Yiou et al 2013. For an advanced search of
+Analogs (multiple Analogs, different criteria, further information from the
+metrics and dates of the selected Analogs) use the 'Analogs'
+function within the 'CSTools' package.
 }
 \examples{
-res <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs)
-
+expL <- rnorm(1:200)
+dim(expL) <- c(member=10,lat = 4, lon = 5)
+obsL <- c(rnorm(1:180),expL[1,,]*1.2)
+dim(obsL) <- c(time = 10,lat = 4, lon = 5)
+time_obsL <- paste(rep("01", 10), rep("01", 10), 1994 : 2003, sep = "-")
+time_expL <- time_obsL[1]
+lon <- seq(-1,5,1.5)
+lat <- seq(30,35,1.5)
+expL <- s2dv_cube(data = expL, lat = lat, lon = lon,
+                  Dates = list(start = time_expL, end = time_expL))
+obsL <- s2dv_cube(data = obsL, lat = lat, lon = lon,
+                  Dates = list(start = time_obsL, end = time_obsL))
+region <- c(min(lon), max(lon), min(lat), max(lat))
+downscaled_field <- CST_Analogs(expL = expL, obsL = obsL, region = region)
 }
 \references{
Yiou, P., T. Salameh, P. Drobinski, L. Menut, R. Vautard,
@@ -102,5 +151,9 @@ code{\link{CST_Load}}, \code{\link[s2dverification]{Load}} and
 \author{
M. Carmen Alvarez-Castro, \email{carmen.alvarez-castro@cmcc.it}
+Maria M. Chaves-Montero, \email{mariadm.chaves@cmcc.it}
+
+Veronica Torralba, \email{veronica.torralba@cmcc.it}
+
 Nuria Perez-Zanon \email{nuria.perez@bsc.es}
 }
diff --git a/man/CST_AnalogsPredictors.Rd b/man/CST_AnalogsPredictors.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..152b0c8a7060026f06ff1c72b04b0a9b2ae12015
--- /dev/null
+++ b/man/CST_AnalogsPredictors.Rd
@@ -0,0 +1,151 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CST_AnalogsPredictors.R
+\name{CST_AnalogsPredictors}
+\alias{CST_AnalogsPredictors}
+\title{AEMET Downscaling
+Precipitation and maximum and minimum temperature downscaling method
+based on analogs: synoptic situations and significant predictors.}
+\usage{
+CST_AnalogsPredictors(
+  exp,
+  slp,
+  obs,
+  lon,
+  lat,
+  slp_lon,
+  slp_lat,
+  var_name,
+  hr_obs,
+  tdates,
+  ddates,
+  restrain,
+  dim_name_longitude = "lon",
+  dim_name_latitude = "lat",
+  dim_name_time = "time"
+)
+}
+\arguments{
+\item{exp}{List of arrays with downscaled period seasonal forecast data. The list
+has to contain model atmospheric variables (instantaneous 12h data) that must
+be identified by parenthesis name.
+For precipitation: +- u component of wind at 500 hPa (u500_mod) in m/s +- v component of wind at 500 hPa (v500_mod) in m/s +- temperature at 500 hPa (t500_mod) in K +- temperature at 850 hPa (t850_mod) in K +- specific humidity at 700 hPa (q700_mod) in g/kg +For temperature: +- u component of wind at 500 hPa (u500_mod) in m/s +- v component of wind at 500 hPa (v500_mod) in m/s +- temperature at 500 hPa (t500_mod) in K +- temperature at 700 hPa (t700_mod) in K +- temperature at 850 hPa (t850_mod) in K +- specific humidity at 700 hPa (q700_mod) in g/kg +- 2 meters temperature (tm2m_mod) in K +The arrays must have at least three dimensions with names 'lon', 'lat' and 'time'. +(lon = gridpoints of longitude, lat = gridpoints of latitude, time = number of downscaling days) +Seasonal forecast variables must have the same resolution and +domain as reanalysis variables ('obs' parameter, below).} + +\item{slp}{Array with atmospheric seasonal forecast model sea level pressure +(instantaneous 12h data) that must be indentify as 'slp' (hPa). It has the same +resolution as 'exp' and 'obs' paremeters but with an extended domain. +This domain contains extra degrees (most in the north and west part) compare to +synoptic domain. The array must have at least three dimensions +with names 'lon', 'lat' and 'time'.} + +\item{obs}{List of arrays with training period reanalysis data. +The list has to contain reanalysis atmospheric variables (instantaneous +12h data) that must be indentify by parenthesis name. +For precipitation: +- u component of wind at 500 hPa (u500) in m/s +- v component of wind at 500 hPa (v500) in m/s +- temperature at 500 hPa (t500) in K +- temperature at 850 hPa (t850) in K +- sea level pressure (slp) in hPa +- specific humidity at 700 hPa (q700) in g/kg +For maximum and minimum temperature: +- u component of wind at 500 hPa (u500) in m/s +- v component of wind at 500 hPa (v500) in m/s +- temperature at 500 hPa (t500) in K +- temperature at 700 hPa (t700) in K +- temperature at 850 hPa (t850) in K +- sea level pressure (slp) in hPa +- specific humidity at 700 hPa (q700) in g/kg +- 2 meters temperature (tm2m) in K +The arrays must have at least three dimensions with names 'lon', 'lat' and 'time'.} + +\item{lon}{Vector of the synoptic longitude (from (-180º) to 180º), +The vector must go from west to east. The same as for the training function.} + +\item{lat}{Vector of the synoptic latitude. The vector must go from north to south. +The same as for the training function.} + +\item{slp_lon}{Vector of the extended longitude (from (-180º) to 180º), +The vector must go from west to east. The same as for the training function.} + +\item{slp_lat}{Vector of the extended latitude. The vector must go from north to south. +The same as for the training function.} + +\item{var_name}{Variable name to downscale. There are two options: 'prec' for +precipitation and 'temp' for maximum and minimum temperature.} + +\item{hr_obs}{Local path of HR observational files (maestro and pcp/tmx-tmn). 
+For precipitation can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/v2/Serie_AEMET_v2_pcp_1951a202006_txt.tar.gz +For maximum and minimum temperature can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmax/Serie_AEMET_v1_tmax_1951a202006_txt.tar.gz and http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmin/Serie_AEMET_v1_tmin_1951a202006_txt.tar.gz respetively. +Maestro file (maestro_red_hr_SPAIN.txt) has gridpoint (nptos), longitude (lon), latitude (lat) and +altitude (alt) in columns (vector structure). +Data file (pcp/tmx/tmn_red_SPAIN_1951-201903.txt) includes 5km resolution spanish daily data +(precipitation or maximum and minimum temperature from january 1951 to june 2020. See README +file for more information. +IMPORTANT!: HR observational period must be the same as for reanalysis variables. +It is assumed that the training period is smaller than the HR original one (1951-2019), so it is +needed to make a new ascii file with the new period and the same structure as original, +specifying the training dates in the name (e.g. 'pcp_red_SPAIN_19810101-19961231.txt' for +'19810101-19961231' period).} + +\item{tdates}{Training period dates in format YYYYMMDD(start)-YYYYMMDD(end) +(e.g. 19810101-20181231).} + +\item{ddates}{Downscaling period dates in format YYYYMMDD(start)-YYYYMMDD(end) (e.g. 20191001-20200331).} + +\item{restrain}{Output (list of matrix) obtained from 'training_analogs' function. +For precipitation, 'restrain' object must contains um, vm, nger, gu92, gv92, +gu52, gv52, neni, vdmin, vref, ccm, lab_pred and cor_pred variables. +For maximum and minimum temperature, 'restrain' object must contains um, vm, +insol, neni, vdmin y vref. See 'AnalogsPred_train.R' for more information.} + +\item{dim_name_longitude}{A character string indicating the name of the longitude +dimension, by default 'longitude'.} + +\item{dim_name_latitude}{A character string indicating the name of the latitude +dimension, by default 'latitude'.} + +\item{dim_name_time}{A character string indicating the name of the time +dimension, by default 'time'.} +} +\value{ +Matrix with seasonal forecast precipitation (mm) or +maximum and minimum temperature (dozens of ºC) in a 5km x 5km regular grid +over peninsular Spain and Balearic Islands. The resulted matrices have two +dimensions ('ddates' x 'nptos').(ddates = number of downscaling days +and nptos = number of 'hr_obs' gridpoints). +} +\description{ +This function downscales low resolution precipitation data (e.g. from +Seasonal Forecast Models) through the association with an observational high +resolution (HR) dataset (AEMET 5 km gridded data of daily precipitation (Peral et al., 2017)) +and a collection of predictors and past synoptic situations similar to estimated day. +The method uses three domains: +- peninsular Spain and Balearic Islands domain (5 km resolution): HR precipitation + and the downscaling result domain. +- synoptic domain (low resolution, e.g. 1.5º x 1.5º): it should be centered over Iberian Peninsula + and cover enough extension to detect as much synoptic situations as possible. +- extended domain (low resolution, e.g. 1.5º x 1.5º): it should have the same resolution +as synoptic domain. It is used for SLP Seasonal Forecast Models. 
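An illustrative sketch of a call follows (added for orientation only, it is not part of the generated Rd and there is no \examples section for this function). Every object below is a hypothetical placeholder: the predictor arrays, the local path passed to 'hr_obs', and 'restrain', which is assumed to be the output of a previous 'training_analogs' run; the argument names and the 'tdates'/'ddates' formats are the ones documented above.

# Schematic, not run: all inputs are hypothetical placeholders.
# 'exp' and 'obs' are lists of arrays with dimensions c(lon, lat, time),
# using the element names required above (u500_mod, v500_mod, ... / u500, v500, ...).
exp <- list(u500_mod = u500_mod, v500_mod = v500_mod, t500_mod = t500_mod,
            t850_mod = t850_mod, q700_mod = q700_mod)
obs <- list(u500 = u500, v500 = v500, t500 = t500, t850 = t850,
            slp = slp_rea, q700 = q700)
down_prec <- CST_AnalogsPredictors(exp = exp, slp = slp_mod, obs = obs,
                                   lon = lon, lat = lat,
                                   slp_lon = slp_lon, slp_lat = slp_lat,
                                   var_name = "prec",
                                   hr_obs = "/path/to/aemet_5km/",  # hypothetical local path
                                   tdates = "19810101-20181231",
                                   ddates = "20191001-20200331",
                                   restrain = restrain)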
+} +\author{ +Marta Dominguez Alonso - AEMET, \email{mdomingueza@aemet.es} + +Nuria Perez-Zanon - BSC, \email{nuria.perez@bsc.es} +} diff --git a/man/CST_Calibration.Rd b/man/CST_Calibration.Rd index 76812a438ce7f4cd0b42975e7feef911d6f9b48c..33b3c5eca2297f5eeda067942cd198784648f89e 100644 --- a/man/CST_Calibration.Rd +++ b/man/CST_Calibration.Rd @@ -11,6 +11,11 @@ CST_Calibration( eval.method = "leave-one-out", multi.model = FALSE, na.fill = TRUE, + na.rm = TRUE, + apply_to = NULL, + alpha = NULL, + memb_dim = "member", + sdate_dim = "sdate", ncores = 1 ) } @@ -19,7 +24,7 @@ CST_Calibration( \item{obs}{an object of class \code{s2dv_cube} as returned by \code{CST_Load} function, containing the observed data in the element named \code{$data}.} -\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.} +\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.} \item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.} @@ -27,6 +32,16 @@ CST_Calibration( \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.} +\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}. See Details section for further information about its use and compatibility with \code{na.fill}.} + +\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.} + +\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.} + +\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.} + +\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.} + \item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. 
The default value is one.} } \value{ diff --git a/man/CST_MultiMetric.Rd b/man/CST_MultiMetric.Rd index 8e3ce593b7024138e191c4fc818516358503be7d..72ec383254b749d478689bb64449aa80e434c383 100644 --- a/man/CST_MultiMetric.Rd +++ b/man/CST_MultiMetric.Rd @@ -4,19 +4,33 @@ \alias{CST_MultiMetric} \title{Multiple Metrics applied in Multiple Model Anomalies} \usage{ -CST_MultiMetric(exp, obs, metric = "correlation", multimodel = TRUE) +CST_MultiMetric( + exp, + obs, + metric = "correlation", + multimodel = TRUE, + time_dim = "ftime", + memb_dim = "member", + sdate_dim = "sdate" +) } \arguments{ -\item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of the seasonal forecast experiment data in the element named \code{$data}.} +\item{exp}{an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of the seasonal forecast experiments data in the element named \code{$data}.} \item{obs}{an object of class \code{s2dv_cube} as returned by \code{CST_Anomaly} function, containing the anomaly of observed data in the element named \code{$data}.} -\item{metric}{a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms' or 'rmsss.} +\item{metric}{a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms', 'rmsss' and 'rpss'. If 'rpss' is chossen the terciles probabilities are evaluated.} \item{multimodel}{a logical value indicating whether a Multi-Model Mean should be computed.} + +\item{time_dim}{name of the temporal dimension where a mean will be applied. It can be NULL, the default value is 'ftime'.} + +\item{memb_dim}{name of the member dimension. It can be NULL, the default value is 'member'.} + +\item{sdate_dim}{name of the start date dimension or a dimension name identifiying the different forecast. It can be NULL, the default value is 'sdate'.} } \value{ -an object of class \code{s2dv_cube} containing the statistics of the selected metric in the element \code{$data} which is an array with two datset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the greatest first dimension correspons to the Multi-Model Mean. The third dimension contains the statistics selected. For metric \code{correlation}, the third dimension is of length four and they corresponds to the lower limit of the 95\% confidence interval, the statistics itselfs, the upper limit of the 95\% confidence interval and the 95\% significance level. For metric \code{rms}, the third dimension is length three and they corresponds to the lower limit of the 95\% confidence interval, the RMSE and the upper limit of the 95\% confidence interval. For metric \code{rmsss}, the third dimension is length two and they corresponds to the statistics itselfs and the p-value of the one-sided Fisher test with Ho: RMSSS = 0. +an object of class \code{s2dv_cube} containing the statistics of the selected metric in the element \code{$data} which is a list of arrays: for the metric requested and others for statistics about its signeificance. The arrays have two dataset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the first position in the first 'nexp' dimension correspons to the Multi-Model Mean. 
 }
 \description{
 This function calculates correlation (Anomaly Correlation Coefficient; ACC), root mean square error (RMS) and the root mean square error skill score (RMSSS) of individual anomaly models and multi-models mean (if desired) with the observations.
@@ -36,12 +50,20 @@ attr(obs, 'class') <- 's2dv_cube'
 c(ano_exp, ano_obs) \%<-\% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = TRUE)
 a <- CST_MultiMetric(exp = ano_exp, obs = ano_obs)
 str(a)
+\donttest{
+exp <- lonlat_data$exp
+obs <- lonlat_data$obs
+a <- CST_MultiMetric(exp, obs, metric = 'rpss', multimodel = FALSE)
+a <- CST_MultiMetric(exp, obs, metric = 'correlation')
+a <- CST_MultiMetric(exp, obs, metric = 'rms')
+a <- CST_MultiMetric(exp, obs, metric = 'rmsss')
+}
 }
 \references{
-Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{http://link.springer.com/10.1007/s00382-018-4404-z}
+Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{https://link.springer.com/10.1007/s00382-018-4404-z}
 }
 \seealso{
-\code{\link[s2dverification]{Corr}}, \code{\link[s2dverification]{RMS}}, \code{\link[s2dverification]{RMSSS}} and \code{\link{CST_Load}}
+\code{\link[s2dv]{Corr}}, \code{\link[s2dv]{RMS}}, \code{\link[s2dv]{RMSSS}} and \code{\link{CST_Load}}
 }
 \author{
 Mishra Niti, \email{niti.mishra@bsc.es}
diff --git a/man/CST_MultivarRMSE.Rd b/man/CST_MultivarRMSE.Rd
index 24af608c8360985de73763434a16f1a11fd35ffe..c318c105ee312313426535a35152684e4479a5fe 100644
--- a/man/CST_MultivarRMSE.Rd
+++ b/man/CST_MultivarRMSE.Rd
@@ -57,7 +57,7 @@ a <- CST_MultivarRMSE(exp = ano_exp, obs = ano_obs, weight = weight)
 str(a)
 }
 \seealso{
-\code{\link[s2dverification]{RMS}} and \code{\link{CST_Load}}
+\code{\link[s2dv]{RMS}} and \code{\link{CST_Load}}
 }
 \author{
 Deborah Verfaillie, \email{deborah.verfaillie@bsc.es}
diff --git a/man/CST_QuantileMapping.Rd b/man/CST_QuantileMapping.Rd
index e78c8d563745e077b54edd3f8d904e07510c9bf6..ec5fc8a34aeb5b7dab411383c795c3f26b3437c8 100644
--- a/man/CST_QuantileMapping.Rd
+++ b/man/CST_QuantileMapping.Rd
@@ -51,6 +51,7 @@ All methods accepts some common arguments:
 \itemize{
 \item{wet.day} {logical indicating whether to perform wet day correction or not.(Not available in 'DIS' method)}
 \item{qstep} {NULL or a numeric value between 0 and 1.}}
+When providing a forecast to be corrected through the parameter \code{exp_cor}, some inputs might need to be modified. The quantile correction is computed by comparing the objects passed through the 'exp' and 'obs' parameters, and this correction is later applied to the forecast provided in 'exp_cor'. Imagine the case of 'exp' and 'obs' having several start dates stored using a dimension e.g. 'sdate', 'sample_dims' including this dimension 'sdate', and 'exp_cor' having forecasts for several sdates different from the ones in 'exp'. In this case, the correction computed with 'exp' and 'obs' would be applied to each 'sdate' of 'exp_cor' separately. This example corresponds to the case of splitting a dataset into a training set and a validation set.
} \examples{ library(qmap) @@ -70,6 +71,14 @@ exp <- lonlat_data$exp obs <- lonlat_data$obs res <- CST_QuantileMapping(exp, obs) +exp_cor <- exp +exp_cor$data <- exp_cor$data[,,1,,,] +dim(exp_cor$data) <- c(dataset = 1, member = 15, sdate = 1, ftime = 3, + lat = 22, lon = 53) +res <- CST_QuantileMapping(exp, obs, exp_cor, + sample_dims = c('sdate', 'ftime', 'member')) +res <- CST_QuantileMapping(exp, obs, exp_cor, + sample_dims = c('ftime', 'member')) data(obsprecip) data(modprecip) exp <- modprecip$MOSS[1:10000] @@ -82,6 +91,27 @@ obs <- list(data = obs) class(obs) <- 's2dv_cube' res <- CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'DIST') +# Example using different lenght of members and sdates: +exp <- lonlat_data$exp +exp$data <- exp$data[,,1:4,,,] +dim(exp$data) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, + lat = 22, lon = 53) +obs <- lonlat_data$obs +obs$data <- obs$data[,,1:4, ,,] +dim(obs$data) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, + lat = 22, lon = 53) +exp_cor <- lonlat_data$exp +exp_cor$data <- exp_cor$data[,1:5,5:6,,,] +dim(exp_cor$data) <- c(dataset = 1, member = 5, sdate = 2, ftime = 3, + lat = 22, lon = 53) +res <- CST_QuantileMapping(exp, obs, exp_cor, + sample_dims = c('sdate', 'ftime', 'member')) +exp_cor <- lonlat_data$exp +exp_cor$data <- exp_cor$data[,,5:6,,,] +dim(exp_cor$data) <- c(dataset = 1, member = 15, sdate = 2, ftime = 3, + lat = 22, lon = 53) +res <- CST_QuantileMapping(exp, obs, exp_cor, + sample_dims = c('sdate', 'ftime', 'member')) } } \seealso{ diff --git a/man/CST_RFSlope.Rd b/man/CST_RFSlope.Rd index 0c4e16710c82a98b1d1245a2321f4675cd5a578e..b76ac93ef98a9ee157a71a664ab86a24824c84c3 100644 --- a/man/CST_RFSlope.Rd +++ b/man/CST_RFSlope.Rd @@ -4,7 +4,7 @@ \alias{CST_RFSlope} \title{RainFARM spectral slopes from a CSTools object} \usage{ -CST_RFSlope(data, kmin = 1, time_dim = NULL) +CST_RFSlope(data, kmin = 1, time_dim = NULL, ncores = 1) } \arguments{ \item{data}{An object of the class 's2dv_cube', containing the spatial precipitation fields to downscale. @@ -18,6 +18,8 @@ to average these slopes, which can be specified by parameter \code{time_dim}.} over which to compute spectral slopes. If a character array of dimension names is provided, the spectral slopes will be computed as an average over all elements belonging to those dimensions. If omitted one of c("ftime", "sdate", "time") is searched and the first one with more than one element is chosen.} + +\item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} } \value{ CST_RFSlope() returns spectral slopes using the RainFARM convention diff --git a/man/CST_RFWeights.Rd b/man/CST_RFWeights.Rd index ef5ebe4d5dd8585143c3fb78d2854e0783dbdd1d..acae8c6a512d775a3f0b0e892f4c121d12f907e0 100644 --- a/man/CST_RFWeights.Rd +++ b/man/CST_RFWeights.Rd @@ -4,7 +4,17 @@ \alias{CST_RFWeights} \title{Compute climatological weights for RainFARM stochastic precipitation downscaling} \usage{ -CST_RFWeights(climfile, nf, lon, lat, varname = "", fsmooth = TRUE) +CST_RFWeights( + climfile, + nf, + lon, + lat, + varname = NULL, + fsmooth = TRUE, + lonname = "lon", + latname = "lat", + ncores = NULL +) } \arguments{ \item{climfile}{Filename of a fine-scale precipitation climatology. @@ -15,7 +25,8 @@ Suitable climatology files could be for example a fine-scale precipitation clima from a high-resolution regional climate model (see e.g. Terzago et al. 
2018), a local high-resolution gridded climatology from observations, or a reconstruction such as those which can be downloaded from the WORLDCLIM (http://www.worldclim.org) or CHELSA (http://chelsa-climate.org) -websites. The latter data will need to be converted to NetCDF format before being used (see for example the GDAL tools (https://www.gdal.org).} +websites. The latter data will need to be converted to NetCDF format before being used (see for example the GDAL tools (https://www.gdal.org). +It could also be a 's2dv_cube' object.} \item{nf}{Refinement factor for downscaling (the output resolution is increased by this factor).} @@ -28,9 +39,15 @@ the function will perform a subsetting to ensure this condition.} \item{varname}{Name of the variable to be read from \code{climfile}.} \item{fsmooth}{Logical to use smooth conservation (default) or large-scale box-average conservation.} + +\item{lonname}{a character string indicating the name of the longitudinal dimension set as 'lon' by default.} + +\item{latname}{a character string indicating the name of the latitudinal dimension set as 'lat' by default.} + +\item{ncores}{an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} } \value{ -A matrix containing the weights with dimensions (lon, lat). +An object of class 's2dv_cube' containing in matrix \code{data} the weights with dimensions (lon, lat). } \description{ Compute climatological ("orographic") weights from a fine-scale precipitation climatology file. diff --git a/man/CST_RainFARM.Rd b/man/CST_RainFARM.Rd index 1c609e084ca509c1343802604dddf9899088da3e..f86ab89cb65487fd36c22bd40e6ccddcf6cfc289 100644 --- a/man/CST_RainFARM.Rd +++ b/man/CST_RainFARM.Rd @@ -6,9 +6,9 @@ \usage{ CST_RainFARM( data, - nf, weights = 1, slope = 0, + nf, kmin = 1, nens = 1, fglob = FALSE, @@ -29,15 +29,14 @@ which can be specified by parameter \code{time_dim}. The number of longitudes and latitudes in the input data is expected to be even and the same. If not the function will perform a subsetting to ensure this condition.} -\item{nf}{Refinement factor for downscaling (the output resolution is increased by this factor).} - \item{weights}{Matrix with climatological weights which can be obtained using the \code{CST_RFWeights} function. If \code{weights=1.} (default) no weights are used. -The matrix should have dimensions (lon, lat) in this order. -The names of these dimensions are not checked.} +The names of these dimensions must be at least 'lon' and 'lat'.} \item{slope}{Prescribed spectral slope. The default is \code{slope=0.} -meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}.} +meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. A 1D array with named dimension can be provided (see details and examples)} + +\item{nf}{Refinement factor for downscaling (the output resolution is increased by this factor).} \item{kmin}{First wavenumber for spectral slope (default: \code{kmin=1}).} @@ -72,7 +71,7 @@ with the following behaviour if set to TRUE: \value{ CST_RainFARM() returns a downscaled CSTools object (i.e., of the class 's2dv_cube'). -If \code{nens>1} an additional dimension named "realization" is added to the +If \code{nens>1} an additional dimension named "realizatio"n is added to the \code{$data} array after the "member" dimension (unless \code{drop_realization_dim=TRUE} is specified). 
The ordering of the remaining dimensions in the \code{$data} element of the input object is maintained. @@ -84,6 +83,9 @@ downscaling method and accepts a CSTools object (an object of the class Adapted for climate downscaling and including orographic correction as described in Terzago et al. 2018. } +\details{ +Wether parameter 'slope' and 'weights' presents seasonality dependency, a dimension name should match between these parameters and the input data in parameter 'data'. See example 2 below where weights and slope vary with 'sdate' dimension. +} \examples{ #Example 1: using CST_RainFARM for a CSTools object nf <- 8 # Choose a downscaling by factor 8 @@ -95,8 +97,8 @@ lat <- seq(40, 43.5, 0.5) dim(lat) <- c(lat = length(lat)) data <- list(data = exp, lon = lon, lat = lat) # Create a test array of weights -ww <- array(1., dim = c(8 * nf, 8 * nf)) -res <- CST_RainFARM(data, nf, ww, nens=3) +ww <- array(1., dim = c(lon = 8 * nf, lat = 8 * nf)) +res <- CST_RainFARM(data, nf = nf, weights = ww, nens=3) str(res) #List of 3 # $ data: num [1, 1:2, 1:3, 1:3, 1:4, 1:64, 1:64] 260 553 281 278 143 ... @@ -106,6 +108,11 @@ dim(res$data) # dataset member realization sdate ftime lat lon # 1 2 3 3 4 64 64 +# Example 2: +slo <- array(c(0.1, 0.5, 0.7), c(sdate= 3)) +wei <- array(rnorm(8 * 8 * 3), c(lon = 8, lat = 8, sdate = 3)) +res <- CST_RainFARM(lonlat_prec, + weights = wei, slope = slo, nf = 2) } \references{ Terzago, S. et al. (2018). NHESS 18(11), 2825-2840. diff --git a/man/CST_SaveExp.Rd b/man/CST_SaveExp.Rd index 0e49c11955488a64772269976f01ed95919f4af3..ddd9164e1c9d88e284d4653d1b05941699e2adf4 100644 --- a/man/CST_SaveExp.Rd +++ b/man/CST_SaveExp.Rd @@ -25,7 +25,7 @@ This function allows to divide and save a object of class \dontrun{ library(CSTools) data <- lonlat_data$exp -destination <- "./path/" +destination <- "./path2/" CST_SaveExp(data = data, destination = destination) } diff --git a/man/CST_SplitDim.Rd b/man/CST_SplitDim.Rd index ee93aedc2075b38292d392a655a57b83955aecbf..80a94da3c8735a4fe76ecb202af69d788732bf36 100644 --- a/man/CST_SplitDim.Rd +++ b/man/CST_SplitDim.Rd @@ -4,7 +4,14 @@ \alias{CST_SplitDim} \title{Function to Split Dimension} \usage{ -CST_SplitDim(data, split_dim = "time", indices = NULL, freq = "monthly") +CST_SplitDim( + data, + split_dim = "time", + indices = NULL, + freq = "monthly", + new_dim_name = NULL, + insert_ftime = NULL +) } \arguments{ \item{data}{a 's2dv_cube' object} @@ -14,10 +21,17 @@ CST_SplitDim(data, split_dim = "time", indices = NULL, freq = "monthly") \item{indices}{a vector of numeric indices or dates. If left at NULL, the dates provided in the s2dv_cube object (element Dates) will be used.} \item{freq}{a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independently of the year they belong to, while 'monthly' differenciates months from different years.} + +\item{new_dim_name}{a character string indicating the name of the new dimension.} + +\item{insert_ftime}{an integer indicating the number of time steps to add at the begining of the time series.} } \description{ This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division. 
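A minimal sketch of the new 'new_dim_name' argument follows (illustrative only, not part of the generated Rd). The toy cube is built with the list-plus-class pattern used elsewhere in these docs, and it is assumed that the 'indices' vector labels the group each time step belongs to; the dimension sizes are arbitrary.

# Toy 's2dv_cube' with a 'time' dimension of length 10 split into two groups.
data <- 1 : 20
dim(data) <- c(time = 10, lat = 2)
data <- list(data = data)
class(data) <- 's2dv_cube'
indices <- c(rep(1, 5), rep(2, 5))   # assumed group labels, one per time step
new_data <- CST_SplitDim(data, split_dim = 'time', indices = indices,
                         new_dim_name = 'period')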
 }
+\details{
+Parameter 'insert_ftime' has been included for the case of using daily data that requires splitting the temporal dimension by months (or similar) when the first lead time does not correspond to the 1st day of the month. In this case, insert_ftime can be used to get a final output correctly organized. E.g.: leadtime 1 is the 2nd of November and the input time series extends to the 31st of December. When requesting the split by month with \code{insert_ftime = 1}, the 'monthly' dimension of length two will indicate the month (position 1 for November and position 2 for December), and dimension 'time' will be of length 31. For November, positions 1 and 31 will be NAs, while positions 2 to 30 will be filled with the data provided. This allows selecting the correct days through the time dimension.
+}
 \examples{
 data <- 1 : 20
diff --git a/man/Calibration.Rd b/man/Calibration.Rd
index 64452279fce1be4719bcdfdaa18864398fd01ee2..f61a3cd3537844714533a899fa196125e9917319 100644
--- a/man/Calibration.Rd
+++ b/man/Calibration.Rd
@@ -11,6 +11,11 @@ Calibration(
   eval.method = "leave-one-out",
   multi.model = FALSE,
   na.fill = TRUE,
+  na.rm = TRUE,
+  apply_to = NULL,
+  alpha = NULL,
+  memb_dim = "member",
+  sdate_dim = "sdate",
   ncores = 1
 )
 }
@@ -19,7 +24,7 @@ Calibration(
 \item{obs}{an array containing the observed data.}
 
-\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min} or \code{crps_min}. Default value is \code{mse_min}.}
+\item{cal.method}{is the calibration method used, can be either \code{bias}, \code{evmos}, \code{mse_min}, \code{crps_min} or \code{rpc-based}. Default value is \code{mse_min}.}
 
 \item{eval.method}{is the sampling method used, can be either \code{in-sample} or \code{leave-one-out}. Default value is the \code{leave-one-out} cross validation.}
 
@@ -27,16 +32,29 @@ Calibration(
 \item{na.fill}{is a boolean that indicates what happens in case calibration is not possible or will yield unreliable results. This happens when three or less forecasts-observation pairs are available to perform the training phase of the calibration. By default \code{na.fill} is set to true such that NA values will be returned. If \code{na.fill} is set to false, the uncorrected data will be returned.}
 
+\item{na.rm}{is a boolean that indicates whether to remove the NA values or not. The default value is \code{TRUE}.}
+
+\item{apply_to}{is a character string that indicates whether to apply the calibration to all the forecast (\code{"all"}) or only to those where the correlation between the ensemble mean and the observations is statistically significant (\code{"sign"}). Only useful if \code{cal.method == "rpc-based"}.}
+
+\item{alpha}{is a numeric value indicating the significance level for the correlation test. Only useful if \code{cal.method == "rpc-based" & apply_to == "sign"}.}
+
+\item{memb_dim}{is a character string indicating the name of the member dimension. By default, it is set to 'member'.}
+
+\item{sdate_dim}{is a character string indicating the name of the start date dimension. By default, it is set to 'sdate'.}
+
 \item{ncores}{is an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.}
 }
 \value{
 an array containing the calibrated forecasts with the same dimensions as the \code{exp} array.
 }
 \description{
-Four types of member-by-member bias correction can be performed. The \code{bias} method corrects the bias only, the \code{evmos} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS).
+Four types of member-by-member bias correction can be performed. The \code{"bias"} method corrects the bias only, the \code{"evmos"} method applies a variance inflation technique to ensure the correction of the bias and the correspondence of variance between forecast and observation (Van Schaeybroeck and Vannitsem, 2011). The ensemble calibration methods \code{"mse_min"} and \code{"crps_min"} correct the bias, the overall forecast variance and the ensemble spread as described in Doblas-Reyes et al. (2005) and Van Schaeybroeck and Vannitsem (2015), respectively. While the \code{"mse_min"} method minimizes a constrained mean-squared error using three parameters, the \code{"crps_min"} method features four parameters and minimizes the Continuous Ranked Probability Score (CRPS). The \code{"rpc-based"} method adjusts the forecast variance ensuring that the ratio of predictable components (RPC) is equal to one, as in Eade et al. (2014).
 Both in-sample and out-of-sample (leave-one-out cross validation) calibration are possible.
 }
+\details{
+Both the \code{na.fill} and \code{na.rm} parameters can be used to indicate how the function has to handle the NA values. The \code{na.fill} parameter checks whether there are more than three forecast-observation pairs to perform the computation. In case there are three or fewer pairs, the computation is not carried out, and the value returned by the function depends on the value of this parameter (either NA if \code{na.fill == TRUE} or the uncorrected value if \code{na.fill == FALSE}). On the other hand, \code{na.rm} is used to indicate whether the function should remove the missing values during the computation of the parameters needed to perform the calibration.
+}
 \examples{
 mod1 <- 1 : (1 * 3 * 4 * 5 * 6 * 7)
 dim(mod1) <- c(dataset = 1, member = 3, sdate = 4, ftime = 5, lat = 6, lon = 7)
@@ -48,6 +66,8 @@ str(a)
 \references{
 Doblas-Reyes F.J, Hagedorn R, Palmer T.N. The rationale behind the success of multi-model ensembles in seasonal forecasting-II calibration and combination. Tellus A. 2005;57:234-252. doi:10.1111/j.1600-0870.2005.00104.x
 
+Eade, R., Smith, D., Scaife, A., Wallace, E., Dunstone, N., Hermanson, L., & Robinson, N. (2014). Do seasonal-to-decadal climate predictions underestimate the predictability of the real world? Geophysical Research Letters, 41(15), 5620-5628. doi: 10.1002/2014GL061146
+
 Van Schaeybroeck, B., & Vannitsem, S. (2011). Post-processing through linear regression. Nonlinear Processes in Geophysics, 18(2), 147. doi:10.5194/npg-18-147-2011
 
 Van Schaeybroeck, B., & Vannitsem, S. (2015). Ensemble post-processing using member-by-member approaches: theoretical aspects. Quarterly Journal of the Royal Meteorological Society, 141(688), 807-818.
doi:10.1002/qj.2397 diff --git a/man/CategoricalEnsCombination.Rd b/man/CategoricalEnsCombination.Rd new file mode 100644 index 0000000000000000000000000000000000000000..2f5ad14d69ba9db0ea970da8bc340ec73f66e2a1 --- /dev/null +++ b/man/CategoricalEnsCombination.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_CategoricalEnsCombination.R +\name{CategoricalEnsCombination} +\alias{CategoricalEnsCombination} +\title{Make categorical forecast based on a multi-model forecast with potential for calibrate} +\usage{ +CategoricalEnsCombination(fc, obs, cat.method, eval.method, amt.cat, ...) +} +\arguments{ +\item{fc}{a multi-dimensional array with named dimensions containing the seasonal forecast experiment data in the element named \code{$data}. The amount of forecasting models is equal to the size of the \code{dataset} dimension of the data array. The amount of members per model may be different. The size of the \code{member} dimension of the data array is equal to the maximum of the ensemble members among the models. Models with smaller ensemble sizes have residual indices of \code{member} dimension in the data array filled with NA values.} + +\item{obs}{a multidimensional array with named dimensions containing the observed data in the element named \code{$data}.} + +\item{cat.method}{method used to produce the categorical forecast, can be either \code{pool}, \code{comb}, \code{mmw} or \code{obs}. The method pool assumes equal weight for all ensemble members while the method comb assumes equal weight for each model. The weighting method is descirbed in Rajagopalan et al. (2002), Robertson et al. (2004) and Van Schaeybroeck and Vannitsem (2019). Finally, the \code{obs} method classifies the observations into the different categories and therefore contains only 0 and 1 values.} + +\item{eval.method}{is the sampling method used, can be either \code{"in-sample"} or \code{"leave-one-out"}. Default value is the \code{"leave-one-out"} cross validation.} + +\item{amt.cat}{is the amount of categories. Equally-sized quantiles will be calculated based on the amount of categories.} + +\item{...}{other parameters to be passed on to the calibration procedure.} +} +\value{ +an array containing the categorical forecasts in the element called \code{$data}. The first two dimensions of the returned object are named dataset and member and are both of size one. An additional dimension named category is introduced and is of size amt.cat. +} +\description{ +This function converts a multi-model ensemble forecast +into a categorical forecast by giving the probability +for each category. Different methods are available to combine +the different ensemble forecasting models into +probabilistic categorical forecasts. + +See details in ?CST_CategoricalEnsCombination +} +\references{ +Rajagopalan, B., Lall, U., & Zebiak, S. E. (2002). Categorical climate forecasts through regularization and optimal combination of multiple GCM ensembles. Monthly Weather Review, 130(7), 1792-1811. + +Robertson, A. W., Lall, U., Zebiak, S. E., & Goddard, L. (2004). Improved combination of multiple atmospheric GCM ensembles for seasonal prediction. Monthly Weather Review, 132(12), 2732-2744. + +Van Schaeybroeck, B., & Vannitsem, S. (2019). Postprocessing of Long-Range Forecasts. In Statistical Postprocessing of Ensemble Forecasts (pp. 267-290). 
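A hedged sketch of a call follows (illustrative only, not part of the generated Rd; this function has no \examples section). The array construction and dimension names are assumptions patterned on the Calibration example in this update, and additional dimensions (e.g. lat/lon) may be required in practice; only the documented arguments are used.

# Hypothetical two-model forecast (dataset = 2, up to 9 members) and one observational dataset;
# three equally-sized categories are requested via amt.cat.
fc  <- array(rnorm(2 * 9 * 6 * 3), dim = c(dataset = 2, member = 9, sdate = 6, ftime = 3))
obs <- array(rnorm(1 * 1 * 6 * 3), dim = c(dataset = 1, member = 1, sdate = 6, ftime = 3))
cat_fc <- CategoricalEnsCombination(fc = fc, obs = obs, cat.method = "pool",
                                    eval.method = "leave-one-out", amt.cat = 3)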
+} +\author{ +Bert Van Schaeybroeck, \email{bertvs@meteo.be} +} diff --git a/man/MultiMetric.Rd b/man/MultiMetric.Rd new file mode 100644 index 0000000000000000000000000000000000000000..10a4c33f1a0852dc28ca79684610f8129c135cf9 --- /dev/null +++ b/man/MultiMetric.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_MultiMetric.R +\name{MultiMetric} +\alias{MultiMetric} +\title{Multiple Metrics applied in Multiple Model Anomalies} +\usage{ +MultiMetric( + exp, + obs, + metric = "correlation", + multimodel = TRUE, + time_dim = "ftime", + memb_dim = "member", + sdate_dim = "sdate" +) +} +\arguments{ +\item{exp}{a multidimensional array with named dimensions.} + +\item{obs}{a multidimensional array with named dimensions.} + +\item{metric}{a character string giving the metric for computing the maximum skill. This must be one of the strings 'correlation', 'rms' or 'rmsss.} + +\item{multimodel}{a logical value indicating whether a Multi-Model Mean should be computed.} + +\item{time_dim}{name of the temporal dimension where a mean will be applied. It can be NULL, the default value is 'ftime'.} + +\item{memb_dim}{name of the member dimension. It can be NULL, the default value is 'member'.} + +\item{sdate_dim}{name of the start date dimension or a dimension name identifiying the different forecast. It can be NULL, the default value is 'sdate'.} +} +\value{ +a list of arrays containing the statistics of the selected metric in the element \code{$data} which is a list of arrays: for the metric requested and others for statistics about its signeificance. The arrays have two dataset dimensions equal to the 'dataset' dimension in the \code{exp$data} and \code{obs$data} inputs. If \code{multimodel} is TRUE, the greatest position in the first dimension correspons to the Multi-Model Mean. +} +\description{ +This function calculates correlation (Anomaly Correlation Coefficient; ACC), root mean square error (RMS) and the root mean square error skill score (RMSSS) of individual anomaly models and multi-models mean (if desired) with the observations on arrays with named dimensions. +} +\examples{ +res <- MultiMetric(lonlat_data$exp$data, lonlat_data$obs$data) +} +\references{ +Mishra, N., Prodhomme, C., & Guemas, V. (n.d.). Multi-Model Skill Assessment of Seasonal Temperature and Precipitation Forecasts over Europe, 29-31.\url{https://link.springer.com/10.1007/s00382-018-4404-z} +} +\seealso{ +\code{\link[s2dv]{Corr}}, \code{\link[s2dv]{RMS}}, \code{\link[s2dv]{RMSSS}} and \code{\link{CST_Load}} +} +\author{ +Mishra Niti, \email{niti.mishra@bsc.es} + +Perez-Zanon Nuria, \email{nuria.perez@bsc.es} +} diff --git a/man/PlotPDFsOLE.Rd b/man/PlotPDFsOLE.Rd index f2e2be8c8c75cd70dabba63accaa5aafbcc37cc3..ff3c568e638a93870726d36d58d29d236c1aa2aa 100644 --- a/man/PlotPDFsOLE.Rd +++ b/man/PlotPDFsOLE.Rd @@ -9,6 +9,8 @@ PlotPDFsOLE( pdf_1, pdf_2, nsigma = 3, + legendPos = "bottom", + legendSize = 1, plotfile = NULL, width = 30, height = 15, @@ -28,6 +30,12 @@ two parameters: mean' and 'standard deviation' of the second gaussian pdf \item{nsigma}{(optional) A numeric value for setting the limits of X axis. (Default nsigma = 3).} +\item{legendPos}{(optional) A character value for setting the position of the +legend ("bottom", "top", "right" or "left")(Default 'bottom').} + +\item{legendSize}{(optional) A numeric value for setting the size of the +legend text. (Default 1.0).} + \item{plotfile}{(optional) A filename where the plot will be saved. 
 (Default: the plot is not saved).}
@@ -61,6 +69,19 @@ dim(pdf_2) <- c(statistic = 2)
 
 PlotPDFsOLE(pdf_1, pdf_2)
 
+# Example 2
+Glosea5PDF <- c(2.25, 0.67)
+attr(Glosea5PDF, "name") <- "Glosea5"
+dim(Glosea5PDF) <- c(statistic = 2)
+ECMWFPDF <- c(2.38, 0.61)
+attr(ECMWFPDF, "name") <- "ECMWF"
+dim(ECMWFPDF) <- c(statistic = 2)
+MFPDF <- c(4.52, 0.34)
+attr(MFPDF, "name") <- "MF"
+dim(MFPDF) <- c(statistic = 2)
+PlotPDFsOLE(pdf_1 = Glosea5PDF, pdf_2 = ECMWFPDF, legendPos = 'left')
+PlotPDFsOLE(pdf_1 = Glosea5PDF, pdf_2 = MFPDF, legendPos = 'top')
+PlotPDFsOLE(pdf_1 = ECMWFPDF, pdf_2 = MFPDF, legendSize = 1.2)
 }
 \author{
 Eroteida Sanchez-Garcia - AEMET, //email{esanchezg@aemet.es}
diff --git a/man/PlotTriangles4Categories.Rd b/man/PlotTriangles4Categories.Rd
index 6e95df38351cb9f4c7bfcfa749e36af265f0d160..abd3085eff6da0742189afb1ce5f3c7d16d00ff5 100644
--- a/man/PlotTriangles4Categories.Rd
+++ b/man/PlotTriangles4Categories.Rd
@@ -23,6 +23,8 @@ PlotTriangles4Categories(
   lab_legend = NULL,
   cex_leg = 1,
   col_leg = "black",
+  cex_axis = 1.5,
+  mar = c(5, 4, 0, 0),
   fileout = NULL,
   size_units = "px",
   res = 100,
@@ -84,6 +86,10 @@ to represent sig_data.}
 
 \item{col_leg}{color of the legend (triangles).}
 
+\item{cex_axis}{a number to indicate the increase/reduction of the axis labels.}
+
+\item{mar}{A numerical vector of the form c(bottom, left, top, right) which gives the number of lines of margin to be specified on the four sides of the plot.}
+
 \item{fileout}{A string of full directory path and file name indicating where
 to save the plot. If not specified (default), a graphics device will pop up.}
diff --git a/man/QuantileMapping.Rd b/man/QuantileMapping.Rd
new file mode 100644
index 0000000000000000000000000000000000000000..8771e5495022218e2f373f73b91d4f04c1361745
--- /dev/null
+++ b/man/QuantileMapping.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/CST_QuantileMapping.R
+\name{QuantileMapping}
+\alias{QuantileMapping}
+\title{Quantile Mapping for seasonal or decadal forecast data}
+\usage{
+QuantileMapping(
+  exp,
+  obs,
+  exp_cor = NULL,
+  sample_dims = "ftime",
+  sample_length = NULL,
+  method = "QUANT",
+  ncores = NULL,
+  ...
+)
+}
+\arguments{
+\item{exp}{a multi-dimensional array with named dimensions containing the hindcast.}
+
+\item{obs}{a multi-dimensional array with named dimensions (the same as those provided in 'exp') containing the reference dataset.}
+
+\item{exp_cor}{a multi-dimensional array with named dimensions in which the quantile mapping correction will be applied. If it is not specified, the correction is applied to object \code{exp}.}
+
+\item{sample_dims}{a character vector indicating the dimensions that can be used as a sample for the same distribution.}
+
+\item{sample_length}{a numeric value indicating the length of the timeseries window to be used as a sample for the sample distribution and correction. By default, NULL, the total length of the timeseries will be used.}
+
+\item{method}{a character string indicating the method to be used: 'PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN'. By default, the empirical quantile mapping 'QUANT' is used.}
+
+\item{ncores}{an integer indicating the number of parallel processes to spawn for parallel computation in multiple cores.}
+
+\item{...}{additional arguments passed to the method specified by \code{method}.}
+}
+\value{
+an object of class \code{s2dv_cube} containing the experimental data after applying the quantile mapping correction.
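For orientation, a minimal sketch of a call to the exported QuantileMapping function documented above (synthetic arrays; the dimension names and sizes, and the pooling of member, start date and lead time into the sample, are illustrative assumptions):

# Hindcast and reference with matching named dimensions
exp <- array(rnorm(1 * 5 * 10 * 30),
             dim = c(dataset = 1, member = 5, sdate = 10, ftime = 30))
obs <- array(rnorm(1 * 1 * 10 * 30),
             dim = c(dataset = 1, member = 1, sdate = 10, ftime = 30))
# Empirical quantile mapping ('QUANT'), pooling members, start dates and
# lead times into the sample used to estimate the correction
res <- QuantileMapping(exp, obs, sample_dims = c("member", "sdate", "ftime"),
                       method = "QUANT")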
+}
+\description{
+This function is a wrapper of fitQmap and doQmap from package 'qmap' to be applied to CSTools objects of class 's2dv_cube'. The quantile mapping adjustment between an experiment, typically a hindcast, and observations is applied to the experiment itself or to a provided forecast.
+}
+\details{
+The different methods are:
+\itemize{
+\item{'PTF'} {fits a parametric transformation to the quantile-quantile relation of observed and modelled values. See \code{?qmap::fitQmapPTF}.}
+\item{'DIST'} {fits a theoretical distribution to observed and to modelled time series. See \code{?qmap::fitQmapDIST}.}
+\item{'RQUANT'} {estimates the values of the quantile-quantile relation of observed and modelled time series for regularly spaced quantiles using local linear least square regression. See \code{?qmap::fitQmapRQUANT}.}
+\item{'QUANT'} {estimates values of the empirical cumulative distribution function of observed and modelled time series for regularly spaced quantiles. See \code{?qmap::fitQmapQUANT}.}
+\item{'SSPLIN'} {fits a smoothing spline to the quantile-quantile plot of observed and modelled time series. See \code{?qmap::fitQmapSSPLIN}.}}
+All methods accept some common arguments:
+\itemize{
+\item{wet.day} {logical indicating whether to perform wet day correction or not (not available in the 'DIST' method).}
+\item{qstep} {NULL or a numeric value between 0 and 1.}}
+}
+\seealso{
+\code{qmap::fitQmap} and \code{qmap::doQmap}
+}
+\author{
+Nuria Perez-Zanon, \email{nuria.perez@bsc.es}
+}
diff --git a/man/RFSlope.Rd b/man/RFSlope.Rd
index db3f0e1054ff5a4b93289a453c5d1bd272a4f9e8..5c0c168955e9aef866a0e44c31b302e9e66756ca 100644
--- a/man/RFSlope.Rd
+++ b/man/RFSlope.Rd
@@ -4,7 +4,14 @@
 \alias{RFSlope}
 \title{RainFARM spectral slopes from an array (reduced version)}
 \usage{
-RFSlope(data, kmin = 1, time_dim = NULL, lon_dim = "lon", lat_dim = "lat")
+RFSlope(
+  data,
+  kmin = 1,
+  time_dim = NULL,
+  lon_dim = "lon",
+  lat_dim = "lat",
+  ncores = 1
+)
 }
 \arguments{
 \item{data}{Array containing the spatial precipitation fields to downscale.
@@ -25,6 +32,8 @@ with more than one element is chosen.}
 \item{lon_dim}{Name of lon dimension ("lon" by default).}
 
 \item{lat_dim}{Name of lat dimension ("lat" by default).}
+
+\item{ncores}{an integer that indicates the number of cores for parallel computations using the multiApply function.
The default value is one.} } \value{ RFSlope() returns spectral slopes using the RainFARM convention diff --git a/man/RF_Weights.Rd b/man/RF_Weights.Rd new file mode 100644 index 0000000000000000000000000000000000000000..66e1ac5153c2a41605867e6cbbcd35c8b3120bd7 --- /dev/null +++ b/man/RF_Weights.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CST_RFWeights.R +\name{RF_Weights} +\alias{RF_Weights} +\title{Compute climatological weights for RainFARM stochastic precipitation downscaling} +\usage{ +RF_Weights( + zclim, + latin, + lonin, + nf, + lat, + lon, + fsmooth = TRUE, + lonname = "lon", + latname = "lat", + ncores = NULL +) +} +\arguments{ +\item{zclim}{a multi-dimensional array with named dimension containing at least one precipiation field with spatial dimensions.} + +\item{latin}{a vector indicating the latitudinal coordinates corresponding to the \code{zclim} parameter.} + +\item{lonin}{a vector indicating the longitudinal coordinates corresponding to the \code{zclim} parameter.} + +\item{nf}{Refinement factor for downscaling (the output resolution is increased by this factor).} + +\item{lat}{Vector of latitudes. +The number of longitudes and latitudes is expected to be even and the same. If not +the function will perform a subsetting to ensure this condition.} + +\item{lon}{Vector of longitudes.} + +\item{fsmooth}{Logical to use smooth conservation (default) or large-scale box-average conservation.} + +\item{lonname}{a character string indicating the name of the longitudinal dimension set as 'lon' by default.} + +\item{latname}{a character string indicating the name of the latitudinal dimension set as 'lat' by default.} + +\item{ncores}{an integer that indicates the number of cores for parallel computations using multiApply function. The default value is one.} +} +\value{ +An object of class 's2dv_cube' containing in matrix \code{data} the weights with dimensions (lon, lat). +} +\description{ +Compute climatological ("orographic") weights from a fine-scale precipitation climatology file. +} +\examples{ +a <- array(1:2500, c(lat = 50, lon = 50)) +res <- RF_Weights(a, seq(0.1 ,5, 0.1), seq(0.1 ,5, 0.1), + nf = 5, lat = 1:5, lon = 1:5) +} +\references{ +Terzago, S., Palazzi, E., & von Hardenberg, J. (2018). +Stochastic downscaling of precipitation in complex orography: +A simple method to reproduce a realistic fine-scale climatology. +Natural Hazards and Earth System Sciences, 18(11), +2825-2840. http://doi.org/10.5194/nhess-18-2825-2018 . +} +\author{ +Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it} +} diff --git a/man/RainFARM.Rd b/man/RainFARM.Rd index 0db8467926923fbed0f9da2f9625e67d5fe06a7e..ef4485c94b1c290236206f97e2b0dcbc1ff06153 100644 --- a/man/RainFARM.Rd +++ b/man/RainFARM.Rd @@ -38,15 +38,14 @@ the function will perform a subsetting to ensure this condition.} \item{nf}{Refinement factor for downscaling (the output resolution is increased by this factor).} -\item{weights}{Matrix with climatological weights which can be obtained using +\item{weights}{multi-dimensional array with climatological weights which can be obtained using the \code{CST_RFWeights} function. If \code{weights=1.} (default) no weights are used. -The matrix should have dimensions (lon, lat) in this order. -The names of these dimensions are not checked.} +The names of these dimensions must be at least 'lon' and 'lat'.} \item{nens}{Number of ensemble members to produce (default: \code{nens=1}).} \item{slope}{Prescribed spectral slope. 
The default is \code{slope=0.}
-meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}.}
+meaning that the slope is determined automatically over the dimensions specified by \code{time_dim}. A 1D array with a named dimension can be provided (see details and examples).}
 
 \item{kmin}{First wavenumber for spectral slope (default: \code{kmin=1}).}
 
@@ -96,6 +95,9 @@ References: Terzago, S. et al. (2018). NHESS 18(11), 2825-2840.
 http://doi.org/10.5194/nhess-18-2825-2018,
 D'Onofrio et al. (2014), J of Hydrometeorology 15, 830-843; Rebora et. al. (2006), JHM 7, 724.
 }
+\details{
+When the parameters 'slope' and 'weights' present a seasonal dependency, the name of that dimension should match between these parameters and the input data in parameter 'data'. See Example 2 below, where weights and slope vary with the 'sdate' dimension.
+}
 \examples{
 # Example for the 'reduced' RainFARM function
 nf <- 8   # Choose a downscaling by factor 8
@@ -107,7 +109,7 @@ dim(pr) <- c(lon = 8, lat = 8, ftime = 20)
 lon_mat <- seq(10, 13.5, 0.5) # could also be a 2d matrix
 lat_mat <- seq(40, 43.5, 0.5)
 # Create a test array of weights
-ww <- array(1., dim = c(8 * nf, 8 * nf))
+ww <- array(1., dim = c(lon = 8 * nf, lat = 8 * nf))
 # or create proper weights using an external fine-scale climatology file
 # Specify a weightsfn filename if you wish to save the weights
 \dontrun{
@@ -126,7 +128,11 @@ str(res)
 dim(res$data)
 # lon lat ftime realization
 #  64  64    20           2
-
+# Example 2:
+slo <- array(c(0.1, 0.5, 0.7), c(sdate = 3))
+wei <- array(rnorm(8*8*3), c(lon = 8, lat = 8, sdate = 3))
+res <- RainFARM(lonlat_prec$data, lon = lonlat_prec$lon,
+                lat = lonlat_prec$lat, weights = wei, slope = slo, nf = 2)
 }
 \author{
 Jost von Hardenberg - ISAC-CNR, \email{j.vonhardenberg@isac.cnr.it}
diff --git a/man/SplitDim.Rd b/man/SplitDim.Rd
index f07e4756bd2fdbdeaafd4d7cfae30010ec29009a..a49043062c8e2dea743b9f391f53f9929bd41530 100644
--- a/man/SplitDim.Rd
+++ b/man/SplitDim.Rd
@@ -4,7 +4,13 @@
 \alias{SplitDim}
 \title{Function to Split Dimension}
 \usage{
-SplitDim(data, split_dim = "time", indices, freq = "monthly")
+SplitDim(
+  data,
+  split_dim = "time",
+  indices,
+  freq = "monthly",
+  new_dim_name = NULL
+)
 }
 \arguments{
 \item{data}{an n-dimensional array with named dimensions}
 
@@ -13,7 +19,9 @@ SplitDim(data, split_dim = "time", indices, freq = "monthly")
 \item{indices}{a vector of numeric indices or dates}
 
-\item{freq}{a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independetly of the year they belong to, while 'monthly' differenciates months from different years. Parameter 'freq' can also be numeric indicating the length in which to subset the dimension}
+\item{freq}{a character string indicating the frequency: by 'day', 'month' and 'year' or 'monthly' (by default). 'month' identifies months between 1 and 12 independently of the year they belong to, while 'monthly' differentiates months from different years. Parameter 'freq' can also be numeric, indicating the length in which to subset the dimension.}
+
+\item{new_dim_name}{a character string indicating the name of the new dimension.}
 }
 \description{
 This function split a dimension in two. The user can select the dimension to split and provide indices indicating how to split that dimension or dates and the frequency expected (monthly or by day, month and year). The user can also provide a numeric frequency indicating the length of each division.
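A small illustration of the splitting behaviour described above, including the new 'new_dim_name' argument (synthetic data; the dimension sizes and the chosen name are arbitrary assumptions):

# Ten time steps assigned to two groups of five via numeric indices
data <- array(1:20, dim = c(time = 10, lat = 2))
indices <- c(rep(1, 5), rep(2, 5))
# Splitting 'time' adds a new dimension of length 2; 'new_dim_name'
# lets the caller name that dimension explicitly (here 'block')
res <- SplitDim(data, split_dim = "time", indices = indices,
                new_dim_name = "block")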
diff --git a/man/areave_data.Rd b/man/areave_data.Rd deleted file mode 100644 index a772220a9e3a7a19a02186938bd9b4f2e72fbc5c..0000000000000000000000000000000000000000 --- a/man/areave_data.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sample_data.R -\docType{data} -\name{areave_data} -\alias{areave_data} -\title{Sample Of Experimental And Observational Climate Data Averaged Over A Region} -\description{ -This sample data set contains area-averaged seasonal forecast and corresponding observational data from the Copernicus Climate Change ECMWF-System 5 forecast system, and from the Copernicus Climate Change ERA-5 reconstruction. Specifically, for the 'tas' (2-meter temperature) variable, for the 15 first forecast ensemble members, monthly averaged, for the 3 first forecast time steps (lead months 1 to 4) of the November start dates of 2000 to 2005, for the Mediterranean region (27N-48N, 12W-40E). -} -\details{ -It is recommended to use the data set as follows: -\preformatted{ -require(zeallot) -c(exp, obs) %<-% CSTools::areave_data -} - -The `CST_Load` call used to generate the data set in the infrastructure of the Earth Sciences Department of the Barcelona Supercomputing Center is shown next. Note that `CST_Load` internally calls `s2dverification::Load`, which would require a configuration file (not provided here) expressing the distribution of the 'system5c3s' and 'era5' NetCDF files in the file system. -\preformatted{ -library(CSTools) -require(zeallot) - -startDates <- c('20001101', '20011101', '20021101', - '20031101', '20041101', '20051101') - -areave_data <- - CST_Load( - var = 'tas', - exp = 'system5c3s', - obs = 'era5', - nmember = 15, - sdates = startDates, - leadtimemax = 3, - latmin = 27, latmax = 48, - lonmin = -12, lonmax = 40, - output = 'areave', - nprocs = 1 - ) -} -} -\author{ -Nicolau Manubens \email{nicolau.manubens@bsc.es} -} -\keyword{data} diff --git a/man/training_analogs.Rd b/man/training_analogs.Rd new file mode 100644 index 0000000000000000000000000000000000000000..447f8b0c27ea0f3a2b1c5feef79c75165a3dd1a2 --- /dev/null +++ b/man/training_analogs.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AnalogsPred_train.R +\name{training_analogs} +\alias{training_analogs} +\title{AEMET Training +Training method (pre-downscaling) based on analogs: +synoptic situations and significant predictors.} +\usage{ +training_analogs( + pred, + slp_ext, + lon, + lat, + slp_lon, + slp_lat, + var, + HR_path, + tdates +) +} +\arguments{ +\item{pred}{List of matrix reanalysis data in a synoptic domain. The list +has to contain reanalysis atmospheric variables (instantaneous 12h data) +that must be indentify by parenthesis name. +For precipitation: +- u component of wind at 500 hPa (u500) in m/s +- v component of wind at 500 hPa (v500) in m/s +- temperature at 500 hPa (t500) in K +- temperature at 850 hPa (t850) in K +- temperature at 1000 hPa (t1000) in K +- geopotential height at 500 hPa (z500) in m +- geopotential height at 1000 hPa (z1000) in m +- sea level pressure (slp) in hPa +- specific humidity at 700 hPa (q700) in g/kg +For maximum and minimum temperature: +- temperature at 1000 hPa (t1000) in K +- sea level pressure (slp) in hPa +All matrix must have [time,gridpoint] dimensions. +(time = number of training days, gridpoint = number of synoptic gridpoints).} + +\item{slp_ext}{Matrix with atmospheric reanalysis sea level pressure +(instantaneous 12h data)(hPa). 
It has the same resolution as 'pred' parameter +but with an extended domain. This domain contains extra degrees (most in the +north and west part) compare to synoptic domain. The matrix must have +[time,gridpoint] dimensions. +(time = number of training days, gridpoint = number of extended gridpoints).} + +\item{lon}{Vector of the synoptic longitude (from (-180º) to 180º), +The vector must go from west to east.} + +\item{lat}{Vector of the synoptic latitude. The vector must go from north to south.} + +\item{slp_lon}{Vector of the extended longitude (from (-180º) to 180º) +The vector must go from west to east.} + +\item{slp_lat}{Vector of the extended latitude. The vector must go from north to south.} + +\item{var}{Variable name to downscale. There are two options: 'prec' for +precipitation and 'temp' for maximum and minimum temperature.} + +\item{HR_path}{Local path of HR observational files (maestro and pcp/tmx-tmn). +For precipitation can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/v2/Serie_AEMET_v2_pcp_1951a202006_txt.tar.gz +For maximum and minimum temperature can be downloaded from http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmax/Serie_AEMET_v1_tmax_1951a202006_txt.tar.gz and http://www.aemet.es/documentos/es/serviciosclimaticos/cambio_climat/datos_diarios/dato_observacional/rejilla_5km/temperatura/v1/tmin/Serie_AEMET_v1_tmin_1951a202006_txt.tar.gz respetively. +Maestro file (maestro_red_hr_SPAIN.txt) has gridpoint (nptos), longitude (lon), latitude (lat) and +altitude (alt) in columns (vector structure). +Data file (pcp/tmx/tmn_red_SPAIN_1951-201903.txt) includes 5km resolution spanish daily data +(precipitation or maximum and minimum temperature from january 1951 to june 2020. See README +file for more information. +IMPORTANT!: HR observational period must be the same as for reanalysis variables. +It is assumed that the training period is smaller than the HR original one (1951-2020), so it is +needed to make a new ascii file with the new period and the same structure as original, +specifying the training dates ('tdates' parameter) in the name +(e.g. 'pcp_red_SPAIN_19810101-19961231.txt' for '19810101-19961231' period).} + +\item{tdates}{Training period dates in format YYYYMMDD(start)-YYYYMMDD(end) (e.g. 19810101-19961231).} +} +\value{ +matrix list (e.g. restrain) as a result of characterize the past synoptic +situations and the significant predictors needed to downscale seasonal forecast variables. 
+For precipitation the output includes: +um: u component of geostrophic wind in all period (numeric matrix with [time,gridpoint] dimensions) +vm: v component of geostrophic wind in all period (numeric matrix with [time,gridpoint] dimensions) +nger: number of synoptic situations (integer) +gu92: u component of geostrophic wind for each synoptic situation (numeric matrix with + [nger,gridpoint] dimensions) +gv92: v component of geostrophic wind for each synoptic situation (numeric matrix with + [nger,gridpoint] dimensions) +gu52: u component of wind at 500 hPa for each synotic situation (numeric matrix with + [nger,gridpoint] dimensions) +gv52: v component of wind at 500 hPa for each synotic situation (numeric matrix with + [nger,gridpoint] dimensions) +neni: number of reference centers where predictors are calculated (integer) +vdmin: minimum distances between each HR gridpoint and the four nearest synoptic + gridpoints (numeric matrix with [nptos,4] dimensions) (nptos = number of HR gridpoints) +vref: four nearest synoptic gridpoints to each HR gridpoint (integer matrix with + [nptos,4] dimensions) +ccm: multiple correlation coeficients (numeric matrix with [nger,nptos] dimensions) +indices: + - lab_pred: numeric labels of selected predictors (integer matrix + with [nger,nptos,11,1] dimensions) + - cor_pred: partial correlation of selected predictors (numeric matrix with + [nger,nptos,11,2] dimensions) +For maximum and minimum temperature the output includes: +um: u component of geostrophic wind in all training period (numeric matrix with [time,gridpoint] dimensions) +vm: v component of geostrophic wind in all training period (numeric matrix with [time,gridpoint] dimensions) +insol: insolation in all training period (numeric vector with [time] dimension) +neni: number of reference centers where predictors are calculated (integer) +vdmin: minimum distances between each HR gridpoint and the four nearest synoptic + gridpoints (numeric matrix with [nptos,4] dimensions) (nptos = number of HR gridpoints) +vref: four nearest synoptic gridpoints to each HR gridpoint (integer matrix with + [nptos,4] dimensions) + +The output can directly use as argument to 'CST_AnalogsPredictors' function +(e.g. resdowns <- CST_AnalogsPredictors(...,restrain)) +} +\description{ +This function caracterizes the synoptic situations in a past period based on +low resolution reanalysis data (e.g, ERAInterim 1.5º x 1.5º) and an observational high +resolution (HR) dataset (AEMET 5 km gridded daily precipitation and maximum and +minimum temperature) (Peral et al., 2017)). +The method uses three domains: +- peninsular Spain and Balearic Islands domain (5 km resolution): HR domain +- synoptic domain (low resolution): it should be centered over Iberian Peninsula and + cover enough extension to detect as much synoptic situations as possible. +- extended domain (low resolution): it is an extension of the synoptic + domain. It is used for 'slp_ext' parameter (see 'slp_lon' and 'slp_lat' below). 
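To make the expected layout of the 'pred' argument described above concrete, a shape-only sketch for the precipitation case (random values; the period length and grid size are arbitrary assumptions, and the remaining variables follow the same [time, gridpoint] layout):

ndays <- 5844   # e.g. a 16-year daily training period (assumed)
ngrid <- 143    # number of synoptic grid points (assumed)
pred <- list(
  u500 = matrix(rnorm(ndays * ngrid), nrow = ndays, ncol = ngrid),
  v500 = matrix(rnorm(ndays * ngrid), nrow = ndays, ncol = ngrid),
  t500 = matrix(rnorm(ndays * ngrid, mean = 253, sd = 5),
                nrow = ndays, ncol = ngrid)
)
# t850, t1000, z500, z1000, slp and q700 are added in the same way before
# calling training_analogs(pred, slp_ext, lon, lat, slp_lon, slp_lat,
#                          var = 'prec', HR_path, tdates)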
+} +\author{ +Marta Dominguez Alonso - AEMET, \email{mdomingueza@aemet.es} + +Nuria Perez-Zanon - BSC, \email{nuria.perez@bsc.es} +} diff --git a/src/CSTools.so b/src/CSTools.so new file mode 100755 index 0000000000000000000000000000000000000000..b5e81187254aa25073268fa773029085c71c0779 Binary files /dev/null and b/src/CSTools.so differ diff --git a/src/Makevars b/src/Makevars new file mode 100755 index 0000000000000000000000000000000000000000..42ca55ca811d36a314de163b040aa50b3200711c --- /dev/null +++ b/src/Makevars @@ -0,0 +1,27 @@ + +FC = gfortran +MOD_OBJS = mod_csts.o mod_funcs.o +FF_OBJS = calc_geoswind.o calculo_tempes_densi_sealevel.o calc_utmcandelasgrid.o calc_utm_rej_era5_penin.o clasif_era_pen_kmeans.o insol.o predictores_significativos.o pts_ref_est_pen_4int.o pts_ref_est_pen.o +F_OBJS = training_part1_prec.o training_part2_prec.o training_temp.o downscaling_prec.o downscaling_temp.o + +all: + @$(MAKE) $(SHLIB) + @rm -f *.mod *.o + +$(SHLIB): $(MOD_OBJS) $(FF_OBJS) $(F_OBJS) + +calc_geoswind.o: mod_csts.o mod_funcs.o +calculo_tempes_densi_sealevel.o: calculo_tempes_densi_sealevel.f90 mod_csts.o +calc_utmcandelasgrid.o: calc_utmcandelasgrid.f90 mod_csts.o mod_funcs.o +calc_utm_rej_era5_penin.o: calc_utm_rej_era5_penin.f90 mod_csts.o mod_funcs.o +clasif_era_pen_kmeans.o: clasif_era_pen_kmeans.f90 mod_csts.o mod_funcs.o +insol.o: insol.f90 mod_csts.o mod_funcs.o +predictores_significativos.o: predictores_significativos.f90 mod_csts.o mod_funcs.o +pts_ref_est_pen_4int.o: pts_ref_est_pen_4int.f90 mod_csts.o +pts_ref_est_pen.o: pts_ref_est_pen.f90 mod_csts.o +training_part1_prec.o: training_part1_prec.f90 mod_csts.o mod_funcs.o calculo_tempes_densi_sealevel.o calc_geoswind.o clasif_era_pen_kmeans.o +training_part2_prec.o: training_part2_prec.f90 calc_utmcandelasgrid.o calc_utm_rej_era5_penin.o pts_ref_est_pen_4int.o pts_ref_est_pen.o predictores_significativos.o +training_temp.o: training_temp.f90 mod_csts.o mod_funcs.o calculo_tempes_densi_sealevel.o calc_geoswind.o insol.o calc_utm_rej_era5_penin.o calc_utmcandelasgrid.o pts_ref_est_pen_4int.o +downscaling_prec.o: downscaling_prec.f90 mod_csts.o mod_funcs.o +downscaling_temp.o: downscaling_temp.f90 mod_csts.o mod_funcs.o + diff --git a/src/calc_geoswind.f90 b/src/calc_geoswind.f90 new file mode 100755 index 0000000000000000000000000000000000000000..d6b4645476b0f01ada5ea29fb8c171dc6ca24dd7 --- /dev/null +++ b/src/calc_geoswind.f90 @@ -0,0 +1,65 @@ + +! Program to calculate the geostrophic wind based on mean +! 
sea level pressure (msl) and sea level density + +SUBROUTINE geos(ic,nd,id,slat,slon,slats,slons,rlat,& + rlon,rlats,rlons,nlat,nlon,nlats,nlons,den,msl_lr,& + ngridd,um,vm) + +!ccccccccccccccccccccccccccccccccccccccccccccccccccccccc +!ccccccccccccccccccccccccccccccccccccccccccccccccccccccc + +USE MOD_CSTS +USE MOD_FUNCS, ONLY : geostrofico,dobla,bessel + + Implicit none + + INTEGER, INTENT(IN) :: nd + INTEGER, INTENT(IN) :: ic + INTEGER, INTENT(IN) :: id + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + INTEGER, INTENT(IN) :: nlats + INTEGER, INTENT(IN) :: nlons + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: slats + DOUBLE PRECISION, INTENT(IN) :: slons + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + DOUBLE PRECISION, INTENT(IN) :: rlats + DOUBLE PRECISION, INTENT(IN) :: rlons + REAL, INTENT(IN) :: den(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: msl_lr(nd,id) + INTEGER, INTENT(IN) :: ngridd + DOUBLE PRECISION, INTENT(OUT) :: um(nd,ic) + DOUBLE PRECISION, INTENT(OUT) :: vm(nd,ic) + + integer i + real psl(id),umint(ic),vmint(ic) + character for10*50 + +! print*,"program 2: geostrophic wind" + + do 1000 i=1,nd + +! Pressure data + psl(:)=msl_lr(i,:)*100./g + +! Calculate the geostrophic wind components (umint,vmint) + call geostrofico(psl,umint,vmint,id,ic,slat,slon,slats,slons,& + rlat,rlon,rlats,rlons,nlat,nlon,nlats,nlons,ngridd) + +! do j=1,ic +! um(i,j)=umint(j)/den(i,j) +! vm(i,j)=vmint(j)/den(i,j) +! enddo + + um(i,:)=umint(:)/den(i,:) + vm(i,:)=vmint(:)/den(i,:) + + 1000 continue + +END SUBROUTINE geos + diff --git a/src/calc_utm_rej_era5_penin.f90 b/src/calc_utm_rej_era5_penin.f90 new file mode 100755 index 0000000000000000000000000000000000000000..75fee505cbef04745464de4a2607f25f1d7b4ef0 --- /dev/null +++ b/src/calc_utm_rej_era5_penin.f90 @@ -0,0 +1,52 @@ + +! The utm_ERA program calculates the Reanalysis UTM coordinates +! with a time zone of 30. + +SUBROUTINE utm_ERA(ic,nlat,nlon,slat,slon,rlat,rlon,x,y) + +USE MOD_CSTS +USE MOD_FUNCS + + Implicit none + + INTEGER, INTENT(IN) :: ic + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + REAL, INTENT(OUT) :: x(ic) + REAL, INTENT(OUT) :: y(ic) + + integer j + integer igrad,imin,rseg,igrad1,imin1 + real rseg1 + real rlt(ic),rln(ic) +! real*8 r1,r2,r3,r4,r5,r6,rad,rad1,xint,yint + double precision r1,r2,r3,r4,r5,r6,rad,rad1,xint,yint + +! print*,"program 4: reanalysis UTM coordinates" + +! Calculation of geostrophic coordinates in each synoptic grid points + + do j=1,ic + rlt(j)=slat+(((j-1)/nlon)*rlat) + rln(j)=slon+((mod(j-1,nlon)+1-1)*rlon) + enddo + +! calculation of UTM coordinates + + do j=1,ic + rad1=rln(j) + rad=rlt(j) + + call geoutm(rad1,rad,huso,xint,yint) + + x(j)=xint + y(j)=yint + + enddo + +END SUBROUTINE utm_ERA diff --git a/src/calc_utmcandelasgrid.f90 b/src/calc_utmcandelasgrid.f90 new file mode 100755 index 0000000000000000000000000000000000000000..1acf8fb05212a64ea3605d42707c0c8510290373 --- /dev/null +++ b/src/calc_utmcandelasgrid.f90 @@ -0,0 +1,41 @@ + +! The utm_obs program calculates the UTM coordinates of high resolution +! (5km x 5km) observational database created by AEMET (Peral et al., 2017). +! 
+SUBROUTINE utm_obs(lon_hr,lat_hr,xcand,ycand) + +USE MOD_CSTS +USE MOD_FUNCS, ONLY : geoutm + + Implicit none + + DOUBLE PRECISION, INTENT(IN) :: lon_hr(nptos) + DOUBLE PRECISION, INTENT(IN) :: lat_hr(nptos) + REAL, INTENT(OUT) :: xcand(nptos) + REAL, INTENT(OUT) :: ycand(nptos) + + integer n + integer i +! real*8 rad,rad1,xint,yint + double precision rad,rad1,xint,yint + +! print*,"program 5: UTM coordinates high resolution observational database" + + n=nptos + + do i=1,n + rad1=lon_hr(i) + rad=lat_hr(i) +! Calculation of UTM coordinates + call geoutm(rad1,rad,huso,xint,yint) + + xcand(i)=xint + ycand(i)=yint + + enddo + +END SUBROUTINE utm_obs + +!Peral, C., Navascués, B. and Ramos, P.: Serie de precipitación diaria en +!rejilla con fines climáticos. Nota Técnica nº 24, AEMET, +!http://hdl.handle.net/20.500.11765/7573, 2017. diff --git a/src/calculo_tempes_densi_sealevel.f90 b/src/calculo_tempes_densi_sealevel.f90 new file mode 100755 index 0000000000000000000000000000000000000000..362c071442188dc6deef2a4306b9cfdc5ca9cd67 --- /dev/null +++ b/src/calculo_tempes_densi_sealevel.f90 @@ -0,0 +1,37 @@ + +! Program to calculate sea level temperature and density from 1000Mb +! temperature and mean sea level pressure (msl) + +SUBROUTINE calc_tempes_densi_sealev(ic,nd,msl_si,t1000,den) + +USE MOD_CSTS + + IMPLICIT NONE + +!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + INTEGER, INTENT(IN) :: nd + INTEGER, INTENT(IN) :: ic + DOUBLE PRECISION, INTENT(IN) :: msl_si(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t1000(nd,ic) + REAL, INTENT(OUT) :: den(nd,ic) + + real c,yy + real psl(ic),tmil(ic),tsl(ic) + integer i,j + + c=r*a/g + + do i=1,nd + psl(:)=msl_si(i,:) + tmil(:)=t1000(i,:) + do j=1,ic + yy=log(tmil(j))-c*log(1000./psl(j)) + tsl(j)=exp(yy) +! Air density equation + den(i,j)=(psl(j)*100.)/(r*tsl(j)) + enddo + enddo + +END SUBROUTINE calc_tempes_densi_sealev + diff --git a/src/clasif_era_pen_kmeans.f90 b/src/clasif_era_pen_kmeans.f90 new file mode 100755 index 0000000000000000000000000000000000000000..3db1e7e3d3576099831d5af9adec167e372ae762 --- /dev/null +++ b/src/clasif_era_pen_kmeans.f90 @@ -0,0 +1,273 @@ + +! Program to do synoptic clasification from reanalysis: geostrophic wind +! 
components, 500Mb wind components and 1000Mb and 500Mb geopotential + +SUBROUTINE clasif(ic,nd,nlon,nlat,slat,slon,rlat,rlon,um,vm,u500,v500,& + z1000,z500,nger,gu92,gv92,gu52,gv52) + +USE MOD_CSTS +USE MOD_FUNCS + + Implicit none + + INTEGER, INTENT(IN) :: ic + INTEGER, INTENT(IN) :: nd + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + DOUBLE PRECISION, INTENT(IN) :: um(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: vm(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: u500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: v500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: z1000(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: z500(nd,ic) + INTEGER, INTENT(OUT) :: nger + DOUBLE PRECISION, INTENT(OUT) :: gu92(nd,ic) + DOUBLE PRECISION, INTENT(OUT) :: gv92(nd,ic) + DOUBLE PRECISION, INTENT(OUT) :: gu52(nd,ic) + DOUBLE PRECISION, INTENT(OUT) :: gv52(nd,ic) + + integer n,nm + integer i,j,k,i1,i2,iel,iger,igmin,ipos,iter + real dis,disu5,disv5,disu9,disv9,dmin + real u9(nd,ic),v9(nd,ic),u5(nd,ic),v5(nd,ic) + real gu91(nd,ic),gv91(nd,ic),gu51(nd,ic),gv51(nd,ic) + real ger9(nd,ic),ger5(nd,ic),geo9(nd,ic),geo5(nd,ic) + real ger9_newUnd(nd,ic) + real ser(nd),md,sg,mu9(ic),su9(ic),mv9(ic),sv9(ic) + real mu5(ic),su5(ic),mv5(ic),sv5(ic) + + integer cl(nd,umb_ger+1),ger(nd+1) + real p9(ic),p5(ic),rlt(ic),rln(ic) + + n=nd + nm=umb_ger + +! print*,'program 3: sinoptic clasification' + +! Calculation to assign the weights to each grid point. + + do j=1,ic + rlt(j)=slat+(((j-1)/nlon)*rlat) + rln(j)=slon+((mod(j-1,nlon)+1-1)*rlon) + enddo + + p9=0. + p5=1. + + do i1=1,ic + if((rlt(i1).le.fnor2).and.(rlt(i1).ge.fsur2)) then + if((rln(i1).ge.foes2).and.(rln(i1).le.fest2)) then + p9(i1)=1. + p5(i1)=4. + endif + endif + enddo + do i1=1,ic + if((rlt(i1).le.fnor1).and.(rlt(i1).ge.fsur1)) then + if((rln(i1).ge.foes1).and.(rln(i1).le.fest1)) then + p9(i1)=2. + p5(i1)=8. + endif + endif + enddo +! +! REANALYSIS VARIABLES + + u5(:,:)=u500(:,:) + v5(:,:)=v500(:,:) + geo9(:,:)=z1000(:,:) + geo5(:,:)=z500(:,:) + +! Mean and standard deviation of reference synoptic fields + + do j=1,ic + do i=1,n + ser(i)=um(i,j) + enddo + call estadis(ser,md,sg,n) + mu9(j)=md + su9(j)=sg + do i=1,n + ser(i)=vm(i,j) + enddo + call estadis(ser,md,sg,n) + mv9(j)=md + sv9(j)=sg + do i=1,n + ser(i)=u5(i,j) + enddo + call estadis(ser,md,sg,n) + mu5(j)=md + su5(j)=sg + do i=1,n + ser(i)=v5(i,j) + enddo + call estadis(ser,md,sg,n) + mv5(j)=md + sv5(j)=sg + enddo + + +! Geostrophic wind components standatization + + do i=1,n + do j=1,ic + u9(i,j)=(um(i,j)-mu9(j))/su9(j) + v9(i,j)=(vm(i,j)-mv9(j))/sv9(j) + u5(i,j)=(u5(i,j)-mu5(j))/su5(j) + v5(i,j)=(v5(i,j)-mv5(j))/sv5(j) + enddo + enddo + +! Finding the cluster centers + + ger(n+1)=1 + ger(1)=1 + do 200 i=2,n + do 210 j=1,ger(n+1) + iger=ger(j) + call distan9(u9,n,ic,i,iger,p9,disu9) + call distan9(v9,n,ic,i,iger,p9,disv9) + call distan5(u5,n,ic,i,iger,p5,disu5) + call distan5(v5,n,ic,i,iger,p5,disv5) + dis=(disu9+disv9+disu5+disv5)/4. + if(dis.lt.umb) go to 200 + 210 continue + ger(n+1)=ger(n+1)+1 + ipos=ger(n+1) + ger(ipos)=i + 200 continue + + do k=1,ger(n+1) + enddo + +! K-means method: weather types + + nger=ger(n+1) + +! 
print*,' number of synoptic types = ', nger + + do k=1,nger + iger=ger(k) + do j=1,ic + gu92(k,j)=u9(iger,j) + gv92(k,j)=v9(iger,j) + gu52(k,j)=u5(iger,j) + gv52(k,j)=v5(iger,j) + gu91(k,j)=u9(iger,j) + gv91(k,j)=v9(iger,j) + gu51(k,j)=u5(iger,j) + gv51(k,j)=v5(iger,j) + enddo + enddo + + iter=0 + 251 continue + cl=0 + iter=iter+1 + do 300 i1=1,n + dmin=1000. + igmin=0 + do 310 i2=1,nger + call distancia9(u9,n,gu92,n,i1,i2,p9,disu9,ic) + call distancia9(v9,n,gv92,n,i1,i2,p9,disv9,ic) + call distancia5(u5,n,gu52,n,i1,i2,p5,disu5,ic) + call distancia5(v5,n,gv52,n,i1,i2,p5,disv5,ic) + dis=(disu9+disv9+disu5+disv5)/4. + if(dis.lt.dmin) then + dmin=dis + igmin=i2 + endif + 310 continue + cl(igmin,nm+1)=cl(igmin,nm+1)+1 + ipos=cl(igmin,nm+1) + cl(igmin,ipos)=i1 + 300 continue + + ger9=0. + ger5=0. + gu92=0. + gv92=0. + gu52=0. + gv52=0. + + do i=1,nger + do j=1,cl(i,nm+1) + iel=cl(i,j) + do k=1,ic + ger9(i,k)=ger9(i,k)+geo9(iel,k) + ger5(i,k)=ger5(i,k)+geo5(iel,k) + gu92(i,k)=gu92(i,k)+u9(iel,k) + gv92(i,k)=gv92(i,k)+v9(iel,k) + gu52(i,k)=gu52(i,k)+u5(iel,k) + gv52(i,k)=gv52(i,k)+v5(iel,k) + enddo + enddo + do k=1,ic + gu92(i,k)=gu92(i,k)/real(cl(i,nm+1)) + gv92(i,k)=gv92(i,k)/real(cl(i,nm+1)) + gu52(i,k)=gu52(i,k)/real(cl(i,nm+1)) + gv52(i,k)=gv52(i,k)/real(cl(i,nm+1)) + enddo + + enddo + + do i=1,nger + call distancia9(gu91,n,gu92,n,i,i,p9,disu9,ic) + call distancia9(gv91,n,gv92,n,i,i,p9,disv9,ic) + call distancia5(gu51,n,gu52,n,i,i,p5,disu5,ic) + call distancia5(gv51,n,gv52,n,i,i,p5,disv5,ic) + dis=(disu9+disv9+disu5+disv5)/4. + if(dis.ge.0.10) go to 250 + enddo + + go to 252 + + 250 continue + + do i=1,nger + do j=1,ic + gu91(i,j)=gu92(i,j) + gv91(i,j)=gv92(i,j) + gu51(i,j)=gu52(i,j) + gv51(i,j)=gv52(i,j) + enddo + enddo + go to 251 + + 252 continue + +!cccccccccccccccccccccccccccccccccccccccccc + + do i=1,nger + do j=1,ic + gu92(i,j)=(gu92(i,j)*su9(j))+mu9(j) + gv92(i,j)=(gv92(i,j)*sv9(j))+mv9(j) + gu52(i,j)=(gu52(i,j)*su5(j))+mu5(j) + gv52(i,j)=(gv52(i,j)*sv5(j))+mv5(j) + enddo + enddo + +! These variables are not going to be used but they should not be delated + do 401 i=1,nger + do k=1,ic + ger9(i,k)=ger9(i,k)/real(cl(i,nm+1)) + ger9_newUnd(i,k)=1000.+(ger9(i,k)/8.) + ger5(i,k)=ger5(i,k)/real(cl(i,nm+1)) + enddo + + 401 continue + +END SUBROUTINE clasif + + + + + + + diff --git a/src/downscaling_prec.f90 b/src/downscaling_prec.f90 new file mode 100755 index 0000000000000000000000000000000000000000..c061488ee806e81b528589e7044d03b01c545de7 --- /dev/null +++ b/src/downscaling_prec.f90 @@ -0,0 +1,943 @@ +! Program to downscale precipitation based on analogs method +! for Iberian Peninsula and Balearic Islands (Autor: Petisco de Lara) +! 
****************************************************** + +SUBROUTINE down_prec(ic,id,nd,nm,nlat,nlon,nlatt,nlont,slat,slon,rlat,rlon,& + slatt,slont,ngridd,u500,v500,t500,t850,msl_si,q700,& + prec_hr,nanx,um,vm,nger,gu92,gv92,gu52,gv52,& + neni,vdmin,vref4,new_ccm,new_kvars,new_corrpar,u500e,& + v500e,t500e,t850e,msle,q700e,pp) + + +USE MOD_CSTS +USE MOD_FUNCS + + IMPLICIT NONE + +!cccccccccccccccccccccccccccccccccc +!cccccccccccccccccccccccccccccccccc + + INTEGER, INTENT(IN) :: ic + INTEGER, INTENT(IN) :: id + INTEGER, INTENT(IN) :: nd + INTEGER, INTENT(IN) :: nm + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + INTEGER, INTENT(IN) :: nlatt + INTEGER, INTENT(IN) :: nlont + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + DOUBLE PRECISION, INTENT(IN) :: slatt + DOUBLE PRECISION, INTENT(IN) :: slont + INTEGER, INTENT(IN) :: ngridd + + DOUBLE PRECISION, INTENT(IN) :: u500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: v500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: msl_si(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: q700(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t850(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: prec_hr(nd,nptos) + + INTEGER, INTENT(IN) :: nanx + + DOUBLE PRECISION, INTENT(IN) :: um(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: vm(nd,ic) + INTEGER, INTENT(IN) :: nger + DOUBLE PRECISION, INTENT(IN) :: gu92(nger,ic) + DOUBLE PRECISION, INTENT(IN) :: gv92(nger,ic) + DOUBLE PRECISION, INTENT(IN) :: gu52(nger,ic) + DOUBLE PRECISION, INTENT(IN) :: gv52(nger,ic) + + INTEGER, INTENT(IN) :: neni + DOUBLE PRECISION, INTENT(IN) :: vdmin(nptos,4) + INTEGER, INTENT(IN) :: vref4(nptos,4) + + DOUBLE PRECISION, INTENT(IN) :: new_ccm(nger,nptos) + INTEGER, INTENT(IN) :: new_kvars(nger,nptos,npx) + DOUBLE PRECISION, INTENT(IN) :: new_corrpar(nger,nptos,npx) + + DOUBLE PRECISION, INTENT(IN) :: u500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: v500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: t500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: t850e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: msle(nm,id) + DOUBLE PRECISION, INTENT(IN) :: q700e(nm,ic) + + DOUBLE PRECISION, INTENT(OUT) :: pp(nm,nptos) + +!cccccccccccccccccccccccccccccccccc + integer m,n + integer i,j + integer nvar + integer ii + integer jt,jp + real rlx,rly + real sp,dd +!cccccccccccccccccccccccccccccccccc +!cccccccccccccccccccccccccccccccccc + integer i1,i2,i3,i4,i7,iana,ice,ien,ipos,ips,ipu,ir,ire,iti,jk,k + integer mesa,nan,nan2,nanf,nanv,nen,nmm,np + real disu5,disu9,disv5,disv9,dmin,dt,du5,du9,dv5,dv9,supo + real vorm,vorz + +!***************************************************************** + + integer anai(nanx),ana(nanx),puh(ic) + real u9(nd,ic),v9(nd,ic),u5(nd,ic),v5(nd,ic),he7(nd,ic),he7m(ic) + real psl(nd,ic),ut9(nger,ic),vt9(nger,ic),ut5(nger,ic),vt5(nger,ic) + real t5(nd,ic),t8(nd,ic),tm5(ic),tm8(ic) + real pslm(ic),um9(ic),vm9(ic),um5(ic),vm5(ic),pslma(ic) + real bdlon,bilat + real pres(id),bar(id) + real pred1(npx,nd,neni),pred1m(npx,neni) + character sc*8,pt*9,nomeb*90 + + integer nor(nanx),prs(nger,nptos,npx+7) + integer annoa + integer ior(nd),eqc(nptos) + integer ref(nptos),puce(neni),puen(neni,5001) + + integer est(nptos) + real prec(nd,nptos) + real p9(ic),p5(ic) + real dista(nd),prees(nm,nptos) + real rlt(ic),rln(ic),rltt(id),rlnt(id) + real dist(nanx),dist1(npx,nanx),serin(nanx) + real aaa(nanx) + real ser(nd),media(npx,neni),sigma(npx,neni) + real md,sg + real 
mu9(ic),su9(ic),mv9(ic),sv9(ic),copar(nger,nptos,npx) + real mu5(ic),su5(ic),mv5(ic),sv5(ic),ccm(nger,nptos) + +!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +!c Variables nuevas por la nueva interpolacion de los predictores + + integer Vref(nptos,4) + real Vdis(nptos,4) + integer iii + real distancion1, distancion2, distancion3, distancion4 + real peso1, peso2, peso3, peso4 + real calculin_modelo, calculin_calibracion + integer ien1, ien2, ien3, ien4 + integer ik + +!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + + m=nger + n=nd + +!******************************** +! 1. Synoptic latitude and longitude calculation and assignment of +! weights windows +! + do j=1,ic + rlt(j)=slat+(((j-1)/nlon)*rlat) + rln(j)=slon+((mod(j-1,nlon)+1-1)*rlon) + enddo + p9=0. + p5=1. + do i1=1,ic + if((rlt(i1).le.fnor2).and.(rlt(i1).ge.fsur2)) then + if((rln(i1).ge.foes2).and.(rln(i1).le.fest2)) then + p9(i1)=1. + p5(i1)=4. + endif + endif + enddo + do i1=1,ic + if((rlt(i1).le.fnor1).and.(rlt(i1).ge.fsur1)) then + if((rln(i1).ge.foes1).and.(rln(i1).le.fest1)) then + p9(i1)=2. + p5(i1)=8. + endif + endif + enddo +! +! Latitude and longitude calculation in the extended domain (called low +! resolution) + + do j=1,id + rltt(j)=slatt+(((j-1)/nlont)*rlat) + rlnt(j)=slont+((mod(j-1,nlont)+1-1)*rlon) + enddo +! +!**************************************************************** +! TRAINING REANALYSIS VARIABLES + + u5(:,:)=u500(:,:) + v5(:,:)=v500(:,:) + psl(:,:)=msl_si(:,:) + he7(:,:)=q700(:,:) + t5(:,:)=t500(:,:) + t8(:,:)=t850(:,:) + +! HIGH RESOLUTION (5KM) OBSERVATIONS +! It is neccesary to convert to tenths of mm (multiplying by 10). + + prec(:,:)=prec_hr(:,:)*10. + +! Mean and standard deviation of reference synoptic fields (wind components). + + do j=1,ic + do i=1,n + ser(i)=um(i,j) + enddo + md=0. + call estadis(ser,md,sg,n) + mu9(j)=md + su9(j)=sg + do i=1,n + ser(i)=vm(i,j) + enddo + call estadis(ser,md,sg,n) + mv9(j)=md + sv9(j)=sg + do i=1,n + ser(i)=u5(i,j) + enddo + call estadis(ser,md,sg,n) + mu5(j)=md + su5(j)=sg + do i=1,n + ser(i)=v5(i,j) + enddo + call estadis(ser,md,sg,n) + mv5(j)=md + sv5(j)=sg + + enddo + +! A reference centers (matching points between synoptic and high +! resolution grids) are define to know where the predictor must be +! calculated. + + Vref(:,:)=vref4(:,:) + Vdis(:,:)=vdmin(:,:) + + nen=1 + puce(1)=Vref(1,1) + + do iii=1,4 + do j=1,nptos + do k=1,nen + if (Vref(j,iii).eq.puce(k)) go to 101 + enddo + nen=nen+1 + ipos=nen + puce(ipos)=Vref(j,iii) + 101 continue + enddo + enddo + +! Each reference point have associated to a group of high resolution grids. + puen=0 + + do k=1,nen + do j=1,nptos + do iii=1,4 + if(Vref(j,iii).eq.puce(k)) then + puen(k,5001)=puen(k,5001)+1 + ipos=puen(k,5001) + puen(k,ipos)=j + endif + enddo + enddo + enddo + +! The predictors are obtained and normalized +! OBTAINING THE SEA LEVEL PRESSURE (PREDICTOR 1) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(1,i,j)=psl(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(1,i,j) + enddo + call estadis(ser,md,sg,n) + media(1,j)=md + sigma(1,j)=sg + do i=1,n + pred1(1,i,j)=(pred1(1,i,j)-media(1,j))/sigma(1,j) + enddo + enddo + +! OBTAINING THE TREND (PREDICTOR 11) IN THE REFERENCE CENTERS + + do j=1,nen + pred1(11,1,j)=0. 
+ enddo + + do i=2,n + do j=1,nen + ice=puce(j) + pred1(11,i,j)=psl(i,ice)-psl((i-1),ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(11,i,j) + enddo + call estadis(ser,md,sg,n) + media(11,j)=md + sigma(11,j)=sg + do i=1,n + pred1(11,i,j)=(pred1(11,i,j)-media(11,j))/sigma(11,j) + enddo + enddo + +! OBTAINING THE VERTICAL THERMAL GRADIENT(PREDICTOR 3) +! IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(3,i,j)=t8(i,ice)-t5(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(3,i,j) + enddo + call estadis(ser,md,sg,n) + media(3,j)=md + sigma(3,j)=sg + do i=1,n + pred1(3,i,j)=(pred1(3,i,j)-media(3,j))/sigma(3,j) + enddo + enddo + +! OBTAINING THE 500 hPa TEMPERATURE (PREDICTOR 2) +! IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(2,i,j)=t5(i,ice) + enddo + enddo + + do j=1,nen + do i=1,n + ser(i)=pred1(2,i,j) + enddo + call estadis(ser,md,sg,n) + media(2,j)=md + sigma(2,j)=sg + do i=1,n + pred1(2,i,j)=(pred1(2,i,j)-media(2,j))/sigma(2,j) + enddo + enddo + +! OBTAINING THE VORTICITY (PREDICTOR 4) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. + rly=rt*abs(rlat)*pi/180. + vorm=um(i,ice-nlon)-um(i,ice+nlon) + vorm=vorm/(2.*rly) + vorz=vm(i,ice+1)-vm(i,ice-1) + vorz=vorz/(2.*rlx) + pred1(4,i,j)=vorz-vorm + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(4,i,j) + enddo + call estadis(ser,md,sg,n) + media(4,j)=md + sigma(4,j)=sg + do i=1,n + pred1(4,i,j)=(pred1(4,i,j)-media(4,j))/sigma(4,j) + enddo + enddo + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS (PREDICTORS 5 AND 6) IN THE REFERENCE +! CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(5,i,j)=um(i,ice) + pred1(6,i,j)=vm(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(5,i,j) + enddo + call estadis(ser,md,sg,n) + media(5,j)=md + sigma(5,j)=sg + do i=1,n + pred1(5,i,j)=(pred1(5,i,j)-media(5,j))/sigma(5,j) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(6,i,j) + enddo + call estadis(ser,md,sg,n) + media(6,j)=md + sigma(6,j)=sg + do i=1,n + pred1(6,i,j)=(pred1(6,i,j)-media(6,j))/sigma(6,j) + enddo + enddo + +! OBTAINING THE VORTICITY IN 500 hPa (PREDICTOR 7) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. + rly=rt*abs(rlat)*pi/180. + vorm=u5(i,ice-nlon)-u5(i,ice+nlon) + vorm=vorm/(2.*rly) + vorz=v5(i,ice+1)-v5(i,ice-1) + vorz=vorz/(2.*rlx) + pred1(7,i,j)=vorz-vorm + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(7,i,j) + enddo + call estadis(ser,md,sg,n) + media(7,j)=md + sigma(7,j)=sg + do i=1,n + pred1(7,i,j)=(pred1(7,i,j)-media(7,j))/sigma(7,j) + enddo + enddo + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS IN 500 hPa (PREDICTORS 8 AND 9) +! IN THE RERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(8,i,j)=u5(i,ice) + pred1(9,i,j)=v5(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(8,i,j) + enddo + call estadis(ser,md,sg,n) + media(8,j)=md + sigma(8,j)=sg + do i=1,n + pred1(8,i,j)=(pred1(8,i,j)-media(8,j))/sigma(8,j) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(9,i,j) + enddo + call estadis(ser,md,sg,n) + media(9,j)=md + sigma(9,j)=sg + do i=1,n + pred1(9,i,j)=(pred1(9,i,j)-media(9,j))/sigma(9,j) + enddo + enddo + +! OBTAINING THE ESPECIFIC HUMIDITY IN 700 hPa (PREDICTOR 10) IN THE REFERENCE +! 
CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(10,i,j)=he7(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(10,i,j) + enddo + call estadis(ser,md,sg,n) + media(10,j)=md + sigma(10,j)=sg + do i=1,n + pred1(10,i,j)=(pred1(10,i,j)-media(10,j))/sigma(10,j) + enddo + enddo + +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! ESTANDARIZATION OF REFERENCE WINDS (SYNOPTIC WINDS ALSO) + + do i=1,n + do j=1,ic + u9(i,j)=(um(i,j)-mu9(j))/su9(j) + v9(i,j)=(vm(i,j)-mv9(j))/sv9(j) + u5(i,j)=(u5(i,j)-mu5(j))/su5(j) + v5(i,j)=(v5(i,j)-mv5(j))/sv5(j) + enddo + enddo + + do i=1,m + do j=1,ic + ut9(i,j)=(gu92(i,j)-mu9(j))/su9(j) + vt9(i,j)=(gv92(i,j)-mv9(j))/sv9(j) + ut5(i,j)=(gu52(i,j)-mu5(j))/su5(j) + vt5(i,j)=(gv52(i,j)-mv5(j))/sv5(j) + enddo + enddo + +! SIGNIFICANT PREDICTORS FOR EACH SYNOPTIC TYPE IN EACH HIGH +! RESOLUTION GRID POINT. + + ccm(:,:)=new_ccm(:,:) + prs(:,:,:)=new_kvars(:,:,:) + copar(:,:,:)=new_corrpar(:,:,:) + +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!************************************************************** +!************************************************************** +! +! DOWNSCALING BEGINS (ESTIMATING THE PROBLEM DAYS PRECIPITATION +! IN EACH HIGH RESOLUTION GRID POINT) +! +!************************************************************** +!************************************************************** +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +! print*,'Downscaling begins...' +! print*,'estimated day... ' + + nmm=0 + do 1000 i=1,nm ! Estimated days loop + +! ESTIMATED REANALYSIS VARIABLES + + um5(:)=u500e(i,:) + vm5(:)=v500e(i,:) + pres(:)=msle(i,:) + he7m(:)=q700e(i,:) + tm5(:)=t500e(i,:) + tm8(:)=t850e(i,:) + +! print*,' ',i + nmm=nmm+1 + +! Pressure synoptic grid calculation from low resolution one + + bilat=slat+(nlat-1)*rlat + bdlon=slon+(nlon-1)*rlon + + ire=0 + do 111 j=1,id + if((rltt(j).gt.slat).or.(rltt(j).lt.bilat)) go to 111 + if((rlnt(j).lt.slon).or.(rlnt(j).gt.bdlon)) go to 111 + ire=ire+1 + pslm(ire)=pres(j) + 111 continue + +! Geostrophic wind components at sea level to the estimated +! day (pressure in Pa). +! "/g" to invalidate gravity acceleration. + + bar=pres*100./g + + call geostrofico(bar,um9,vm9,id,ic,slatt,slont,slat,slon,& + rlat,rlon,rlat,rlon,nlatt,nlont,nlat,nlon,ngridd) + +! It is divided by density at sea level (standard atmosphere) to obtain +! the geostrophic wind components. + + do j=1,ic + um9(j)=um9(j)/1.225 + vm9(j)=vm9(j)/1.225 + enddo + +! The estimated predictors are obtained and normalized + +! OBTAINING THE 500 hPa TEMPERATURE (PREDICTOR 2) +! IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(2,j)=tm5(ice) + pred1m(2,j)=(pred1m(2,j)-media(2,j))/sigma(2,j) + enddo + +! OBTAINING THE SEA LEVEL PRESSURE (PREDICTOR 1) IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(1,j)=pslm(ice) + pred1m(1,j)=(pred1m(1,j)-media(1,j))/sigma(1,j) + enddo + +! OBTAINING THE TREND (PREDICTOR 11) IN THE REFERENCE CENTERS + + if(i.eq.1) then + do j=1,nen + pred1m(11,j)=0. + enddo + else + do j=1,nen + ice=puce(j) + pred1m(11,j)=pslm(ice)-pslma(ice) + pred1m(11,j)=(pred1m(11,j)-media(11,j))/sigma(11,j) + enddo + endif + + pslma=pslm + +! OBTAINING THE VERTICAL THERMAL GRADIENT(PREDICTOR 3) +! IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(3,j)=tm8(ice)-tm5(ice) + pred1m(3,j)=(pred1m(3,j)-media(3,j))/sigma(3,j) + enddo + +! OBTAINING THE VORTICITY (PREDICTOR 4) IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. 
+ rly=rt*abs(rlat)*pi/180. + vorm=um9(ice-nlon)-um9(ice+nlon) + vorm=vorm/(2.*rly) + vorz=vm9(ice+1)-vm9(ice-1) + vorz=vorz/(2.*rlx) + pred1m(4,j)=vorz-vorm + pred1m(4,j)=(pred1m(4,j)-media(4,j))/sigma(4,j) + enddo + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS (PREDICTORS 5 AND 6) IN THE REFERENCE +! CENTERS + + do j=1,nen + ice=puce(j) + pred1m(5,j)=um9(ice) + pred1m(5,j)=(pred1m(5,j)-media(5,j))/sigma(5,j) + pred1m(6,j)=vm9(ice) + pred1m(6,j)=(pred1m(6,j)-media(6,j))/sigma(6,j) + enddo + +! OBTAINING THE VORTICITY IN 500 hPa (PREDICTOR 7) IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. + rly=rt*abs(rlat)*pi/180. + vorm=um5(ice-nlon)-um5(ice+nlon) + vorm=vorm/(2.*rly) + vorz=vm5(ice+1)-vm5(ice-1) + vorz=vorz/(2.*rlx) + pred1m(7,j)=vorz-vorm + pred1m(7,j)=(pred1m(7,j)-media(7,j))/sigma(7,j) + enddo + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS IN 500 hPa (PREDICTORS 8 AND 9) +! IN THE RERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(8,j)=um5(ice) + pred1m(8,j)=(pred1m(8,j)-media(8,j))/sigma(8,j) + pred1m(9,j)=vm5(ice) + pred1m(9,j)=(pred1m(9,j)-media(9,j))/sigma(9,j) + enddo + +! OBTAINING THE ESPECIFIC HUMIDITY IN 700 hPa (PREDICTOR 10) IN THE REFERENCE +! CENTERS + + do j=1,nen + ice=puce(j) + pred1m(10,j)=he7m(ice) + pred1m(10,j)=(pred1m(10,j)-media(10,j))/sigma(10,j) + enddo + +! ESTANDARIZATION OF REFERENCE WINDS + + do j=1,ic + um9(j)=(um9(j)-mu9(j))/su9(j) + vm9(j)=(vm9(j)-mv9(j))/sv9(j) + um5(j)=(um5(j)-mu5(j))/su5(j) + vm5(j)=(vm5(j)-mv5(j))/sv5(j) + enddo + +! Synoptic type determination to which the estimated day belongs. + + dmin=99999. + iti=0 + do j=1,m + call distan9_2(um9,ut9,m,j,p9,du9,ic) + call distan9_2(vm9,vt9,m,j,p9,dv9,ic) + call distan5_2(um5,ut5,m,j,p5,du5,ic) + call distan5_2(vm5,vt5,m,j,p5,dv5,ic) + dd=(du9+dv9+du5+dv5)/4. + if(dd.lt.dmin) then + dmin=dd + iti=j + endif + enddo + +! Determine the "nanx" reference alements more similar to each synoptic type +! and the corresponding distances + + do i1=1,n + ior(i1)=i1 + dista(i1)=9999. + enddo + do 113 i1=1,n + call distan9_2(um9,u9,n,i1,p9,disu9,ic) + call distan9_2(vm9,v9,n,i1,p9,disv9,ic) + call distan5_2(um5,u5,n,i1,p5,disu5,ic) + call distan5_2(vm5,v5,n,i1,p5,disv5,ic) + dista(i1)=(disu9+disv9+disu5+disv5)/4. + 113 continue + call burbuja1(dista,ior,n,nanx) + do i1=1,nanx + anai(i1)=ior(i1) + enddo + +!******************************************************************* +!******************************************************************* + + do 1100 ien=1,nen + do 1200 i2=1,puen(ien,5001) + ipu=puen(ien,i2) + +!**************** +! An analog (nanf) have synoptic similarity regarding estimated day +! when it has value in a point and presents lower distance than a +! given threshold. + + nan=0 + nanf=0 + + do i3=1,nanx + iana=anai(i3) + if(prec(iana,ipu).ne.-999.) then + nan=nan+1 + ana(nan)=iana + dist(nan)=dista(i3) + if(dist(nan).eq.0.0) dist(nan)=0.1 + if(dist(nan).le.umb) nanf=nanf+1 + endif + enddo + + if(nan.lt.nmin) then +! print*,i,ipu,' there are not enough analogs ' + goto 1200 + endif + + if(nanf.le.nmin) nanf=nmin + +! Significant predictors for the synoptic type of the estimated day +! in each HR grid point. + + np=prs(iti,ipu,npx+7) + if(ccm(iti,ipu).lt.ccmi) np=0 + +! If no significant predictors, just a synoptic similarity is taken account. + + if(np.eq.0) then + if(nanf.gt.nmax) then + nan2=nmax + else + nan2=nanf + endif + +!!!!!!!!!!!!!!!!!!!!!!! + + prees(i,ipu)=0. + sp=0. 
+ do i3=1,nan2 + iana=ana(i3) + dt=dist(i3) + sp=sp+1./dt + prees(i,ipu)=prees(i,ipu)+prec(iana,ipu)*(1./dt) + enddo + prees(i,ipu)=prees(i,ipu)/sp + go to 1200 + endif + +! If there are significant predictors: + + do ik=1,nen + ice=puce(ik) + + if (Vref(ipu,1).eq.ice) then + ien1=ik + distancion1=Vdis(ipu,1) + peso1=1/distancion1 + go to 251 + endif + enddo + 251 continue + + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,2).eq.ice) then + ien2=ik + distancion2=Vdis(ipu,2) + peso2=1/distancion2 + go to 252 + endif + enddo + 252 continue + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,3).eq.ice) then + ien3=ik + distancion3=Vdis(ipu,3) + peso3=1/distancion3 + go to 253 + endif + enddo + 253 continue + + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,4).eq.ice) then + ien4=ik + distancion4=Vdis(ipu,4) + peso4=1/distancion4 + go to 254 + endif + enddo + 254 continue + + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do 1250 i4=1,np + ips=prs(iti,ipu,i4) + do i1=1,nanf + iana=ana(i1) + calculin_modelo = pred1m(ips,ien1)*peso1+pred1m(ips,ien2)* & + peso2+pred1m(ips,ien3)*peso3+pred1m(ips,ien4)*peso4/(peso1+peso2+ & + peso3+peso4) + calculin_calibracion = pred1(ips,iana,ien1)*peso1+ & + pred1(ips,iana,ien2)*peso2+pred1(ips,iana,ien3)*peso3+ & + pred1(ips,iana,ien4)*peso4/(peso1+peso2+peso3+peso4) + + dist1(ips,i1)=(calculin_modelo - calculin_calibracion)**2 + + enddo + 1250 continue + +! The "nanf" analogs are sorted from higher to lower similarity (taken account +! both synoptic similarity and significant predictors) + + do ii=1,nanf + aaa(ii)=0. + nor(ii)=ii + enddo + nanv=0 + do ii=1,nanf + supo=0. + do i7=1,np + ips=prs(iti,ipu,i7) + aaa(ii)= aaa(ii)+dist1(ips,nor(ii))*copar(iti,ipu,i7) + supo=supo+copar(iti,ipu,i7) + enddo + aaa(ii)=aaa(ii)/supo + if(aaa(ii).eq.0.) aaa(ii)=0.1 + if(aaa(ii).le.umbl) nanv=nanv+1 + serin(ii)=(aaa(ii)+dist(nor(ii)))/2. + enddo + + call burbuja(serin,nor,nanf,nanf,nanf) + +!!!!!!!!!!!!!!!! + + if(nanv.lt.nmin) go to 1998 + prees(i,ipu)=0. + sp=0. + nan2=0 + do 8888 ii=1,nanf + if(aaa(nor(ii)).gt.umbl) go to 8888 + iana=ana(nor(ii)) + dt=serin(ii) + sp=sp+(1./dt) + prees(i,ipu)=prees(i,ipu)+prec(iana,ipu)*(1./dt) + nan2=nan2+1 + if(nan2.eq.nmax) go to 1995 + 8888 continue + 1995 continue + prees(i,ipu)=prees(i,ipu)/sp + + go to 1200 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + 1998 continue + prees(i,ipu)=0. + sp=0. + nan2=0 +! 1) If there is some local similarity analog, it is taken account + if(nanv.gt.0) then + do 8889 ii=1,nanf + if(aaa(nor(ii)).gt.umbl) go to 8889 + nan2=nan2+1 + iana=ana(nor(ii)) + dt=serin(ii) + sp=sp+(1./dt) + prees(i,ipu)=prees(i,ipu)+prec(iana,ipu)*(1./dt) + 8889 continue +! and it is completed with the rest of analogs in order of +! total similarity until the minimum number of analogs are completed. + do 8890 ii=1,nanf + if(aaa(nor(ii)).le.umbl) go to 8890 + nan2=nan2+1 + iana=ana(nor(ii)) + dt=serin(ii) + sp=sp+(1./dt) + prees(i,ipu)=prees(i,ipu)+prec(iana,ipu)*(1./dt) + if(nan2.eq.nmin) go to 1997 + 8890 continue + 1997 continue + prees(i,ipu)=prees(i,ipu)/sp + go to 1200 +! 2)If no analogs with local similarity, analogs with total similarity +! are taken (synoptic+local predictors) until the minimum number of +! analogs are completed. + elseif(nanv.eq.0) then + do ii=1,nmin + nan2=nan2+1 + iana=ana(nor(ii)) + dt=serin(ii) + sp=sp+(1./dt) + prees(i,ipu)=prees(i,ipu)+prec(iana,ipu)*(1./dt) + enddo + prees(i,ipu)=prees(i,ipu)/sp + go to 1200 + endif + + + 1200 continue + 1100 continue + +!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ 1000 continue !End of estimated days loop +!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + pp(:,:)=prees(:,:) + +!++++++++++++++++++++++++++++++++++++++++++++++++++ + +END SUBROUTINE down_prec + diff --git a/src/downscaling_temp.f90 b/src/downscaling_temp.f90 new file mode 100755 index 0000000000000000000000000000000000000000..fb47b6bbacbcf7fb1a70a7dbeff1f4c8e64b65b8 --- /dev/null +++ b/src/downscaling_temp.f90 @@ -0,0 +1,664 @@ +! Program to downscale maximum and minimum temperature based on analogs method +! for Iberian Peninsula and Balearic Islands (Autor: Petisco de Lara) +! ****************************************************** + +SUBROUTINE down_temp(ic,id,nd,nm,nlat,nlon,nlatt,nlont,slat,slon,rlat,rlon,& + slatt,slont,ngridd,u500,v500,t500,t850,msl_si,q700,& + t700,tm2m,tmx_hr,tmn_hr,nanx,nvar,dia,mes,um,vm,& + insol,neni,vdmin,vref4,u500e,v500e,t500e,t850e,& + msle,q700e,t700e,tm2me,tmax,tmin) + + +USE MOD_CSTS +USE MOD_FUNCS + + IMPLICIT NONE + +!cccccccccccccccccccccccccccccccccc +!cccccccccccccccccccccccccccccccccc + + INTEGER, INTENT(IN) :: ic + INTEGER, INTENT(IN) :: id + INTEGER, INTENT(IN) :: nd + INTEGER, INTENT(IN) :: nm + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + INTEGER, INTENT(IN) :: nlatt + INTEGER, INTENT(IN) :: nlont + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + DOUBLE PRECISION, INTENT(IN) :: slatt + DOUBLE PRECISION, INTENT(IN) :: slont + INTEGER, INTENT(IN) :: ngridd + + DOUBLE PRECISION, INTENT(IN) :: u500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: v500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: msl_si(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: q700(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t500(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t850(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: t700(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: tm2m(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: tmx_hr(nd,nptos) + DOUBLE PRECISION, INTENT(IN) :: tmn_hr(nd,nptos) + + INTEGER, INTENT(IN) :: nanx + INTEGER, INTENT(IN) :: nvar + INTEGER, INTENT(IN) :: dia(nm) + INTEGER, INTENT(IN) :: mes(nm) + + DOUBLE PRECISION, INTENT(IN) :: um(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: vm(nd,ic) + DOUBLE PRECISION, INTENT(IN) :: insol(nd) + + INTEGER, INTENT(IN) :: neni + DOUBLE PRECISION, INTENT(IN) :: vdmin(nptos,4) + INTEGER, INTENT(IN) :: vref4(nptos,4) + + DOUBLE PRECISION, INTENT(IN) :: u500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: v500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: t500e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: t850e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: msle(nm,id) + DOUBLE PRECISION, INTENT(IN) :: q700e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: t700e(nm,ic) + DOUBLE PRECISION, INTENT(IN) :: tm2me(nm,ic) + + DOUBLE PRECISION, INTENT(OUT) :: tmax(nm,nptos) + DOUBLE PRECISION, INTENT(OUT) :: tmin(nm,nptos) + +!cccccccccccccccccccccccccccccccccc + integer m,n + integer i,j + integer ii,jp + real sp,dd + real aaa,bdlon,bilat,dim,dift,ccm + integer jv,kk,kki,ik,it,kp,imes,ida,ida2,idia,mi,mm +! + integer i1,i2,i3,i4,i7,iana,ice,ien,ipos,ips,ipu,ir,ire,iti,jk,k + integer mesa,nan,nan2,nanf,nanv,nen,nmm,np + real disu5,disu9,disv5,disv9,dmin,dt,du5,du9,dv5,dv9,supo + real vorm,vorz +! 
+ character sc*8,pt*9 + real u9(nd,ic),v9(nd,ic),u5(nd,ic),v5(nd,ic) + real mu9(ic),su9(ic),mv9(ic),sv9(ic) + real mu5(ic),su5(ic),mv5(ic),sv5(ic) + real p9(ic),p5(ic),rlt(ic),rln(ic),rltt(id),rlnt(id) + real inso(nd),t8(nd,ic),t7(nd,ic),t5(nd,ic),he7(nd,ic) + real efan(nd,ic),psl(nd,ic),pslm(ic),t2(nd,ic),t2m(ic) + real pres(id),bar(id),ser(nd),md,sg + integer ior(nd),ref(nptos),puce(neni) + + real um9(ic),vm9(ic),um5(ic),vm5(ic),he7m(ic) + real t8m(ic),t7m(ic),t5m(ic) + real insom,pred1(nps,nd,neni),pred1m(nps,neni),pred(nvar) + integer ana(nanx),anai(nanx) + real tmxr(nd,nptos),tmir(nd,nptos),dis(nd) + real dato1(nvar,nanx),tempx(nanx),tempi(nanx) + real ccmux(nptos),ccmui(nptos) + integer kvars(nvar) + real corrpar(nvar) + real tmxes(nm,nptos),tmies(nm,nptos) + real coe(nvar),con +!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + integer Vref(nptos,4) + real Vdis(nptos,4) + integer iii + real distancion1, distancion2, distancion3, distancion4 + real peso1, peso2, peso3, peso4, calculin + integer ien1, ien2, ien3, ien4 + +!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +!cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + + m=nm + n=nd + +!******************************** +! 1. Synoptic latitude and longitude calculation and assignment of +! weights windows +! + do j=1,ic + rlt(j)=slat+(((j-1)/nlon)*rlat) + rln(j)=slon+((mod(j-1,nlon)+1-1)*rlon) + enddo + + p9=0. + p5=1. + do i1=1,ic + if((rlt(i1).le.fnor2).and.(rlt(i1).ge.fsur2)) then + if((rln(i1).ge.foes2).and.(rln(i1).le.fest2)) then + p9(i1)=1. + p5(i1)=4. + endif + endif + enddo + do i1=1,ic + if((rlt(i1).le.fnor1).and.(rlt(i1).ge.fsur1)) then + if((rln(i1).ge.foes1).and.(rln(i1).le.fest1)) then + p9(i1)=2. + p5(i1)=8. + endif + endif + enddo + +! +! Latitude and longitude calculation in the extended domain (called low +! resolution) + + do j=1,id + rltt(j)=slatt+(((j-1)/nlont)*rlat) + rlnt(j)=slont+((mod(j-1,nlont)+1-1)*rlon) + enddo + +! A reference centers (matching points between synoptic and high +! resolution grids) are define to know where the predictor must be +! calculated. + + Vref(:,:)=vref4(:,:) + Vdis(:,:)=vdmin(:,:) + + nen=1 + puce(1)=Vref(1,1) + + do iii=1,4 + do j=1,nptos + do k=1,nen + if (Vref(j,iii).eq.puce(k)) go to 101 + enddo + nen=nen+1 + ipos=nen + puce(ipos)=Vref(j,iii) + 101 continue + enddo + enddo + +! +!**************************************************************** +! TRAINING REANALYSIS VARIABLES + + u9(:,:)=um(:,:) + v9(:,:)=vm(:,:) + u5(:,:)=u500(:,:) + v5(:,:)=v500(:,:) + psl(:,:)=msl_si(:,:) + he7(:,:)=q700(:,:) + t5(:,:)=t500(:,:) + t8(:,:)=t850(:,:) + t7(:,:)=t700(:,:) + t2(:,:)=tm2m(:,:) + +! INSOLATION PARAMETER + + inso(:)=insol(:) + +! The predictors are obtained +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(2,i,j)=u9(i,ice) + pred1(3,i,j)=v9(i,ice) + enddo + enddo + +! OBTAINING THE SEA LEVEL PRESSURE IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(4,i,j)=psl(i,ice) + enddo + enddo + +! OBTAINING THE 850 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(1,i,j)=t8(i,ice) + enddo + enddo + +! OBTAINING THE ESPECIFIC HUMIDITY IN 700 hPa IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(5,i,j)=he7(i,ice) + enddo + enddo + +! OBTAINING THE 2 METERS TEMPERATURE IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(8,i,j)=t2(i,ice) + enddo + enddo + +! 
OBTAINING THE 700 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(6,i,j)=t7(i,ice) + enddo + enddo + +! OBTAINING THE 500 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(7,i,j)=t5(i,ice) + enddo + enddo + +! MEAN AND DEVIATION OF REFERENCE FIELDS (WINDS) + + do j=1,ic + do i=1,n + ser(i)=u9(i,j) + enddo + call estadis(ser,md,sg,n) + mu9(j)=md + su9(j)=sg + do i=1,n + ser(i)=v9(i,j) + enddo + call estadis(ser,md,sg,n) + mv9(j)=md + sv9(j)=sg + do i=1,n + ser(i)=u5(i,j) + enddo + call estadis(ser,md,sg,n) + mu5(j)=md + su5(j)=sg + do i=1,n + ser(i)=v5(i,j) + enddo + call estadis(ser,md,sg,n) + mv5(j)=md + sv5(j)=sg + enddo + +! REFERENCE WINDS ARE NORMALIZED + + do i=1,n + do j=1,ic + u9(i,j)=(u9(i,j)-mu9(j))/su9(j) + v9(i,j)=(v9(i,j)-mv9(j))/sv9(j) + u5(i,j)=(u5(i,j)-mu5(j))/su5(j) + v5(i,j)=(v5(i,j)-mv5(j))/sv5(j) + enddo + enddo + +! HIGH RESOLUTION (5KM) MAXIMUM AND MINIMUM OBSERVED TEMPERATURE + + tmxr(:,:)=tmx_hr(:,:) + tmir(:,:)=tmn_hr(:,:) + +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +!************************************************************** +!************************************************************** +! +! DOWNSCALING BEGINS (ESTIMATING THE PROBLEM DAYS MAXIMUM AND +! MINIMUM TEMPERATURES IN EACH HIGH RESOLUTION GRID POINT) +! +!************************************************************** +!************************************************************** +!++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +! print*,'Downscaling begins...' +! print*,'estimated day... ' + + mm=0 + do 1000 i=1,m + +! ESTIMATED REANALYSIS VARIABLES + + um5(:)=u500e(i,:) + vm5(:)=v500e(i,:) + t8m(:)=t850e(i,:) + t7m(:)=t700e(i,:) + t5m(:)=t500e(i,:) + he7m(:)=q700e(i,:) + t2m(:)=tm2me(i,:) + pres(:)=msle(i,:) + +! print*,' ',i + mm=mm+1 + +! Pressure synoptic grid calculation from low resolution one + + bilat=slat+(nlat-1)*rlat + bdlon=slon+(nlon-1)*rlon + + ire=0 + do 111 j=1,id + if((rltt(j).gt.slat).or.(rltt(j).lt.bilat)) go to 111 + if((rlnt(j).lt.slon).or.(rlnt(j).gt.bdlon)) go to 111 + ire=ire+1 + pslm(ire)=pres(j) + 111 continue + +! Geostrophic wind components at sea level to the estimated +! day (pressure in Pa). +! "/g" to invalidate gravity acceleration. + + bar=pres*100./9.81 + + call geostrofico(bar,um9,vm9,id,ic,slatt,slont,slat,slon,& + rlat,rlon,rlat,rlon,nlatt,nlont,nlat,nlon,ngridd) + +! It is divided by density at sea level (standard atmosphere) to obtain +! the geostrophic wind components. + + do j=1,ic + um9(j)=um9(j)/1.225 + vm9(j)=vm9(j)/1.225 + enddo + +! The estimated predictors are obtained +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(2,j)=um9(ice) + pred1m(3,j)=vm9(ice) + enddo + +! OBTAINING THE SEA LEVEL PRESSURE IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(4,j)=pslm(ice) + enddo + +! OBTAINING THE 850 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(1,j)=t8m(ice) + enddo + +! OBTAINING THE ESPECIFIC HUMIDITY IN 700 hPa IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(5,j)=he7m(ice) + enddo + +! OBTAINING THE 2 METERS TEMPERATURE IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(8,j)=t2m(ice) + enddo + +! OBTAINING THE 700 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(6,j)=t7m(ice) + enddo + +! 
OBTAINING THE 500 hPa TEMPERATURE IN THE REFERENCE CENTERS + + do j=1,nen + ice=puce(j) + pred1m(7,j)=t5m(ice) + enddo + +! REFERENCE WINDS ARE NORMALIZED + + do j=1,ic + um9(j)=(um9(j)-mu9(j))/su9(j) + vm9(j)=(vm9(j)-mv9(j))/sv9(j) + um5(j)=(um5(j)-mu5(j))/su5(j) + vm5(j)=(vm5(j)-mv5(j))/sv5(j) + enddo + +! INSOLATION PARAMETER ARE CALCULATED + + idia=dia(i) + imes=mes(i) + call fechanno(idia,imes,ida) + ida2=ida-80 + if(ida2.le.0) ida2=ida2+365 + aaa=2.*pi*float(ida2)/365. + insom=sin(aaa) + + +! Synoptic type determination: the "nanx" reference alements +! more similar to each synoptic type and the corresponding +! distances. + + do k=1,n + ior(k)=k + dis(k)=9999. + enddo + + do 110 j=1,n + call distan9_2(um9,u9,n,j,p9,disu9,ic) + call distan9_2(vm9,v9,n,j,p9,disv9,ic) + call distan5_2(um5,u5,n,j,p5,disu5,ic) + call distan5_2(vm5,v5,n,j,p5,disv5,ic) + dim=(disu9+disv9+disu5+disv5)/4. + dis(j)=dim + 110 continue + call burbuja1(dis,ior,n,nanx) + do j=1,nanx + anai(j)=ior(j) + enddo + + do 1200 ipu=1,nptos + +! Reference environment + + do ik=1,nen + ice=puce(ik) + + if (Vref(ipu,1).eq.ice) then + ien1=ik + distancion1=Vdis(ipu,1) + peso1=1/distancion1 + go to 251 + endif + enddo + 251 continue + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,2).eq.ice) then + ien2=ik + distancion2=Vdis(ipu,2) + peso2=1/distancion2 + go to 252 + endif + enddo + 252 continue + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,3).eq.ice) then + ien3=ik + distancion3=Vdis(ipu,3) + peso3=1/distancion3 + go to 253 + endif + enddo + 253 continue + + do ik=1,nen + ice=puce(ik) + if (Vref(ipu,4).eq.ice) then + ien4=ik + distancion4=Vdis(ipu,4) + peso4=1/distancion4 + go to 254 + endif + enddo + 254 continue + + +! Predictors for the estimated day +! INSOLATION PREDICTOR + + pred(1)=insom + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do jp=1,nps + kp=jp+1 + calculin = pred1m(jp,ien1)*peso1+pred1m(jp,ien2)*peso2+ & + pred1m(jp,ien3)*peso3+pred1m(jp,ien4)*peso4 + pred(kp)= calculin/(peso1+peso2+peso3+peso4) + enddo + +!!!!!!!!!!!!!!!!!!!!!!!!! +! FOR MAXIMUM TEMPERATURE +!!!!!!!!!!!!!!!!!!!!!!!!! + + nan=0 + do 1201 i2=1,nanx + iana=anai(i2) + if(tmxr(iana,ipu).ne.-999.) then + nan=nan+1 + ana(nan)=iana + + dato1(1,nan)=inso(iana) + do jp=1,nps + kp=jp+1 + dato1(kp,nan) = (pred1(jp,iana,ien1)*peso1+ & + pred1(jp,iana,ien2)*peso2 + pred1(jp,iana,ien3)*peso3+ & + pred1(jp,iana,ien4)*peso4 ) / (peso1+peso2+peso3+peso4) + enddo + + tempx(nan)=tmxr(iana,ipu) + else + go to 1201 + endif + 1201 continue + + if(nan.gt.150) nan=150 + +! Calculation of significant predictors, their coeficients and their +! multiple and partial correlation coeficients to estimate the +! maximum temperature +! +! mi: number of selected predictors +! ccm: multiple correlation coeficient +! kvars: selected predictors labels (vector) +! corrpar: partial correlation of selected predictors (vector) +! coe: regression coeficients associated to each predictor (vector). +! (value = 0 when there is no selected predictor). +! con: Y-intercept (independent equation term) +! tol: tolerance to select predictors + + call stepregrs & + (tempx,dato1,nanx,nvar,nan,mi,ccm,kvars,corrpar,coe,con,tol) + +! Maximum temperature estimation. When there are no significant predictors, +! estimated temperature is the temperature of the analog that has the 2 +! meters temperature more similar to the estimated day. + + if(mi.eq.0) then + dift=999999. 
+ do kk=1,nan + if(abs(pred(9)-dato1(9,kk)).lt.dift) then + kki=kk + dift=abs(pred(9)-dato1(9,kk)) + endif + enddo + tmxes(i,ipu)=tmxr(ana(kki),ipu) + else + tmxes(i,ipu)=con + do jv=1,nvar + tmxes(i,ipu)=coe(jv)*pred(jv)+tmxes(i,ipu) + enddo + endif + + 1203 CONTINUE + +!!!!!!!!!!!!!!!!!!!!!!!!! +! FOR MINIMUM TEMPERATURE +!!!!!!!!!!!!!!!!!!!!!!!!! + + nan=0 + do 1202 i2=1,nanx + iana=anai(i2) + if(tmir(iana,ipu).ne.-999.) then + nan=nan+1 + ana(nan)=iana +!!!! +! With NAN observed tmin data, next lines should be included +!c dato1(1,nan)=inso(iana) +!c do jp=1,nps +!c kp=jp+1 +!c dato1(kp,nan)=pred1(jp,iana,ien) +!c enddo +!!!! + tempi(nan)=tmir(iana,ipu) + else + go to 1202 + endif + 1202 continue + + if(nan.gt.150) nan=150 + +! Calculation of significant predictors, their coeficients and their +! multiple and partial correlation coeficients to estimate the +! minimum temperature +! +! mi: number of selected predictors +! ccm: multiple correlation coeficient +! kvars: selected predictors labels (vector) +! corrpar: partial correlation of selected predictors (vector) +! coe: regression coeficients associated to each predictor (vector). +! (value = 0 when there is no selected predictor). +! con: Y-intercept (independent equation term) +! tol: tolerance to select predictors + + call stepregrs & + (tempi,dato1,nanx,nvar,nan,mi,ccm,kvars,corrpar,coe,con,tol) + +! Minimum temperature estimation. When there are no significant predictors, +! estimated temperature is the temperature of the analog that has the 2 +! meters temperature more similar to the estimated day. + + if(mi.eq.0) then + dift=999999. + do kk=1,nan + if(abs(pred(9)-dato1(9,kk)).lt.dift) kki=kk + enddo + tmies(i,ipu)=tmir(ana(kki),ipu) + else + tmies(i,ipu)=con + do jv=1,nvar + tmies(i,ipu)=coe(jv)*pred(jv)+tmies(i,ipu) + enddo + endif + + 1200 continue + +!!!!!!!!!!!!!!!!!!!!!!!!!!!! + 1000 continue !End of estimated days loop +!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + tmax(:,:)=nint(10.*tmxes(:,:)) + tmin(:,:)=nint(10.*tmies(:,:)) + +!++++++++++++++++++++++++++++++++++++++++++++++++++ + +END SUBROUTINE down_temp + + diff --git a/src/insol.f90 b/src/insol.f90 new file mode 100755 index 0000000000000000000000000000000000000000..df4717377d0e2b683bee38dad5aa9dc5b226ca38 --- /dev/null +++ b/src/insol.f90 @@ -0,0 +1,38 @@ +! The insol program calculates insolation of 'nd' period + +SUBROUTINE insolation(nd,day,month,insol) + +USE MOD_CSTS +USE MOD_FUNCS, ONLY : fechanno + + IMPLICIT NONE + + INTEGER, INTENT(IN) :: nd + INTEGER, INTENT(IN) :: day(nd) + INTEGER, INTENT(IN) :: month(nd) + + DOUBLE PRECISION, INTENT(OUT) :: insol(nd) +! REAL, INTENT(OUT) :: insol(nd) + + integer i,ida,ida2 + integer dd,mm + real aaa + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +! print*,'program 3: insolation' + + do 1000 i=1,nd + dd=day(i) + mm=month(i) + call fechanno(dd,mm,ida) + + ida2=ida-80 + if(ida2.le.0) ida2=ida2+365 + aaa=2.*pi*float(ida2)/365. + insol(i)=sin(aaa) + 1000 continue + +END SUBROUTINE insolation + + diff --git a/src/mod_csts.f90 b/src/mod_csts.f90 new file mode 100755 index 0000000000000000000000000000000000000000..eb98684877e3a6eaf943144fd1d981060d091a5a --- /dev/null +++ b/src/mod_csts.f90 @@ -0,0 +1,67 @@ +! !!!!!!!!!!!!!!! + MODULE MOD_CSTS +! !!!!!!!!!!!!!!! +!* 0. DECLARATIONS +! ------------ +! 
+IMPLICIT NONE + +!CHARACTER(LEN=100),SAVE :: pathSI_estim="/home/sclim/cle/regionalizacion/asun/eraint/datosAscii_SI_estim/" +!CHARACTER(LEN=100),SAVE :: pathLR_estim="/home/sclim/cle/regionalizacion/asun/eraint/datosAscii_LR_estim/" +!*********************************************** +! CONSTANTS +!*********************************************** +! +CHARACTER(LEN=5),SAVE :: mt="aemet" +INTEGER,SAVE :: nvar_p=11 ! total number of predictors for precipitation +INTEGER,SAVE :: nvar_t=9 ! total number of predictors for temperature +INTEGER,SAVE :: nps=8 ! number of predictors from the reference grid point of the model +INTEGER, SAVE :: npx=11 +REAL,SAVE :: umb=0.75 +REAL,SAVE :: umbl=0.75 +INTEGER,SAVE :: nmin=10 +INTEGER,SAVE :: nmax=30 +REAL,SAVE :: ccmi=0.30 +!----------------------------------------------- +REAL,SAVE :: r=287.05 +REAL,SAVE :: a=0.0065 +REAL,SAVE :: g=9.81 +REAL,SAVE :: rt=6.37e6 +REAL,SAVE :: pi=3.14159265 +REAL,SAVE :: romega=7.292E-5 +!----------------------------------------------- +INTEGER,SAVE :: umb_ger=10000 +!----------------------------------------------- +!REAL,SAVE :: huso=30. +DOUBLE PRECISION,SAVE :: huso=30. +!----------------------------------------------- +REAL,SAVE :: tol=0.020 +! +!*********************************************** +! DOMAIN +!************************************************ +!------------------------------------------------ +! Limits (lat-lon) of strips to assign weights +! +REAL,SAVE :: fnor1=45.0 +REAL,SAVE :: fnor2=47.5 +REAL,SAVE :: fsur1=35.0 +REAL,SAVE :: fsur2=32.5 +REAL,SAVE :: foes1=-12.5 +REAL,SAVE :: foes2=-17.5 +REAL,SAVE :: fest1=5.0 +REAL,SAVE :: fest2=7.5 +!------------------------------------------------ +! High resolution (5km x 5km) observed AEMET grid +! +! For precipitation: the variable is multiplied to 10, to +! obtain it in decimes of mm, as the program requaire. +INTEGER, PARAMETER :: nptos=20945 ! number of point where make the estimations +!------------------------------------------------ +INTEGER,SAVE :: icl=24 +INTEGER,SAVE :: ipui=99 +INTEGER,SAVE :: ila=4 +INTEGER,SAVE :: ilo=6 + +END MODULE MOD_CSTS + diff --git a/src/mod_funcs.f90 b/src/mod_funcs.f90 new file mode 100755 index 0000000000000000000000000000000000000000..442750d49de1a35263314449fcd94cc47f70ea95 --- /dev/null +++ b/src/mod_funcs.f90 @@ -0,0 +1,1302 @@ + +MODULE MOD_FUNCS + +CONTAINS + + SUBROUTINE ESTADIS(SER,MEDIA,SIGMA,N) +! CALCULA LA MEDIA Y LA DESVIACION TIPO DE UNA SERIE DE DATOS + REAL SER(N),MEDIA,SIGMA + MEDIA=0. + DO I=1,N + MEDIA=MEDIA+SER(I) + ENDDO + MEDIA=MEDIA/REAL(N) + SIGMA=0. + DO I=1,N + SIGMA=SIGMA+(SER(I)-MEDIA)**2 + ENDDO + SIGMA=SIGMA/REAL(N) + SIGMA=SQRT(SIGMA) + RETURN + END SUBROUTINE + + SUBROUTINE BURBUJA(A,NOR,NAN,NAN1,IAN) + REAL A(NAN) + INTEGER NOR(NAN) + DO 100 I=1,IAN + DO 110 J=I+1,NAN1 + IF(A(I).GT.A(J)) THEN + TEM=A(J) + ITEM=NOR(J) + A(J)=A(I) + NOR(J)=NOR(I) + A(I)=TEM + NOR(I)=ITEM + ENDIF + 110 CONTINUE + 100 CONTINUE + RETURN + END SUBROUTINE + + subroutine burbuja1(a,nor,n,nan) + real a(n) + integer nor(n) + do 100 i=1,nan + do 110 j=i+1,n + if(a(i).gt.a(j)) then + tem=a(j) + item=nor(j) + a(j)=a(i) + nor(j)=nor(i) + a(i)=tem + nor(i)=item + endif + 110 continue + 100 continue + return + end subroutine + + subroutine distan9(ca,n,ic,i,nr,p,dis) + real ca(n,ic),p(ic) + dis=0. + sp=0. + do 10 k=1,ic + if(p(k).eq.0.) go to 10 + dis=dis+p(k)*(ca(i,k)-ca(nr,k))**2 + sp=sp+p(k) + 10 continue + dis=dis/sp + return + end subroutine + + subroutine distan5(ca,n,ic,i,nr,p,dis) + real ca(n,ic),p(ic) + dis=0. + sp=0. 
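+! distan5 and distan9 both return the weighted mean squared difference
+! between two days (rows i and nr) of the field ca,
+!   dis = sum_k p(k)*(ca(i,k)-ca(nr,k))**2 / sum_k p(k),
+! with the weights p() coming from the latitude-longitude windows defined in
+! the calling routines; distan9 skips grid points whose weight is zero,
+! whereas distan5 loops over every grid point.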
+ do k=1,ic + dis=dis+p(k)*(ca(i,k)-ca(nr,k))**2 + sp=sp+p(k) + enddo + dis=dis/sp + return + end subroutine + +! + subroutine distancia9(ca,n,cg,m,i,nr,p,dis,ic) +! implicit none +! real ca(n,ic),cg(m,ic),p(ic) + real ca(n,ic),p(ic) +! real cg(m,ic) + double precision cg(m,ic) + dis=0. + sp=0. + do 100 k=1,ic + if(p(k).eq.0.) go to 100 + dis=dis+p(k)*(ca(i,k)-cg(nr,k))**2 + sp=sp+p(k) + 100 continue + dis=dis/sp + return + end subroutine + + subroutine distancia5(ca,n,cg,m,i,nr,p,dis,ic) +! implicit none +! real ca(n,ic),cg(m,ic),p(ic) + real ca(n,ic),p(ic) + double precision cg(m,ic) + dis=0. + sp=0. + do k=1,ic + dis=dis+p(k)*(ca(i,k)-cg(nr,k))**2 + sp=sp+p(k) + enddo + dis=dis/sp + return + end subroutine + + subroutine distan9_2(cb,ca,n,nr,p,dis,ic) + real cb(ic),ca(n,ic),p(ic) + dis=0. + sp=0. + do 100 k=1,ic + if(p(k).eq.0.) go to 100 + dis=dis+p(k)*(cb(k)-ca(nr,k))**2 + sp=sp+p(k) + 100 continue + dis=dis/sp + return + end subroutine + + subroutine distan5_2(cb,ca,n,nr,p,dis,ic) + real cb(ic),ca(n,ic),p(ic) + dis=0. + sp=0. + do k=1,ic + dis=dis+p(k)*(cb(k)-ca(nr,k))**2 + sp=sp+p(k) + enddo + dis=dis/sp + return + end subroutine + + subroutine distancia9_2(ca,n,cg,m,i,nr,p,dis,ic) + real ca(n,ic),cg(m,ic),p(ic) + dis=0. + sp=0. + do 100 k=1,ic + if(p(k).eq.0.) go to 100 + dis=dis+p(k)*(ca(i,k)-cg(nr,k))**2 + sp=sp+p(k) + 100 continue + dis=dis/sp + return + end subroutine + + subroutine distancia5_2(ca,n,cg,m,i,nr,p,dis,ic) + real ca(n,ic),cg(m,ic),p(ic) + dis=0. + sp=0. + do k=1,ic + dis=dis+p(k)*(ca(i,k)-cg(nr,k))**2 + sp=sp+p(k) + enddo + dis=dis/sp + return + end subroutine + + SUBROUTINE STEPREGRS & + (YI,XI,NX,NVARX,N,MI,CCM,IVAR,COPA,COE,CON,TOL) + +!EFECTUA UNA REGRESION LINEAL MULTIPLE POR ETAPAS +!MEDIANTE LA TECNICA 'PASO A PASO' INTRODUCIENDO +!EN CADA PASO COMO NUEVA VARIABLE LA DE MEJOR +!CORRELACION PARCIAL CON LA VARIABLE DEPENDIENTE +!Y ELIMINANDO AQUELLAS QUE DESPUES DE CADA +!REGRESION NO SEAN SIGNIFICATIVAS + +!NX=numero maximo de datos posibles +!N=numero de datos actuales a usar +!NVAR= numero total de variables posibles +! de regresion + +!LA VARIABLE YI(NX) CONTIENE LOS VALORES DEL PREDICTANDO +!LA VARIABLE XI(NVARX,NX) CONTIENE LOS VALORES DE LOS PREDICTORES + +!LA VARIABLE YY(N) CONTIENE LOS VALORES DEL PREDICTANDO +!LA VARIABLE XX(NVARX,N) CONTIENE LOS VALORES DE LOS PREDICTORES + +! MI es el numero de variables o predictores seleccionados + +! CCM es el coeficiente de correlacion multiple + +! IVAR(NVARX) contiene los numeros de etiqueta de los predictores +! seleccionados en la regresion + +!COPA(NVARX) contiene las correlaciones parciales de +! los predictores seleccionados. + +! COE(NVARX) contiene los coeficientes de regresion beta(i) de las +! variables seleccionadas + +! CON contiene la constante de la regresion (beta0) + +! DATO1(NVARX,N) contiene los datos de los predictores +! que se meten en cada paso de regresion + +! COEF(0:NVARX) contiene la constante y los coeficientes +! de regresion de las variables introducidas en cada paso + +! YYES(N) contiene valores del predictando estimados por la regresion + +! SST es la variabilidad total, +! SSE es la variabilidad residual no explicada por la regresion + +! CDET es el coeficiente de determinacion en el paso actual +! CDATA es el coeficiente de determinacion en el paso anterior + +! CDETP(NVARX) contiene los coeficientes de determinacion +! cuando se considera cada variable como introducida en el paso +! actual. Se utiliza como base para eliminar variables que +! 
se consideran no significativas + +! TOL representa el minimo incremento de variabilidad explicada +! por la introduccion de una variable para que esta se +! considere significativa + + + real yi(nx),xi(nvarx,nx) + real yy(n),xx(nvarx,n),res1(n),res(n),ser1(n),ser2(n),aa(n) + real yyr(n),cdet1,cp,ay(n),cormax,cor,ccm + real copa(nvarx),dato1(nvarx,n) + real coef(0:nvarx),coe(nvarx),con + real yyes(n),sst,sse,cdet,cdeta,cdetp(nvarx),myy,incr + + character var(nvarx)*5 + integer ivar(nvarx),ivar1(nvarx) + +!TRASPASAMOS LOS DATOS INICIALES ENVIADOS POR EL PROGRAMA PRINCIPAL +!DESDE LAS VARIABLES DE DIMENSION MAXIMA A LAS VARIABLES CON LA +!DIMENSION AJUSTADA AL NUMERO ACTUAL DE DATOS UTILIZADOS + + do i=1,n + yy(i)=yi(i) + do k=1,nvarx + xx(k,i)=xi(k,i) + enddo + enddo + + +!CALCULO DE LA MEDIA Y DE LA VARIABILIDAD TOTAL DEL PREDICTANDO + + myy=0. + do i=1,n + myy=myy+yy(i) + enddo + myy=myy/real(n) + + sst=0. + do i=1,n + sst=sst+(yy(i)-myy)**2 + enddo + + + + + +!******************************************************** +!INICIALIZACION DEL CONTROL +!DE LAS VARIABLES INTRODUCIDAS EN EL MODELO +!EN CADA PASO + + do j=1,nvarx + var(j)='nosel' + enddo + +!**************************************************** + +!BUSQUEDA DE LA PRIMERA VARIABLE DEL MODELO +! (UNICA VARIABLE EN LA PRIMERA ETAPA) + + cdeta=0. + cormax=-2.0 + nvx=0 + do j=1,nvarx + do i=1,n + ser2(i)=xx(j,i) + enddo + call corr1(yy,ser2,n,cor) + if(abs(cor).gt.cormax) then + cormax=abs(cor) + nvx=j + endif + enddo + var(nvx)='sisel' + nvult=nvx + + +!**************************************************** +!PREPARACION DE LA MATRIZ DE DATOS PARA EL +!CALCULO DE LA REGRESION DE LAS VARIABLES +!INDEPENDIENTES SELECCIONADAS CON LA VARIABLE +!DEPENDIENTE + + 222 nuvar=0 + dato1=0. + ivar=0 + do 100 j=1,nvarx + if(var(j).ne.'sisel') go to 100 + nuvar=nuvar+1 + ivar(nuvar)=j + do i=1,n + dato1(nuvar,i)=xx(j,i) + enddo + 100 continue + +!SE CALCULA LA REGRESION CON LAS VARIABLES SELECCIONADAS + + yyr=yy + call regr(yyr,dato1,nvarx,nuvar,n,coef) + + + +!SE CALCULA EL COEFICIENTE DE DETERMINACION (esta subrutina +! devuelve los residuos de la regresion y el coeficiente de +! determinacion) + + call coedet(yy,xx,n,nvarx,ivar,nuvar,coef,res1,sst,cdet) + + +!COMPROBAMOS SI EL COEFICIENTE DE DETERMINACION SE HA INCREMENTADO +!SUFICIENTEMENTE COMO PARA CONSIDERAR SIGNIFICATIVA LA ULTIMA VARIABLE +!INTRODUCIDA. SI NO LO ES SE ACABA EL PROCESO PASO A PASO Y OBTENEMOS +!LA REGRESION DEFINITIVA + + incr=cdet-cdeta + if(incr.lt.tol) then + if(nuvar.eq.1) then + mi=0 + ccm=-8.88 + go to 555 + else + var(nvult)='elimi' + go to 444 + endif + endif + + cdeta=cdet + +!SE COMPRUEBA SI ALGUNA DE LAS VARIABLES SELECCIONADAS RESULTA NO +!SIGNIFICATIVA. PARA ELLO SE COMPARAN LOS COEFICIENTES DE DETERMINACION +!OBTENIDOS QUITANDO CADA VARIABLE, CON EL OBTENIDO SIN QUITAR NINGUNA +!DE LAS QUE YA TENEMOS, SI PARA ALGUNA VARIABLE EL INCREMENTO NO +!SUPERA EL MINIMO LA VARIABLE SE ELIMINA DEFINITIVAMENTE + + if(nuvar.eq.1) go to 333 + +! Quitamos una variable cada vez + + do 200 k=1,nuvar + dato1=0. + nivar=0 + ivar1=0 + do 210 k1=1,nuvar + if(k1.eq.k) go to 210 + nivar=nivar+1 + ivar1(nivar)=ivar(k1) + do i=1,n + dato1(nivar,i)=xx(ivar1(nivar),i) + enddo + 210 continue + +! Se calcula la regresion con la variable quitada + + yyr=yy + call regr(yyr,dato1,nvarx,nivar,n,coef) + +! Se calcula el coeficiente de determinacion + + call coedet(yy,xx,n,nvarx,ivar1,nivar,coef,res,sst,cdet1) + +! Si la diferencia entre el coeficiente de determinacion +! 
con todas las variables y el mismo con la variable +! quitada es menor que el umbral, la variable se considera +! no significativa y se elimina definitivamente. + + if((cdet-cdet1).lt.tol) then + var(ivar(k))='elimi' + endif + 200 continue + + do k=1,nuvar + if(var(ivar(k)).eq.'elimi') go to 332 + enddo + go to 333 + + + +!ELIMINADAS LAS VARIABLES NO SIGNIFICATIVAS SE CALCULA DE NUEVO +!LA REGRESION CON LAS VARIABLES QUE HAN QUEDADO + + 332 continue + nuvar=0 + dato1=0. + ivar=0 + do 220 j=1,nvarx + if(var(j).ne.'sisel') go to 220 + nuvar=nuvar+1 + ivar(nuvar)=j + do i=1,n + dato1(nuvar,i)=xx(j,i) + enddo + 220 continue + + + yyr=yy + call regr(yyr,dato1,nvarx,nuvar,n,coef) + call coedet(yy,xx,n,nvarx,ivar,nuvar,coef,res1,sst,cdet) + + cdeta=cdet + + 333 continue + +!SE COMPRUEBA SI HAY AUN VARIABLES QUE PUEDAN SER SELECCIONADAS +!SI HAY SE TRATA DE BUSCAR UNA NUEVA, SI NO HAY SE TERMINA + + do j=1,nvarx + if(var(j).eq.'nosel') go to 334 + enddo + go to 444 + + 334 continue + +!SE BUSCA UNA NUEVA VARIABLE TOMANDO LA QUE TENGA MAYOR CORRELACION +!PARCIAL CON EL PREDICTANDO + +! Se construye matriz de datos con variables ya seleccionadas + + dato1=0. + nivar=0 + do j=1,nvarx + if(var(j).eq.'sisel') then + nivar=nivar+1 + do i=1,n + dato1(nivar,i)=xx(j,i) + enddo + endif + enddo + +! Se busca nueva variable + + cormax=-2.0 + nvx=0 + do 230 j=1,nvarx + if(var(j).ne.'nosel') go to 230 + do i=1,n + aa(i)=xx(j,i) + enddo + call corpar(res1,n,dato1,nvarx,nivar,aa,cp) + if(abs(cp).gt.cormax) then + cormax=abs(cp) + nvx=j + endif + 230 continue + if (nvx.gt.0) then + var(nvx)='sisel' + nvult=nvx + endif + go to 222 + + 444 continue + +! REGRESION DEFINITIVA + +! PREPARACION DE MATRIZ DE DATOS CON VARIABLES DEFINITIVAS + + nuvar=0 + dato1=0. + ivar=0 + do 250 j=1,nvarx + if(var(j).ne.'sisel') go to 250 + nuvar=nuvar+1 + ivar(nuvar)=j + do i=1,n + dato1(nuvar,i)=xx(j,i) + enddo + 250 continue + +! CALCULO DE LA REGRESION + + + yyr=yy + call regr(yyr,dato1,nvarx,nuvar,n,coef) + +! CALCULO DEL COEFICIENTE DE DETERMINACION Y DE LOS RESIDUOS + + call coedet(yy,xx,n,nvarx,ivar,nuvar,coef,res1,sst,cdet) + + + +! RESULTADOS FINALES + +! COEFICIENTES Y DEMAS DATOS DE LA REGRESION + + mi=nuvar + ccm=sqrt(cdet) + + con=coef(0) + coe=0. + do k=1,nuvar + coe(ivar(k))=coef(k) + enddo + + +! COEFICIENTES DE CORRELACION PARCIAL DE LAS VARIABLES +! SELECCIONADAS CON LA VARIABLE DEPENDIENTE + + copa=-1. + + do 300 j=1,nuvar + do i=1,n + aa(i)=xx(ivar(j),i) + ay(i)=yy(i) + enddo + nivar=0 + dato1=0. 
+ do k=1,nuvar + if(k.ne.j) then + nivar=nivar+1 + do i=1,n + dato1(nivar,i)=xx(ivar(k),i) + enddo + endif + enddo + call corpar1(ay,n,dato1,nvarx,nivar,aa,cp) + copa(ivar(j))=abs(cp) + 300 continue + + 555 continue + + return + end subroutine + + + SUBROUTINE CORR1(CENT,COMP,IC,CORRE1) + REAL SUM1,SUM2,MED1,MED2 + REAL CENT(IC),COMP(IC),SUMC1,SUMC2,SUMCR + REAL COV,VAR1,VAR2,CORRE1 + SUM1=0.0 + SUM2=0.0 + DO 100 I=1,IC + SUM1=SUM1+CENT(I) + SUM2=SUM2+COMP(I) +100 CONTINUE + C=REAL(IC) + MED1=SUM1/C + MED2=SUM2/C + SUMC1=0.0 + SUMC2=0.0 + SUMCR=0.0 + DO 200 J=1,IC + SUMCR=SUMCR+((CENT(J)-MED1)*(COMP(J)-MED2)) + SUMC1=SUMC1+(CENT(J)-MED1)**2 + SUMC2=SUMC2+(COMP(J)-MED2)**2 +200 CONTINUE + COV=SUMCR/C + VAR1=SUMC1/C + VAR2=SUMC2/C + CORRE1=COV/SQRT(VAR1*VAR2) + RETURN + END SUBROUTINE + + + SUBROUTINE REGR(aa,bb,nvarx,nvar,ndat,creg) + + +!CALCULA LA ECUACION DE REGRESION A PARTIR DE UNA MUESTRA DE DATOS + +!TRABAJA CON LAS DESVIACIONES RESPECTO A LA MEDIA PARA MINIMIZAR +!LOS ERRORES DE REDONDEO POR LO QUE AL FINAL HAY QUE CALCULAR +!APARTE EL TERMINO INDEPENDIENTE DE LA ECUACION DE REGRESION +!(ver libro de D. Penna capitulo regresion multiple) + + +! ndat: numero de datos de la muestra +! nvar: numero de variables independientes +! yy(ndat): contiene las desviaciones del predictando +! xx(nvar,ndat): contiene las desviaciones de los predictores +! myy,mxx(nvar): contiene las medias de predictandos y predictores + +! Elementos de las ecuaciones normales + +!nn(nvar,nvar): Matriz de los coeficientes de las incognitas +! (coeficientes de regresion salvo termino +! independiente beta0) +!b(nvar) : En entrada contiene los terminos independientes +! de las ecuaciones normales que se pasan a +! a las subrutinas que resuelven el sistema +! En salida contiene los coeficientes de regresion +! (no el termino independiente beta0) +!creg(0:nvarx) : Contiene la salida al programa principal de los +! coeficientes de regresion y del termino +! independiente +!sxx(nvar): Suma de los valores de los predictores de todos los +! datos de la muestra +!syy: Suma de los valores de los predictandos +!syyxx(nvar): Suma de productos predictando-predictores +!sxxxx(nvar): Suma de productos predictores-predictores + + + real yy(ndat) + real xx(nvar,ndat) + real aa(ndat),bb(nvarx,ndat) + real myy,mxx(nvar) + real b(nvar),nn(nvar,nvar),creg(0:nvarx) + real sxx(nvar),syy,syyxx(nvar),sxxxx(nvar,nvar),d + integer indx(nvar) + + + +! SE CALCULAN LAS MEDIAS DE LOS VALORES DE PREDICTANDOS Y PREDICTORES + + myy=0. + do i=1,ndat + myy=myy+aa(i) + enddo + myy=myy/real(ndat) + + mxx=0. + do j=1,nvar + do i=1,ndat + mxx(j)=mxx(j)+bb(j,i) + enddo + mxx(j)=mxx(j)/real(ndat) + enddo + +! SE SUSTITUYEN LOS DATOS ORIGINALES POR SUS DESVIACIONES RESPECTO +! A LA MEDIA + + do i=1,ndat + yy(i)=aa(i)-myy + enddo + + do j=1,nvar + do i=1,ndat + xx(j,i)=bb(j,i)-mxx(j) + enddo + enddo + +! CALCULO DE LA SUMA DE VALORES DE PREDICTANDO Y PREDICTORES +! DE TODOS LOS DATOS DE LA MUESTRA ASI COMO LA DE PRODUCTOS +! CRUZADOS (utiliza ya como variables las desviaciones respecto +! a las medias) +! (En realidad utilizando el modelo de regresion en desviaciones +! no utilizamos las sumas de las variables aunque las calculamos) + + syy=0. + do i=1,ndat + syy=syy+yy(i) + enddo + + + sxx=0. + syyxx=0. + do j=1,nvar + do i=1,ndat + sxx(j)=sxx(j)+xx(j,i) + syyxx(j)=syyxx(j)+yy(i)*xx(j,i) + enddo + enddo + + sxxxx=0. 
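+! The loops below accumulate the cross products of the predictors expressed
+! as deviations from their means, sxxxx(j,k) = sum_i x_j(i)*x_k(i); together
+! with syyxx(j) = sum_i y(i)*x_j(i) computed above they form the normal
+! equations (X'X) b = X'y of the least-squares fit, solved further down with
+! ludcmp/lubksb. The intercept is then recovered as
+!   creg(0) = mean(y) - sum_j b(j)*mean(x_j).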
+ do j=1,nvar + do k=j,nvar + do i=1,ndat + sxxxx(j,k)=sxxxx(j,k)+xx(j,i)*xx(k,i) + enddo + if(j.ne.k) sxxxx(k,j)=sxxxx(j,k) + enddo + enddo + + +! CONSTRUYE LA MATRIZ DE LOS COEFICIENTES DE LAS ECUACIONES NORMALES + + do j=1,nvar + do k=1,nvar + nn(j,k)=sxxxx(j,k) + enddo + enddo + + +! CONSTRUYE EL VECTOR DE TERMINOS INDEPENDIENTES DE LAS ECUACIONES +! NORMALES. EN LA SALIDA CONTENDRA LOS VALORES DE LOS COEFICIENTES +! DE REGRESION + + + do j=1,nvar + b(j)=syyxx(j) + enddo + + +! SE RESUELVE EL SISTEMA DE ECUACIONES NORMALES Y SE OBTIENEN +! LOS COEFICIENTES DE REGRESION DE CADA VARIABLE EN LA ECUACION +! DE REGRESION + + call ludcmp(nn,nvar,nvar,indx,d) + call lubksb(nn,nvar,nvar,indx,b) + + do j=1,nvar + creg(j)=b(j) + enddo + +! SE CALCULA EL TERMINO INDEPENDIENTE DE LA ECUACION DE REGRESION + + creg(0)=myy + do j=1,nvar + creg(0)=creg(0)-b(j)*mxx(j) + enddo + + + return + end subroutine + + SUBROUTINE lubksb(a,n,np,indx,b) + INTEGER n,np,indx(nP) + REAL a(np,np),b(np) + INTEGER i,ii,j,ll + REAL sum + ii=0 + do 12 i=1,n + ll=indx(i) + sum=b(ll) + b(ll)=b(i) + if (ii.ne.0)then + do 11 j=ii,i-1 + sum=sum-a(i,j)*b(j) +11 continue + else if (sum.ne.0.) then + ii=i + endif + b(i)=sum +12 continue + do 14 i=n,1,-1 + sum=b(i) + do 13 j=i+1,n + sum=sum-a(i,j)*b(j) +13 continue + b(i)=sum/a(i,i) +14 continue + return + END SUBROUTINE +! (C) Copr. 1986-92 Numerical Recipes Software !)#. +!********************************************************** +! + SUBROUTINE ludcmp(a,n,np,indx,d) + INTEGER n,np,indx(nP),NMAX + REAL d,a(np,np),TINY + PARAMETER (NMAX=500,TINY=1.0e-20) + INTEGER i,imax,j,k + REAL aamax,dum,sum,vv(NMAX) + d=1. + do 12 i=1,n + aamax=0. + do 11 j=1,n + if (abs(a(i,j)).gt.aamax) aamax=abs(a(i,j)) +11 continue + if (aamax .eq. 0.) then +! write (*,*) 'singular matrix in ludcmp' + else + vv(i)=1./aamax + endif +12 continue + do 19 j=1,n + do 14 i=1,j-1 + sum=a(i,j) + do 13 k=1,i-1 + sum=sum-a(i,k)*a(k,j) +13 continue + a(i,j)=sum +14 continue + aamax=0. + do 16 i=j,n + sum=a(i,j) + do 15 k=1,j-1 + sum=sum-a(i,k)*a(k,j) +15 continue + a(i,j)=sum + dum=vv(i)*abs(sum) + if (dum.ge.aamax) then + imax=i + aamax=dum + endif +16 continue + if (j.ne.imax)then + do 17 k=1,n + dum=a(imax,k) + a(imax,k)=a(j,k) + a(j,k)=dum +17 continue + d=-d + vv(imax)=vv(j) + endif + indx(j)=imax + if(a(j,j).eq.0.)a(j,j)=TINY + if(j.ne.n)then + dum=1./a(j,j) + do 18 i=j+1,n + a(i,j)=a(i,j)*dum +18 continue + endif +19 continue + return + END SUBROUTINE + + SUBROUTINE COEDET(yy,xx,n,nvarx,ivar1,nivar,coef,res,sst,cdet1) + real yy(n),yyes(n),xx(nvarx,n),res(n),sst,cdet1 + real sse + real coef(0:nvarx) + integer ivar1(nvarx) + +! ESTA SUBRUTINA DEVUELVE LOS RESIDUOS DE LA REGRESION +! Y EL COEFICIENTE DE DETERMINACION + +!SE CALCULAN LOS VALORES ESTIMADOS DEL PREDICTANDO ESTIMADOS +!CON LA REGRESION + + + do i=1,n + yyes(i)=coef(0) + do k=1,nivar + yyes(i)=yyes(i)+coef(k)*xx(ivar1(k),i) + enddo + enddo + +!SE CALCULAN LOS RESIDUOS DE LA REGRESION Y LA VARIABILIDAD +!NO EXPLICADA + + sse=0. + do i=1,n + res(i)=yy(i)-yyes(i) + sse=sse+res(i)**2 + enddo + + +!SE CALCULA EL COEFICIENTE DE DETERMINACION + + cdet1=sse/sst + cdet1=1.-cdet1 + + return + end subroutine + + + SUBROUTINE CORPAR(res1,n,dato1,nvarx,nivar,aa,cp) + real res1(n),res2(n),dato1(nvarx,n),aa(n) + real coef(0:nvarx) + real aaes(n),aar(n),cp + +! SE OBTIENE LA REGRESION DE LA VARIABLE CUYA CORRELACION +! PARCIAL SE CALCULA, CON LAS OTRAS VARIABLES PRESENTES + + aar=aa + call regr(aar,dato1,nvarx,nivar,n,coef) + +! 
Y SE OBTIENEN LOS VALORES ESTIMADOS POR ESTA REGRESION + + do i=1,n + aaes(i)=coef(0) + do k=1,nivar + aaes(i)=aaes(i)+coef(k)*dato1(k,i) + enddo + enddo + +! SE OBTIENEN LOS RESIDUOS CORRESPONDIENTES + + do i=1,n + res2(i)=aa(i)-aaes(i) + enddo + +! SE CALCULA LA CORRELACION PARCIAL + + call corr1(res1,res2,n,cp) + + + return + end subroutine + + SUBROUTINE CORPAR1(ay,n,dato1,nvarx,nivar,aa,cp) + real ay(n),res1(n),res2(n),dato1(nvarx,n),aa(n) + real ayes(n),aaes(n),ayr(n),aar(n),cp + real coef(0:nvarx),coefy(0:nvarx) + + +! SE OBTIENE LA REGRESION DE LA VARIABLE DEPENDIENTE CON LAS +! OTRAS VARIABLES PRESENTES DISTINTAS DE AQUELLAS CUYA CORRELACION +! PARCIAL SE QUIERE CALCULAR + + ayr=ay + call regr(ayr,dato1,nvarx,nivar,n,coefy) + +! Y SE OBTIENEN LOS VALORES ESTIMADOS POR ESTA REGRESION + + do i=1,n + ayes(i)=coefy(0) + do k=1,nivar + ayes(i)=ayes(i)+coefy(k)*dato1(k,i) + enddo + enddo + +! SE OBTIENEN LOS RESIDUOS CORRESPONDIENTES + + do i=1,n + res1(i)=ay(i)-ayes(i) + enddo + + + +! SE OBTIENE LA REGRESION DE LA VARIABLE CUYA CORRELACION +! PARCIAL SE CALCULA, CON LAS OTRAS VARIABLES PRESENTES + + aar=aa + call regr(aar,dato1,nvarx,nivar,n,coef) + +! Y SE OBTIENEN LOS VALORES ESTIMADOS POR ESTA REGRESION + + do i=1,n + aaes(i)=coef(0) + do k=1,nivar + aaes(i)=aaes(i)+coef(k)*dato1(k,i) + enddo + enddo + +! SE OBTIENEN LOS RESIDUOS CORRESPONDIENTES + + do i=1,n + res2(i)=aa(i)-aaes(i) + enddo + +! SE CALCULA LA CORRELACION PARCIAL + + call corr1(res1,res2,n,cp) + return + end subroutine + + SUBROUTINE GEOSTROFICO(& + Z,U,V,NGRID,NGRIDS,SLAT,SLON,SLATS,SLONS,RLAT,RLON,RLATS,RLONS,& + NLAT,NLON,NLATS,NLONS,NGRIDD) + +USE MOD_CSTS + +! CALCULA VIENTO GEOSTROFICO A PARTIR DE CAMPOS DE DOBLE RESOLUCION, +! CON CALCULO CENTRADO, Y DEVUELVE VALORES EN LOS PUNTOS DE GRID DE +! RESOLUCION NORMAL + + IMPLICIT INTEGER(K) + +! --------- Parametros de la REJILLA DE BAJA RESOLUCION ----- +! NGRID es el npuntos de la rejilla de baja resolucion +! NGRIDD se deja igual +! SLAT es la latitud de la rejilla de baja resolucion -latitud superior +! izda, SLON es la longitud ...........de la rej baja resol-longitud +! superior oeste NLAT es el numero de latitudes + +! PARAMETER (NGRID=${id},NGRIDD=${ngridd},ROMEGA=${romega}) +! PARAMETER (SLAT=${slatt},SLON=${slont},RLAT=${rlat}, +! $ RLON=${rlon},NLAT=${nlatt},NLON=${nlont}) +! ---------------------------------------------------------------------- +! ------------- Parametros de la REJILLA SINOPTICA --------- +! PARAMETER (NGRIDS=${ic},NLATS=${nlat},NLONS=${nlon}) !TERMINADO EN S, +! GRID DE SALIDA +! PARAMETER (SLATS=${slat},SLONS=${slon},RLATS=${rlat}, +! $ RLONS=${rlon}) +! PARAMETER (GR=${g},RT=${rt},R=${r},PI=${pi}) +! ---------------------------------------------------------------------- + INTEGER, INTENT(IN) :: ngrid + INTEGER, INTENT(IN) :: ngrids + + INTEGER, INTENT(IN) :: nlat + INTEGER, INTENT(IN) :: nlon + INTEGER, INTENT(IN) :: nlats + INTEGER, INTENT(IN) :: nlons + DOUBLE PRECISION, INTENT(IN) :: slat + DOUBLE PRECISION, INTENT(IN) :: slon + DOUBLE PRECISION, INTENT(IN) :: slats + DOUBLE PRECISION, INTENT(IN) :: slons + DOUBLE PRECISION, INTENT(IN) :: rlat + DOUBLE PRECISION, INTENT(IN) :: rlon + DOUBLE PRECISION, INTENT(IN) :: rlats + DOUBLE PRECISION, INTENT(IN) :: rlons + INTEGER, INTENT(IN) :: ngridd + + REAL GR + +! RLX ES LA LONGITUD DEL PASO DE REJILLA SOBRE EL PARALELO CORRESPONDIENTE +! RLY ES LA LONGITUD DEL PASO DE REJILLA SOBRE EL MERIDIANO CORRESPONDIENTE + +! 
OJO, RLY Y RLX CORRESPONDEN A RESOL NORMAL, Y SON 2Ay Y 2Ax DE LA RES DOBLE + REAL, INTENT(IN) :: Z(NGRID) + REAL, INTENT(OUT) :: U(NGRIDS),V(NGRIDS) + + REAL RLX(NGRID),RLY + REAL F(NGRIDD) + REAL GG(NGRID),GD(NGRIDD) + REAL RLT(NGRID),RLN(NGRID) + REAL RLTS(NGRIDS),RLNS(NGRIDS) + + GR=g +! print*,'En GEOSTROFICO, g,ngrid,ngrids= ',g,ngrid,ngrids +! print*,'En GEOSTROFICO, ngrid= ',ngrid +! print*,'En GEOSTROFICO, ngrids= ',ngrids +! print*,'En GEOSTROFICO, ngridd= ',ngridd +! print*,'En GEOSTROFICO, nlat= ',nlat +! print*,'En GEOSTROFICO, nlon= ',nlon +! print*,'En GEOSTROFICO, nlats= ',nlats +! print*,'En GEOSTROFICO, nlons= ',nlons +! print*,'En GEOSTROFICO, slat= ',slat +! print*,'En GEOSTROFICO, slon= ',slon +! print*,'En GEOSTROFICO, slats= ',slats +! print*,'En GEOSTROFICO, slons= ',slons +! print*,'En GEOSTROFICO, rlat= ',rlat +! print*,'En GEOSTROFICO, rlon= ',rlon +! print*,'En GEOSTROFICO, rlats= ',rlats +! print*,'En GEOSTROFICO, rlons= ',rlons + +! +! CALCULA LATITUD Y LONGITUD DE CADA PUNTO J + DO J=1,NGRID + RLT(J)=SLAT+(((J-1)/NLON)*RLAT) + RLN(J)=SLON+((MOD(J-1,NLON)+1-1)*RLON) + +! IF(J.GE.1.AND.J.LE.50) THEN +! print*,"J, RLT = ",J,RLT(J) +! print*," RLN = ",RLN(J) +! ENDIF + ENDDO +! print*,"fuera del bucle de calculo" +! print*,"RLT=",RLT(1:50) +! print*,"RLN",RLN(1:50) + NLOND=(NLON*2)-1 +! +! CALCULA LOS VALORES DE RLX Y F,LEE P, Y ACTUALIZA KCOD + DO J=1,NGRID + RLX(J)=(2.*PI*RT*COS(RLT(J)*PI/180.))/(360./RLON) + F(J)=2.*ROMEGA*SIN(RLT(J)*PI/180.) + ENDDO + RLY=2.*PI*RT*ABS(RLAT)/360. + K0=0 +! +! print*,"antes de dobla" +! print*,"RLT=",RLT(1:50) +! print*,"RLN",RLN(1:50) +! +! TRANFORMA ALTURA GEOPOTENCIAL EN GEOPOTENCIAL PHI=Z*g Y CALCULA LOS VALORES +! EN DOBLE RESOLUCION + + DO IG=1,NGRID + GG(IG)=Z(IG)*GR + ENDDO + +! print*,"antes de DOBLA" + CALL DOBLA(SLAT,SLON,RLAT,RLON,NLAT,NLON,GG,GD) +! print*,"despues de DOBLA" +! +! print*,"despues de dobla" +! print*,"RLT=",RLT(1:50) +! print*,"RLN",RLN(1:50) +! +! CALCULO DEL VIENTO GEOSTROFICO + JS=0 + DO 17 J=1,NGRID + +! SI NO PERTENECE A LA VENTANA DE SALIDA, SALTA + IF(RLT(J).GT.SLATS.OR.RLT(J).LT.SLATS+((NLATS-1)*RLATS))cycle +! print*,"entro en el primer IF",j,RLT(J),SLATS,SLATS+((NLATS-1)& +! *RLATS) +! print*,"entro en el primer IF, j, RLT =",j,RLT(1:50) +! stop + +! cycle +! endif + IF(RLN(J).LT.SLONS.OR.RLN(J).GT.SLONS+((NLONS-1)*RLONS))cycle +! print*,"entro en el segundo IF",j +! cycle +! endif + + JS=JS+1 +! print*,"JS=",JS + + JD=((MOD(J-1,NLON)+1)*2)-1+(((((J-1)/NLON)*2)+1-1)*NLOND) +! POS EN DOBLE= PTO EN ESA LAT + NUM LATD PASADAS* NLOND + U(JS)=-(GD(JD-NLOND)-GD(JD+NLOND))/(RLY*F(J)) + V(JS)=(GD(JD+1)-GD(JD-1))/(RLX(J)*F(J)) + RLTS(JS)=RLT(J) + RLNS(JS)=RLN(J) + 17 ENDDO +! +! +! + RETURN + END SUBROUTINE + + SUBROUTINE DOBLA(SLAT,SLON,RLAT,RLON,NLAT,NLON,GG,GD) +! +! SLAT, LATITUD DEL LIMITE MERIDIONAL DEL CAMPO DE ENTRADA +! SLATS, LATITUD DEL LIMITE MERIDIONAL DEL CAMPO DE SALIDA +! SLON, LONGITUD DEL LIMITE OCCIDENTAL DEL CAMPO DE ENTRADA +! SLONS, LONGITUD DEL LIMITE OCCIDENTAL DEL CAMPO DE SALIDA + IMPLICIT INTEGER (K) + INTEGER,INTENT(IN) :: NLON,NLAT +! REAL,INTENT(IN) :: SLAT,SLON,RLAT,RLON + DOUBLE PRECISION,INTENT(IN) :: SLAT,SLON,RLAT,RLON + REAL,INTENT(IN) :: GG(NLON*NLAT) + REAL,INTENT(OUT) :: GD(((NLON*2)-1)*((NLAT*2)-1)) + INTEGER IGA(((NLON*2)-1)*((NLAT*2)-1)) + REAL A(NLON,NLAT),S(((NLON-2)*2)-1,((NLAT-2)*2)-1) +! NLON2 y NLAT2 son nlont y nlatt que son NLON y NLAT de +! GEOSTROFICO que llama a DOBLA + NLON2=NLON + NLAT2=NLAT +! +! IF(NLAT.NE.NLAT2 .OR. NLON.NE.NLON2)THEN +! 
PRINT*,' CAMBIAR NLAT2 Y NLON2 EN SUBRUTINA DOBLA',& +! NLAT,NLAT2,NLON,NLON2 +! STOP +! ENDIF +! + XLATMIH=SLAT+(RLAT*(NLAT-1)) + XLONMIH=SLON + XLATMIR=XLATMIH+ABS(RLAT) + XLONMIR=XLONMIH+RLON + DLATH=ABS(RLAT) + DLONH=ABS(RLON) + DLATR=ABS(RLAT/2.) + DLONR=ABS(RLON/2.) + NLONH=NLON + NLATH=NLAT + NLONR=((NLON-2)*2)-1 + NLATR=((NLAT-2)*2)-1 + ICA=0 + DO J=NLAT,1,-1 + DO I=1,NLON + ICA=ICA+1 + A(I,J)=GG(ICA) + ENDDO + ENDDO + CALL BESSEL(XLATMIH,XLONMIH,XLATMIR,XLONMIR,DLATH,DLONH,& + DLATR,DLONR,NLONH,NLATH,NLONR,NLATR,A,S) +! + NLONS=(NLON*2)-1 + NLATS=(NLAT*2)-1 + IGA=0 + ICA=0 + DO 10 IG=1,NLONS*NLATS + IF (MOD(((IG-1)/NLONS),2).EQ.1) cycle !LATITUDES PARES + IF (MOD(MOD((IG-1),NLONS)+1,2).EQ.0) cycle !LONGITUDES PARES + ICA=ICA+1 + IGA(IG)=ICA + 10 ENDDO +! ESCRIBE LOS PUNTOS DE LA REJILLA ORIGINAL + DO IG=1,NLONS*NLATS + IF(IGA(IG).NE.0) GD(IG)=GG(IGA(IG)) + ENDDO +! + DO IG=1,NLONS*NLATS +! SOBREESCRIBE INTERPOLACIONES HECHAS POR BESSEL + IF(IG.GT.NLONS*2 .AND. IG.LT.NLONS*(NLATS-2) .AND.& + MOD((IG-1),NLONS)+1.GT.2 .AND. MOD((IG-1),NLONS)+1.LT.NLONS-2)& + THEN !INT POR BESSEL, TODAS MENOS LAS DOS PRIMERAS Y ULTIMAS +! LATITUDES Y LONGITUDES + ILAT=NLATR-((((IG-1)/NLONS)-1)-1) + ILON=(MOD(IG-1,NLONS)+1)-2 + GD(IG)=S(ILON,ILAT) +! INTERPOLA PARA LA FRONTERA DE LA REJILLA + ELSE + IF(IGA(IG).EQ.0)THEN + IF(MOD(((IG-1)/NLONS),2).EQ.1 .AND.& + MOD(MOD((IG-1),NLONS)+1,2).EQ.0)THEN !FILA PAR, COLUMNA PAR, INT +!4 PTOS + GD(IG)=(GD(IG-NLONS-1)+GD(IG-NLONS+1)+GD(IG+NLONS-1)+& + GD(IG+NLONS+1))/4. + ELSEIF(MOD(((IG-1)/NLONS),2).EQ.1)THEN !FILA PAR, COLUMNA IMPAR, +!INT 2 PTOS + GD(IG)=(GD(IG-NLONS)+GD(IG+NLONS))/2. + ELSEIF(MOD(MOD((IG-1),NLONS)+1,2).EQ.0)THEN !FILA IMPAR, COLUMNA +!PAR, INT 2 PTOS + GD(IG)=(GD(IG-1)+GD(IG+1))/2. + ENDIF + ENDIF + ENDIF +! + ENDDO +! + RETURN + END SUBROUTINE +! + SUBROUTINE BESSEL(XLATMIH,XLONMIH,XLATMIR,XLONMIR,DLATH,DLONH,& + DLATR,DLONR,NLONH,NLATH,NLONR,NLATR,A,E) +! + INTEGER,INTENT(IN) :: NLONH,NLATH,NLONR,NLATR + REAL,INTENT(IN) :: DLONH,DLONR,DLATH,DLATR,XLATMIH,XLATMIR,XLONMIH,XLONMIR + REAL,INTENT(IN) :: A(NLONH,NLATH) + REAL,INTENT(OUT) :: E(NLONR,NLATR) +! +! COMPRUEBA QUE LOS LIMITES SON CORRECTOS + XLATMAH=XLATMIH+((NLATH-1)*DLATH) + XLONMAH=XLONMIH+((NLONH-1)*DLONH) + XLATMAR=XLATMIR+((NLATR-1)*DLATR) + XLONMAR=XLONMIR+((NLONR-1)*DLONR) +! IF(XLATMIR.LT.XLATMIH+DLATH .OR. XLONMIR.LT.XLONMIH+DLONH& +! .OR. XLATMAR.GT.XLATMAH-DLATH .OR. XLONMAR.GT.XLONMAH-DLONH)THEN +! PRINT*,' ERROR EN LIMITES DE REJILLA ESTIMADA:SLATE,ELATE,SLATS,E& +! LATS, Y LON RESPECTIVOS:',XLATMIH,XLATMAH,XLATMIR,XLATMAR,& +! XLONMIH,XLONMAH,XLONMIR,XLONMAR +! STOP +! ENDIF +! +! HAZ LA INTERPOLACION PARA CADA PUNTO DE LA REJILLA DE SALIDA +3 DO J=1,NLATR + DO I = 1,NLONR +! DETERMINA LA POSICION DEL PUNTO DE LA REJILLA DE SALIDA EN LAS COORDENADAS DE +! LA REJILLA DE ENTRADA + XX= (((XLONMIR+DLONR*(I-1)) - XLONMIH) / DLONH ) +1. + YY= (((XLATMIR+DLATR*(J-1)) - XLATMIH) / DLATH ) +1. + M = XX + N = YY + DX = XX - M + DY = YY - N +! APLICA EL ESQUEMA DE INTERPOLACI\324N DE 16 PT DE BESSEL + DXX = .25 *(DX - 1.) + DYY = .25 *(DY - 1.) 
+ AA = A(M,N-1) + DX *(A(M+1,N-1) - A(M,N-1) + DXX *& + (A(MIN(M+2,NLONH),N-1) - A(M+1,N-1) + A(M-1,N-1) - A(M,N-1))) + AB = A(M,N) + DX*(A(M+1,N) - A(M,N) + DXX *(A(MIN(M+2,NLONH),N)& + - A(M+1,N) + A(M-1,N) - A(M,N))) + AC = A(M,N+1) + DX *(A(M+1,N+1) - A(M,N+1) + DXX *& + (A(MIN(M+2,NLONH),N+1) - A(M+1,N+1) + A(M-1,N+1) - A(M,N+1))) + AD = A(M,MIN(N+2,NLATH)) + DX *(A(M+1,MIN(N+2,NLATH)) -& + A(M,MIN(N+2,NLATH)) + DXX *(A(MIN(M+2,NLONH),MIN(N+2,NLATH))& + - A(M+1,MIN(N+2,NLATH)) + A(M-1,MIN(N+2,NLATH)) -& + A(M,MIN(N+2,NLATH)))) + E(I,J) = AB + DY *(AC - AB + DYY *(AD - AC + AA - AB)) + ENDDO + ENDDO + RETURN + END SUBROUTINE + + subroutine radian(t1,t2,t3,sol) + implicit real(a-h,o-z) +! implicit none +! double precision (a-h,o-z) + pi=3.14159265358979d0 + sol=t1+t2/60.d0+t3/3600.d0 + return + end subroutine + + SUBROUTINE GEOUTM (FLON, FLAT, HUSO, X, Y) +! IMPLICIT REAL (A-Z) + IMPLICIT DOUBLE PRECISION (A-Z) +! IMPLICIT NONE +! DOUBLE PRECISION (A-Z) + PI = 3.14159265 + RG = 180. / PI + E2 = 0.6722670E-02 + EP2 = 0.6768170E-02 + A0 = 0.998317208055891 + A2 = 5.050503255106305E-03 + A4 = 5.323041134969273E-06 + A6 = 6.981680670962105E-09 + A8 = 9.931708438892222E-12 + A10 = 1.44222427482031E-14 + RA = 6378388.0 + XM = (6. * HUSO - 183.) / RG + LOI = FLON / RG - XM + LAT = FLAT / RG + B = RA*(A0 * LAT - 0.5 * (A2 * SIN(2. * LAT) - A4 * SIN(4. * LAT)& + + A6*SIN(6. * LAT) - A8 * SIN(8. * LAT) + A10 * SIN(10. * LAT))) + PSI = LOI * COS(LAT) + W = SQRT(1. - E2 * (SIN(LAT) ** 2)) + HN = RA / W + V2 = 1. + EP2 * (COS(LAT) ** 2) + TF2 = TAN(LAT) ** 2 + C2 = (V2 - TF2) / 6 + C3 = V2 / 24. + V2 ** 2 / 6. - TF2 / 24. + C4 = (V2 * (14. - 58. * TF2) + 40. * TF2 + TF2 ** 2 - 9.) / 120. + C5 = (61.- 58.*TF2 + TF2**2 + (V2 - 1.)*(270. - 330.*TF2)) / 720. + X = 500000. + HN * PSI * (1. + C2 * PSI**2 + C4 * PSI**4)*0.9996 + Y = (B + HN * TAN(LAT) * (0.5 * PSI ** 2 + C3 * PSI ** 4 +& + C5 * PSI ** 6)) * 0.9996 + RETURN + END SUBROUTINE + + SUBROUTINE FECHANNO(DIA,MES,IDA) + INTEGER,INTENT(IN) :: DIA,MES + INTEGER,INTENT(OUT) :: IDA + INTEGER NORMAL(12) + DATA NORMAL/0,31,59,90,120,151,181,212,243,273,304,334/ + IDA=NORMAL(MES)+DIA + IF(MES.EQ.2 .AND. DIA.GT.28)IDA=60 + RETURN + END SUBROUTINE + + +END MODULE MOD_FUNCS + diff --git a/src/predictores_significativos.f90 b/src/predictores_significativos.f90 new file mode 100755 index 0000000000000000000000000000000000000000..9dbe189304a47c55c67c371ded4dbdfc012efede --- /dev/null +++ b/src/predictores_significativos.f90 @@ -0,0 +1,582 @@ + +! sig_predic program selects significance predictor from +! the finded collection +SUBROUTINE sig_predic(nlat,nlon,nlatt,nlont,slat,slon,rlat,rlon,slatt,& + slont,n,ic,id,prec_hr,nger,um,vm,gu92,gv92,gu52,& + gv52,iri,u500,v500,msl_si,q700,t500,t850,& + nanx,neni,new_mi,new_ccm,new_kvars,new_corrpar) + +USE MOD_CSTS +USE MOD_FUNCS + + IMPLICIT NONE + +! 0.1 Declarations of arguments +! 
-------------------------
+
+  INTEGER, INTENT(IN) :: nlat
+  INTEGER, INTENT(IN) :: nlon
+  INTEGER, INTENT(IN) :: nlatt
+  INTEGER, INTENT(IN) :: nlont
+  DOUBLE PRECISION, INTENT(IN) :: slat
+  DOUBLE PRECISION, INTENT(IN) :: slon
+  DOUBLE PRECISION, INTENT(IN) :: rlat
+  DOUBLE PRECISION, INTENT(IN) :: rlon
+  DOUBLE PRECISION, INTENT(IN) :: slatt
+  DOUBLE PRECISION, INTENT(IN) :: slont
+  INTEGER, INTENT(IN) :: n
+  INTEGER, INTENT(IN) :: ic
+  INTEGER, INTENT(IN) :: id
+  DOUBLE PRECISION, INTENT(IN) :: prec_hr(n,nptos)
+  INTEGER, INTENT(IN) :: nger
+  DOUBLE PRECISION, INTENT(IN) :: um(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: vm(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: gu92(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: gv92(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: gu52(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: gv52(n,ic)
+  INTEGER, INTENT(IN) :: iri(nptos)
+  DOUBLE PRECISION, INTENT(IN) :: u500(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: v500(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: msl_si(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: q700(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: t500(n,ic)
+  DOUBLE PRECISION, INTENT(IN) :: t850(n,ic)
+  INTEGER, INTENT(IN) :: nanx
+  INTEGER, INTENT(IN) :: neni
+
+  INTEGER, INTENT(OUT) :: new_mi(nger,nptos)
+  DOUBLE PRECISION, INTENT(OUT) :: new_ccm(nger,nptos)
+  INTEGER, INTENT(OUT) :: new_kvars(nger,nptos,npx)
+  DOUBLE PRECISION, INTENT(OUT) :: new_corrpar(nger,nptos,npx)
+
+  integer nvar,m
+
+  integer nulon,nulat,nulev,nudays,ideb,ifin,ip
+  integer i,j,tt,vv
+  integer is
+
+!*****************************************************************
+  integer mi
+  real ccm
+  character mdl*20,sc*8,pt*9,nomeb*90,nomef*90,ta*3,nta*1
+  real he7(n,ic),he7m(ic)
+  double precision u9(n,ic),v9(n,ic),u5(n,ic),v5(n,ic)
+  real psl(n,ic),ut9(nger,ic),vt9(nger,ic),ut5(nger,ic),vt5(nger,ic),pseal(id)
+  real presor(n,id)
+  real xlat(ic),xlon(ic)
+  real t5(n,ic),t8(n,ic),tm5(ic),tm8(ic)
+  real pslm(ic),um9(ic),vm9(ic),um5(ic),vm5(ic),pslma(ic)
+  real ue5(id),ve5(id),he7ms(ic),he7mr(id)
+!
+  character (len=6) :: he7ca(id)
+  character (len=6) :: t8ca(id)
+  real te8(id),te5(id)
+  real he7me(24)
+  real pres(id),bar(id),den(ic)
+  real pred1(npx,n,neni),pred1m(npx,neni),predh(n,neni),predhm(neni)
+!
+  integer anai(nanx),ana(nanx)
+  integer kvars(npx)
+  integer nor(nanx)
+  integer indi1(ic),indi2(ic)
+  integer annor(n),mesr(n),diar(n)
+  integer ior(n),anno,mes,dia,eqc(nptos)
+  integer ref(nptos),puce(neni),puen(neni,5001)
+
+  integer i1,i2,i3,i4,iana,ice,ien,ipos,ipu,ir,iv,jk,k,nan,ndcp
+  integer nen
+  real rlx,rly,vorm,vorz ! grid spacings (m) and finite-difference vorticity terms
+  real prec(n,nptos),dis(n)
+  real p9(ic),p5(ic)
+  real dato1(npx,nanx),pr(nanx)
+  real corrpar(npx)
+  real coe(npx),con
+  real rlt(ic),rln(ic),rltt(id),rlnt(id)
+  real dist(nanx),dist1(npx,nanx),serin(nanx)
+  real aaa(nanx)
+  real ser(n),media(npx,neni),sigma(npx,neni)
+  real md,sg,medh(neni),sigh(neni)
+  real mu9(ic),su9(ic),mv9(ic),sv9(ic)
+  real mu5(ic),su5(ic),mv5(ic),sv5(ic)
+  real disu5,disu9,disv5,disv9
+!*******************************************************
+
+! print*,"program 7: significant predictors"
+
+  nvar=npx
+  m=nger
+
+!*********************************
+! 1. Synoptic latitude and longitude calculation and assignment of
+! weights windows
+!
+  do j=1,ic
+   rlt(j)=slat+(((j-1)/nlon)*rlat)
+   rln(j)=slon+((mod(j-1,nlon)+1-1)*rlon)
+  enddo
+  p9=0.
+  p5=1.
+  do i1=1,ic
+   if((rlt(i1).le.fnor2).and.(rlt(i1).ge.fsur2)) then
+    if((rln(i1).ge.foes2).and.(rln(i1).le.fest2)) then
+     p9(i1)=1.
+     p5(i1)=4.
+ endif + endif + enddo + do i1=1,ic + if((rlt(i1).le.fnor1).and.(rlt(i1).ge.fsur1)) then + if((rln(i1).ge.foes1).and.(rln(i1).le.fest1)) then + p9(i1)=2. + p5(i1)=8. + endif + endif + enddo +! +! Latitude and longitude calculation in the extended domain (called low +! resolution) + + do j=1,id + rltt(j)=slatt+(((j-1)/nlont)*rlat) + rlnt(j)=slont+((mod(j-1,nlont)+1-1)*rlon) + enddo + +!*********************************** +! REANALYSIS VARIABLES + + u5(:,:)=u500(:,:) + v5(:,:)=v500(:,:) + psl(:,:)=msl_si(:,:) + he7(:,:)=q700(:,:) + t5(:,:)=t500(:,:) + t8(:,:)=t850(:,:) + +! HIGH RESOLUTION (5KM) OBSERVATIONS +! It is neccesary to convert to tenths of mm (multiplying by 10). + + prec(:,:)=prec_hr(:,:)*10. + +! Mean and standard deviation of reference synoptic fields. + + do j=1,ic + do i=1,n + ser(i)=um(i,j) + enddo + call estadis(ser,md,sg,n) + mu9(j)=md + su9(j)=sg + do i=1,n + ser(i)=vm(i,j) + enddo + call estadis(ser,md,sg,n) + mv9(j)=md + sv9(j)=sg + do i=1,n + ser(i)=u5(i,j) + enddo + call estadis(ser,md,sg,n) + mu5(j)=md + su5(j)=sg + do i=1,n + ser(i)=v5(i,j) + enddo + call estadis(ser,md,sg,n) + mv5(j)=md + sv5(j)=sg + enddo + +! A reference centers (matching points between sinoptic and high +! resolution grids) are define to know where the predictor must be +! calculated. + + nen=1 + ref=iri + + puce(1)=ref(1) + do 101 j=1,nptos + do k=1,nen + if(ref(j).eq.puce(k)) go to 101 + enddo + nen=nen+1 + ipos=nen + puce(ipos)=ref(j) + 101 continue + +! Each reference point have associated a group of high resolution grids. + puen=0 + do k=1,nen + do j=1,nptos + if(ref(j).eq.puce(k)) then + puen(k,5001)=puen(k,5001)+1 + ipos=puen(k,5001) + puen(k,ipos)=j + endif + enddo + enddo + +! The predictors are obtained and normalized + +! OBTAINING THE SEA LEVEL PRESSURE (PREDICTOR 1) IN THE REFERENCE CENTERS + do i=1,n + do j=1,nen + ice=puce(j) + pred1(1,i,j)=psl(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(1,i,j) + enddo + call estadis(ser,md,sg,n) + media(1,j)=md + sigma(1,j)=sg + do i=1,n + pred1(1,i,j)=(pred1(1,i,j)-media(1,j))/sigma(1,j) + enddo + enddo + +! OBTAINING THE TREND (PREDICTOR 11) IN THE REFERENCE CENTERS + + do j=1,nen + pred1(11,1,j)=0. + enddo + + do i=2,n + do j=1,nen + ice=puce(j) + pred1(11,i,j)=psl(i,ice)-psl((i-1),ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(11,i,j) + enddo + call estadis(ser,md,sg,n) + media(11,j)=md + sigma(11,j)=sg + do i=1,n + pred1(11,i,j)=(pred1(11,i,j)-media(11,j))/sigma(11,j) + enddo + enddo + + +! OBTAINING THE VERTICAL THERMAL GRADIENT(PREDICTOR 3) +! IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(3,i,j)=t8(i,ice)-t5(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(3,i,j) + enddo + call estadis(ser,md,sg,n) + media(3,j)=md + sigma(3,j)=sg + do i=1,n + pred1(3,i,j)=(pred1(3,i,j)-media(3,j))/sigma(3,j) + enddo + enddo + +! OBTAINING THE 500 hPa TEMPERATURE (PREDICTOR 2) +! IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(2,i,j)=t5(i,ice) + enddo + enddo + + do j=1,nen + do i=1,n + ser(i)=pred1(2,i,j) + enddo + call estadis(ser,md,sg,n) + media(2,j)=md + sigma(2,j)=sg + do i=1,n + pred1(2,i,j)=(pred1(2,i,j)-media(2,j))/sigma(2,j) + enddo + enddo + +! OBTAINING THE VORTICITY (PREDICTOR 4) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. + rly=rt*abs(rlat)*pi/180. 
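+! Centred differences over the neighbouring grid points: assuming the grid is
+! ordered from the northern row southwards (rlat < 0), um(i,ice-nlon)-um(i,ice+nlon)
+! spans 2*rly metres in latitude and vm(i,ice+1)-vm(i,ice-1) spans 2*rlx metres
+! in longitude, so vorm ~ du/dy, vorz ~ dv/dx and pred1(4,i,j) = vorz-vorm
+! approximates the geostrophic relative vorticity dv/dx - du/dy.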
+ vorm=um(i,ice-nlon)-um(i,ice+nlon) + vorm=vorm/(2.*rly) + vorz=vm(i,ice+1)-vm(i,ice-1) + vorz=vorz/(2.*rlx) + pred1(4,i,j)=vorz-vorm + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(4,i,j) + enddo + call estadis(ser,md,sg,n) + media(4,j)=md + sigma(4,j)=sg + do i=1,n + pred1(4,i,j)=(pred1(4,i,j)-media(4,j))/sigma(4,j) + enddo + enddo + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS (PREDICTORS 5 AND 6) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(5,i,j)=um(i,ice) + pred1(6,i,j)=vm(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(5,i,j) + enddo + call estadis(ser,md,sg,n) + media(5,j)=md + sigma(5,j)=sg + do i=1,n + pred1(5,i,j)=(pred1(5,i,j)-media(5,j))/sigma(5,j) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(6,i,j) + enddo + call estadis(ser,md,sg,n) + media(6,j)=md + sigma(6,j)=sg + do i=1,n + pred1(6,i,j)=(pred1(6,i,j)-media(6,j))/sigma(6,j) + enddo + enddo + +! OBTAINING THE VORTICITY IN 500 hPa (PREDICTOR 7) IN THE REFERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + rlx=rt*cos(rlt(ice)*pi/180.)*pi*rlon/180. + rly=rt*abs(rlat)*pi/180. + vorm=u5(i,ice-nlon)-u5(i,ice+nlon) + vorm=vorm/(2.*rly) + vorz=v5(i,ice+1)-v5(i,ice-1) + vorz=vorz/(2.*rlx) + pred1(7,i,j)=vorz-vorm + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(7,i,j) + enddo + call estadis(ser,md,sg,n) + media(7,j)=md + sigma(7,j)=sg + do i=1,n + pred1(7,i,j)=(pred1(7,i,j)-media(7,j))/sigma(7,j) + enddo + enddo + + +! OBTAINING THE GEOSTROPHIC U/V COMPONENTS IN 500 hPa (PREDICTORS 8 AND 9) +! IN THE RERENCE CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(8,i,j)=u5(i,ice) + pred1(9,i,j)=v5(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(8,i,j) + enddo + call estadis(ser,md,sg,n) + media(8,j)=md + sigma(8,j)=sg + do i=1,n + pred1(8,i,j)=(pred1(8,i,j)-media(8,j))/sigma(8,j) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(9,i,j) + enddo + call estadis(ser,md,sg,n) + media(9,j)=md + sigma(9,j)=sg + do i=1,n + pred1(9,i,j)=(pred1(9,i,j)-media(9,j))/sigma(9,j) + enddo + enddo + +! OBTAINING THE ESPECIFIC HUMIDITY IN 700 hPa (PREDICTOR 10) IN THE REFERENCE +! CENTERS + + do i=1,n + do j=1,nen + ice=puce(j) + pred1(10,i,j)=he7(i,ice) + enddo + enddo + do j=1,nen + do i=1,n + ser(i)=pred1(10,i,j) + enddo + call estadis(ser,md,sg,n) + media(10,j)=md + sigma(10,j)=sg + do i=1,n + pred1(10,i,j)=(pred1(10,i,j)-media(10,j))/sigma(10,j) + enddo + enddo +! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +! ESTANDARIZATION OF REFERENCE WINDS (SINOPTIC WINDS ALSO) + + do i=1,n + do j=1,ic + u9(i,j)=(um(i,j)-mu9(j))/su9(j) + v9(i,j)=(vm(i,j)-mv9(j))/sv9(j) + u5(i,j)=(u5(i,j)-mu5(j))/su5(j) + v5(i,j)=(v5(i,j)-mv5(j))/sv5(j) + enddo + enddo + + do i=1,m + do j=1,ic + ut9(i,j)=(gu92(i,j)-mu9(j))/su9(j) + vt9(i,j)=(gv92(i,j)-mv9(j))/sv9(j) + ut5(i,j)=(gu52(i,j)-mu5(j))/su5(j) + vt5(i,j)=(gv52(i,j)-mv5(j))/sv5(j) + enddo + enddo + +! OBTAINING SIGNIFICANT PREDICTORS FOR EACH SINOPTIC TYPE IN EACH HIGH +! RESOLUTION GRID POINT. + + do 1000 i=1,m + +! print*,i + + +! Determine the "nanx" reference alements more similar to each sinoptic type +! and the corresponding distances + + do i1=1,n + ior(i1)=i1 + dis(i1)=9999. + enddo + do 113 i1=1,n + call distancia9(ut9,m,u9,n,i,i1,p9,disu9,ic) + call distancia9(vt9,m,v9,n,i,i1,p9,disv9,ic) + call distancia5(ut5,m,u5,n,i,i1,p5,disu5,ic) + call distancia5(vt5,m,v5,n,i,i1,p5,disv5,ic) + dis(i1)=(disu9+disv9+disu5+disv5)/4. + + 113 continue + + call burbuja1(dis,ior,n,nanx) + + do i1=1,nanx + anai(i1)=ior(i1) + enddo + +! 
Consider all high resolution grid points associated with the low resolution +! ones + + do 1100 ien=1,nen + do 1200 i2=1,puen(ien,5001) + ipu=puen(ien,i2) + +! Consider predictand values (precipitation) and predictors from the analogs + + nan=0 + ndcp=0 + do i3=1,nanx + iana=anai(i3) + + if(prec(iana,ipu).ne.-999.) then + nan=nan+1 + ana(nan)=iana + do i4=1,nvar + dato1(i4,nan)=pred1(i4,iana,ien) + enddo + pr(nan)=prec(iana,ipu) + if(pr(nan).eq.-3.) pr(nan)=1. + if(pr(nan).ge.1.) ndcp=ndcp+1 + endif + enddo + if(nan.le.30) then + mi=0 + ccm=-7.77 + go to 1199 + endif + + if(ndcp.le.30) then + mi=0 + ccm=-9.99 + go to 1199 + endif + + if(nan.gt.150) nan=150 + +! Calculation of significant predictors, their coeficients and their +! multiple and partial correlation coeficients to estimate the +! precipitation +! +! mi: number of selected predictors +! ccm: multiple correlation coeficient +! kvars: selected predictors labels (vector) +! corrpar: partial correlation of selected predictors (vector) +! coe: regression coeficients associated to each predictor (vector). +! (value = 0 when there is no selected predictor). +! con: Y-intercept (independent equation term) +! tol: tolerance to select predictors + + call stepregrs& + (pr,dato1,nanx,nvar,nan,mi,ccm,kvars,corrpar,coe,con,tol) + + 1199 continue + + new_mi(i,ipu)=0 + new_ccm(i,ipu)=0. + new_kvars(i,ipu,:)=0 + new_corrpar(i,ipu,:)=0. + + new_mi(i,ipu)=mi + new_ccm(i,ipu)=ccm + + if (mi.ne.0) then + vv=mi + do tt=1,mi + new_kvars(i,ipu,tt)=kvars(tt) + new_corrpar(i,ipu,tt)=corrpar(kvars(tt)) + end do + else + vv=1 + tt=vv + new_kvars(i,ipu,tt)=0 + new_corrpar(i,ipu,tt)=0. + end if + + + 1200 continue + + 1100 continue + + 1000 continue + +END SUBROUTINE sig_predic + diff --git a/src/pts_ref_est_pen.f90 b/src/pts_ref_est_pen.f90 new file mode 100755 index 0000000000000000000000000000000000000000..39f504c5b59d417e3fb2d7b2d6e3e9099b2b6a81 --- /dev/null +++ b/src/pts_ref_est_pen.f90 @@ -0,0 +1,59 @@ + +! The ptos_ref program links Reanalysis grid and +! observed grid using the nearest neighbor + +SUBROUTINE ptos_ref(ic,x,y,xcand,ycand,iri,ipos) + +USE MOD_CSTS + + Implicit none + + INTEGER, INTENT(IN) :: ic + REAL, INTENT(IN) :: x(ic) + REAL, INTENT(IN) :: y(ic) + REAL, INTENT(IN) :: xcand(nptos) + REAL, INTENT(IN) :: ycand(nptos) + INTEGER, INTENT(OUT) :: iri(nptos) + INTEGER, INTENT(OUT) :: ipos + + integer np,i,j,k + real xe(nptos),ye(nptos),xr(ic),yr(ic),dis,dmin(nptos) + integer valores_unicos(nptos+1) + + np=nptos + +! print*,"program 6: reference points" + + xr=x/1000. + yr=y/1000. + + xe=xcand/1000. + ye=ycand/1000. + + valores_unicos=0 + + do 100 i=1,np + + dmin(i)=1600000000. + + do 110 j=1,ic + dis=(xe(i)-xr(j))**2+(ye(i)-yr(j))**2 + if(dis.lt.dmin(i)) then + dmin(i)=dis + iri(i)=j + endif + 110 continue + + dmin(i)=sqrt(dmin(i)) + + do k=1,valores_unicos(np+1) + if(valores_unicos(k).eq.iri(i)) go to 100 + enddo + valores_unicos(np+1)=valores_unicos(np+1)+1 + ipos=valores_unicos(np+1) + valores_unicos(ipos)=iri(i) + + 100 continue + +END SUBROUTINE ptos_ref + diff --git a/src/pts_ref_est_pen_4int.f90 b/src/pts_ref_est_pen_4int.f90 new file mode 100755 index 0000000000000000000000000000000000000000..0e44c895704255abd2fd6d31718bb89435d79efe --- /dev/null +++ b/src/pts_ref_est_pen_4int.f90 @@ -0,0 +1,84 @@ + +! The ptos_ref_4 program links Reanalysis grid and +! observed grid using 4 nearest points interpolation +! 
(bilineal interpolation approach) + + +SUBROUTINE ptos_ref_4(ic,x,y,xcand,ycand,Vdmin,Vref,ipos) + +USE MOD_CSTS + + Implicit none + + INTEGER, INTENT(IN) :: ic + REAL, INTENT(IN) :: x(ic) + REAL, INTENT(IN) :: y(ic) + REAL, INTENT(IN) :: xcand(nptos) + REAL, INTENT(IN) :: ycand(nptos) + DOUBLE PRECISION, INTENT(OUT) :: Vdmin(nptos,4) + INTEGER, INTENT(OUT) :: Vref(nptos,4) + INTEGER, INTENT(OUT) :: ipos + + integer iri + real dmin + integer np,i,j,k + real xe(nptos),ye(nptos),xr(ic),yr(ic),dis + real copiaXr(nptos),copiaYr(nptos) + integer valores_unicos(nptos+1) + integer cont + +! print*,"program 6: 4 nearest points of reference" + + np=nptos + + xr=x/1000. + yr=y/1000. + + xe=xcand/1000. + ye=ycand/1000. + + + valores_unicos=0 + + do i=1,np + + copiaXr=xr + copiaYr=yr + + do cont=1,4 !4 nearest pts loop + + dmin=1600000000. + + do 110 j=1,ic + dis=(xe(i)-copiaXr(j))**2+(ye(i)-copiaYr(j))**2 + if(dis.lt.dmin) then + dmin=dis + iri=j + endif + 110 continue + + Vdmin(i,cont)=sqrt(dmin) + if (Vdmin(i,cont) .lt. 0.1) then + Vdmin(i,cont)=0.1 + endif + + Vref(i,cont)=iri + copiaXr(iri)=99999999. + copiaYr(iri)=99999999. + + + do k=1,valores_unicos(np+1) + if(valores_unicos(k).eq.iri) go to 100 + enddo + + valores_unicos(np+1)=valores_unicos(np+1)+1 + ipos=valores_unicos(np+1) + valores_unicos(ipos)=iri + + 100 continue + enddo !4 rearest pts loop + + enddo + +END SUBROUTINE ptos_ref_4 + diff --git a/src/registerDynamicSymbol.c b/src/registerDynamicSymbol.c new file mode 100644 index 0000000000000000000000000000000000000000..ad203591419a8e391ea22bd7b2a496f6f94c0052 --- /dev/null +++ b/src/registerDynamicSymbol.c @@ -0,0 +1,10 @@ +// RegisteringDynamic Symbols + +#include +#include +#include + +void R_init_markovchain(DllInfo* info) { + R_registerRoutines(info, NULL, NULL, NULL, NULL); + R_useDynamicSymbols(info, TRUE); +} diff --git a/src/training_part1_prec.f90 b/src/training_part1_prec.f90 new file mode 100755 index 0000000000000000000000000000000000000000..37dc8f3b2653a7460577c8e1ebbfd44a5e9e89e7 --- /dev/null +++ b/src/training_part1_prec.f90 @@ -0,0 +1,89 @@ +! !!!!!!!!!!!!!!! + subroutine training_part1(u500,v500,t1000,z500,z1000,& + msl_si,msl_lr,ngridd,nlat,nlon,ic,nlatt,nlont,& + id,slat,slon,rlat,rlon,slatt,slont,nd,& + um,vm,insol,gu92,gv92,gu52,gv52,nger) + +! !!!!!!!!!!!!!!! +!* 0. DECLARATIONS +! ------------ +! MODULES with constants and functions +use mod_csts +use mod_funcs + +implicit none + +!!!!!!!!!!!!!!!!!!!!!!!! +! INPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +integer, intent(in) :: ngridd +!*********************************************** +! DOMAIN variables +!************************************************ +! sinoptic grid +integer, intent(in) :: nlat +integer, intent(in) :: nlon +integer, intent(in) :: ic +!----------------------------------------------- +! low resolution grid +integer, intent(in) :: nlatt +integer, intent(in) :: nlont +integer, intent(in) :: id +!------------------------------------------------ +double precision, intent(in) :: slat +double precision, intent(in) :: slon +double precision, intent(in) :: rlat +double precision, intent(in) :: rlon +!------------------------------------------------ +double precision, intent(in) :: slatt +double precision, intent(in) :: slont +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! TIME variables +integer, intent(in) :: nd +!------------------------------------------------ +! 
Reanlysis fields +double precision, intent(in) :: u500(nd,ic) +double precision, intent(in) :: v500(nd,ic) +double precision, intent(in) :: t1000(nd,ic) +double precision, intent(in) :: z500(nd,ic) +double precision, intent(in) :: z1000(nd,ic) +double precision, intent(in) :: msl_si(nd,ic) +double precision, intent(in) :: msl_lr(nd,id) +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! OUTPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +double precision, intent(out) :: um(nd,ic) +double precision, intent(out) :: vm(nd,ic) +double precision, intent(out) :: insol(nd) +!------------------------------------------------ +double precision, intent(out) :: gu92(nd,ic) +double precision, intent(out) :: gv92(nd,ic) +double precision, intent(out) :: gu52(nd,ic) +double precision, intent(out) :: gv52(nd,ic) +!------------------------------------------------ +integer, intent(out) :: nger +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! INNER FORTRAN VARIABLES +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!------------------------------------------------ +real :: den(nd,ic) +!------------------------------------------------ + +!print*,"" +!print*,"*** TRAINING PROCESS ***" +!print*,"" + +call calc_tempes_densi_sealev(ic,nd,msl_si,t1000,den) + +call geos(ic,nd,id,slatt,slont,slat,slon,rlat,& + rlon,rlat,rlon,nlatt,nlont,nlat,nlon,den,msl_lr,ngridd,& + um,vm) + +call clasif(ic,nd,nlon,nlat,slat,slon,rlat,rlon,um,vm,u500,v500,z1000,& + z500,nger,gu92,gv92,gu52,gv52) + +end subroutine + diff --git a/src/training_part2_prec.f90 b/src/training_part2_prec.f90 new file mode 100755 index 0000000000000000000000000000000000000000..733f1d881427b0180895638a58637dbebf552a66 --- /dev/null +++ b/src/training_part2_prec.f90 @@ -0,0 +1,112 @@ +! !!!!!!!!!!!!!!! + subroutine training_part2(u500,v500,t500,t850,msl_si,q700,& + lon_hr,lat_hr,prec_hr,& + nanx,nlat,nlon,ic,nlatt,nlont,id,slat,& + slon,rlat,rlon,slatt,slont,nd,um,vm,gu92,gv92,& + gu52,gv52,nger,Vdmin,Vref,ipos2,new_mi,new_ccm,& + new_kvars,new_corrpar) + +! !!!!!!!!!!!!!!! +!* 0. DECLARATIONS +! ------------ +! MODULES with constants and functions +use mod_csts +use mod_funcs + +implicit none + +!!!!!!!!!!!!!!!!!!!!!!!! +! INPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +integer, intent(in) :: nanx +!*********************************************** +! DOMAIN variables +!************************************************ +! sinoptic grid +integer, intent(in) :: nlat +integer, intent(in) :: nlon +integer, intent(in) :: ic +!----------------------------------------------- +! low resolution grid +integer, intent(in) :: nlatt +integer, intent(in) :: nlont +integer, intent(in) :: id +!------------------------------------------------ +double precision, intent(in) :: slat +double precision, intent(in) :: slon +double precision, intent(in) :: rlat +double precision, intent(in) :: rlon +!------------------------------------------------ +double precision, intent(in) :: slatt +double precision, intent(in) :: slont +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! TIME variables +integer, intent(in) :: nd +!------------------------------------------------ +! Reanlysis fields +double precision, intent(in) :: u500(nd,ic) +double precision, intent(in) :: v500(nd,ic) +double precision, intent(in) :: t500(nd,ic) +double precision, intent(in) :: t850(nd,ic) +double precision, intent(in) :: msl_si(nd,ic) +double precision, intent(in) :: q700(nd,ic) +!------------------------------------------------ +! 
AEMET high resolution observational dat +double precision, intent(in) :: lon_hr(nptos) +double precision, intent(in) :: lat_hr(nptos) +double precision, intent(in) :: prec_hr(nd,nptos) +!------------------------------------------------ +double precision, intent(in) :: um(nd,ic) +double precision, intent(in) :: vm(nd,ic) +!------------------------------------------------ +integer, intent(in) :: nger +!------------------------------------------------ +double precision, intent(in) :: gu92(nger,ic) +double precision, intent(in) :: gv92(nger,ic) +double precision, intent(in) :: gu52(nger,ic) +double precision, intent(in) :: gv52(nger,ic) +!------------------------------------------------ +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! OUTPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +integer, intent(out) :: ipos2 +!!!!!!!!!!!!!!!!!!!!!!!! +double precision, intent(out) :: Vdmin(nptos,4) +integer, intent(out) :: Vref(nptos,4) +!------------------------------------------------ +integer, intent(out) :: new_mi(nger,nptos) +double precision, intent(out) :: new_ccm(nger,nptos) +integer, intent(out) :: new_kvars(nger,nptos,npx) +double precision, intent(out) :: new_corrpar(nger,nptos,npx) +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! INNER FORTRAN VARIABLES +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +integer :: ipos +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +real :: x(ic) +real :: y(ic) +!------------------------------------------------ +real :: xcand(nptos) +real :: ycand(nptos) +!------------------------------------------------ +integer :: iri(nptos) +!------------------------------------------------ + +call utm_ERA(ic,nlat,nlon,slat,slon,rlat,rlon,x,y) + +call utm_obs(lon_hr,lat_hr,xcand,ycand) + +call ptos_ref_4(ic,x,y,xcand,ycand,Vdmin,Vref,ipos2) + +call ptos_ref(ic,x,y,xcand,ycand,iri,ipos) + +call sig_predic(nlat,nlon,nlatt,nlont,slat,slon,rlat,rlon,slatt,& + slont,nd,ic,id,prec_hr,nger,um,vm,gu92,gv92,gu52,& + gv52,iri,u500,v500,msl_si,q700,t500,t850,nanx,& + ipos,new_mi,new_ccm,new_kvars,new_corrpar) + +end subroutine + diff --git a/src/training_temp.f90 b/src/training_temp.f90 new file mode 100755 index 0000000000000000000000000000000000000000..009a5f366e5a9241d71360c758a5deb68b2a032c --- /dev/null +++ b/src/training_temp.f90 @@ -0,0 +1,103 @@ +! !!!!!!!!!!!!!!! + subroutine training_temp(t1000,msl_si,msl_lr,lon_hr,lat_hr,& + ngridd,nlat,nlon,ic,nlatt,nlont,id,slat,& + slon,rlat,rlon,slatt,slont,nd,day,month,& + um,vm,insol,Vdmin,Vref,ipos) + +! !!!!!!!!!!!!!!! +!* 0. DECLARATIONS +! ------------ +! MODULES with constants and functions +use mod_csts +use mod_funcs + +implicit none + +!!!!!!!!!!!!!!!!!!!!!!!! +! INPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +integer, intent(in) :: ngridd +!*********************************************** +! DOMAIN variables +!************************************************ +! sinoptic grid +integer, intent(in) :: nlat +integer, intent(in) :: nlon +integer, intent(in) :: ic +!----------------------------------------------- +! 
low resolution grid +integer, intent(in) :: nlatt +integer, intent(in) :: nlont +integer, intent(in) :: id +!------------------------------------------------ +double precision, intent(in) :: slat +double precision, intent(in) :: slon +double precision, intent(in) :: rlat +double precision, intent(in) :: rlon +!------------------------------------------------ +double precision, intent(in) :: slatt +double precision, intent(in) :: slont +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! TIME variables +integer, intent(in) :: nd +integer, intent(in) :: day(nd) +integer, intent(in) :: month(nd) +!------------------------------------------------ +! Reanlysis fields +double precision, intent(in) :: t1000(nd,ic) +double precision, intent(in) :: msl_si(nd,ic) +double precision, intent(in) :: msl_lr(nd,id) +!------------------------------------------------ +! AEMET high resolution observational dat +double precision, intent(in) :: lon_hr(nptos) +double precision, intent(in) :: lat_hr(nptos) +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!! +! OUTPUT ARGUMENTS +!!!!!!!!!!!!!!!!!!!!!!!! +double precision, intent(out) :: um(nd,ic) +double precision, intent(out) :: vm(nd,ic) +double precision, intent(out) :: insol(nd) +!!!!!!!!!!!!!!!!!!!!!!!! +double precision, intent(out) :: Vdmin(nptos,4) +integer, intent(out) :: Vref(nptos,4) +!!!!!!!!!!!!!!!!!!!!!!!! +integer, intent(out) :: ipos + +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! INNER FORTRAN VARIABLES +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!------------------------------------------------ +real :: den(nd,ic) +!------------------------------------------------ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +real :: x(ic) +real :: y(ic) +!------------------------------------------------ +real :: xcand(nptos) +real :: ycand(nptos) +!------------------------------------------------ + + +!print*,"" +!print*,"*** TRAINING PROCESS ***" +!print*,"" + +call calc_tempes_densi_sealev(ic,nd,msl_si,t1000,den) + +call geos(ic,nd,id,slatt,slont,slat,slon,rlat,& + rlon,rlat,rlon,nlatt,nlont,nlat,nlon,den,msl_lr,ngridd,& + um,vm) + +call insolation(nd,day,month,insol) + +call utm_ERA(ic,nlat,nlon,slat,slon,rlat,rlon,x,y) + +call utm_obs(lon_hr,lat_hr,xcand,ycand) + +call ptos_ref_4(ic,x,y,xcand,ycand,Vdmin,Vref,ipos) + +end subroutine + diff --git a/tests/testthat/test-CST_Calibration.R b/tests/testthat/test-CST_Calibration.R index a4491719ce63e224965a60000926c18799bffe7f..f169e2e3fbed6437b72846dfe0740afcd8daf892 100644 --- a/tests/testthat/test-CST_Calibration.R +++ b/tests/testthat/test-CST_Calibration.R @@ -17,10 +17,10 @@ test_that("Sanity checks", { expect_equal(cal$lat, obs$lat) expect_equal(cal$lon, exp$lon) expect_equal(cal$lon, obs$lon) - expect_error( - CST_Calibration(exp = exp, obs = exp), - "The length of the dimension 'member' in the component 'data' " - ) + # expect_error( + # CST_Calibration(exp = exp, obs = exp), + # "The length of the dimension 'member' in the component 'data' " + # ) exp2 <- exp exp2$data[1, 2, 1, 1, 1, 1] <- NA diff --git a/tests/testthat/test-CST_MultiMetric.R b/tests/testthat/test-CST_MultiMetric.R index b08b68ba9f941c2cb195b16d3511b3b995bd990f..9d048ab3e86d0667c0e9c8a56961283ca6ea0e70 100644 --- a/tests/testthat/test-CST_MultiMetric.R +++ b/tests/testthat/test-CST_MultiMetric.R @@ -11,31 +11,38 @@ test_that("basic use case", { attr(exp, 'class') <- 's2dv_cube' attr(obs, 'class') <- 's2dv_cube' - result <- list(data = array(rep(c(rep(1, 9), 
-rep(0.89999999999999991118215802998747676610950, 3)), 48), - dim = c(dataset = 3, dataset = 1, statistics = 4, - lat = 6, lon = 8)), - lat = lat, lon = lon) + result <- list(data = list(corr = array(rep(1, 3* 48), + dim = c(nexp = 3, nobs = 1, + lat = 6, lon = 8)), + p.val = array(rep(0, 3 * 48), dim = c(nexp = 3, nobs = 1, + lat = 6, lon = 8)), + conf.lower = array(rep(1, 3* 48), + dim = c(nexp = 3, nobs = 1, + lat = 6, lon = 8)), + conf.upper = array(rep(1, 3* 48), + dim = c(nexp = 3, nobs = 1, + lat = 6, lon = 8))), + lat = lat, lon = lon) attr(result, 'class') <- 's2dv_cube' expect_equal(CST_MultiMetric(exp = exp, obs = obs), result) exp2 <- exp exp2$data[1, 1, 1, 2, 1, 1] = NA - CST_MultiMetric(exp = exp2, obs = obs) res <- CST_MultiMetric(exp = exp, obs = obs, metric = 'rms') expect_equal(length(res), 3) - expect_equal(dim(res$data), - c(dataset = 3, dataset = 1, statistics = 3, lat = 6, lon = 8)) + expect_equal(dim(res$data$rms), + c(nexp = 3, nobs = 1, lat = 6, lon = 8)) res <- CST_MultiMetric(exp = exp, obs = obs, metric = 'rms', multimodel = FALSE) - expect_equal(dim(res$data), - c(dataset = 2, dataset = 1, statistics = 3, lat = 6, lon = 8)) + expect_equal(dim(res$data$rms), + c(nexp = 2, nobs = 1, lat = 6, lon = 8)) + expect_equal(length(res$data), 3) res <- CST_MultiMetric(exp = exp, obs = obs, metric = 'rmsss') - expect_equal(dim(res$data), - c(dataset = 3, dataset = 1, statistics = 2, lat = 6, lon = 8)) + expect_equal(dim(res$data$rmsss), + c(nexp = 3, nobs = 1, lat = 6, lon = 8)) res <- CST_MultiMetric(exp = exp, obs = obs, metric = 'rmsss', multimodel = FALSE) - expect_equal(dim(res$data), - c(dataset = 2, dataset = 1, statistics = 2, lat = 6, lon =8)) + expect_equal(dim(res$data$rmsss), + c(nexp = 2, nobs = 1, lat = 6, lon = 8)) }) @@ -59,7 +66,7 @@ test_that("Sanity checks", { expect_error( CST_MultiMetric(exp = exp, obs = obs, metric = 1), paste0("Parameter 'metric' must be a character string indicating one ", - "of the options: 'correlation', 'rms' or 'rmse'")) + "of the options: 'correlation', 'rms', 'rmsss' or 'rpss'")) expect_error( CST_MultiMetric(exp = exp, obs = obs, metric = NA), "missing value where TRUE/FALSE needed") @@ -70,4 +77,15 @@ test_that("Sanity checks", { CST_MultiMetric(exp = exp, obs = obs, metric = "correlation", multimodel = NULL), "Parameter 'multimodel' must be a logical value.") + expect_error( + MultiMetric(exp = lonlat_data$exp, obs = lonlat_data$obs, metric = "rpss", + multimodel = TRUE), + "Element 'data' from parameters 'exp' and 'obs' should have dimmension names.") +exp <- lonlat_data$exp$data[1,,,,,] +obs <- lonlat_data$obs$data[1,,,,,] + expect_error( + MultiMetric(exp = exp, obs = obs, metric = "rpss", + multimodel = TRUE), + paste0("Dimension names of element 'data' from parameters 'exp' and ", + "'obs' should have the same name dimmension.")) }) diff --git a/tests/testthat/test-CST_QuantileMapping.R b/tests/testthat/test-CST_QuantileMapping.R index a03680f7243b5fe0f82bf112371c0dcb5090dbc9..f8482967627a30bbdc7873200754b920df85d58a 100644 --- a/tests/testthat/test-CST_QuantileMapping.R +++ b/tests/testthat/test-CST_QuantileMapping.R @@ -1,6 +1,5 @@ context("Generic tests") test_that("Sanity checks", { -library(qmap) expect_error( CST_QuantileMapping(exp = 1), paste0("Parameter 'exp' and 'obs' must be of the class 's2dv_cube', ", @@ -90,8 +89,25 @@ library(qmap) expect_equal(CST_QuantileMapping(exp = exp, obs = obs, sample_dims = 'time', method = 'SSPLIN'), exp) library(CSTools) - expect_equal(CST_QuantileMapping(exp = 
lonlat_data$exp, obs = lonlat_data$obs), - CST_QuantileMapping(exp = lonlat_data$exp, obs = lonlat_data$obs, - exp_cor = lonlat_data$exp)) + expect_error(CST_QuantileMapping(exp = lonlat_data$exp, obs = lonlat_data$obs, + exp_cor = lonlat_data$exp), + paste0("Review parameter 'sample_dims' or the data dimensions ", + "since multiple dimensions with different length have being ", + "found in the data inputs that don't match with 'sample_dims' parameter.")) + exp <- lonlat_data$exp + exp$data <- exp$data[,,1:4,,,] + dim(exp$data) <- c(dataset = 1, member = 15, sdate = 4, ftime = 3, + lat = 22, lon = 53) + obs <- lonlat_data$obs + obs$data <- obs$data[,,1:4, ,,] + dim(obs$data) <- c(dataset = 1, member = 1, sdate = 4, ftime = 3, + lat = 22, lon = 53) + exp_cor <- lonlat_data$exp + exp_cor$data <- exp_cor$data[,,5:6,,,] + dim(exp_cor$data) <- c(dataset = 1, member = 15, sdate = 2, ftime = 3, + lat = 22, lon = 53) + expect_warning(CST_QuantileMapping(exp, obs, exp_cor, + sample_dims = c('sdate', 'ftime', 'member')), + "The sample_dims sdate are not used when applying the correction to 'exp_cor'") }) diff --git a/tests/testthat/test-CST_RainFARM.R b/tests/testthat/test-CST_RainFARM.R index 3608d4e8c3cd3347243289dd4b5d89cc4a9d0c41..d9414925dc4a4d1a48060022e02c93e9a9f430ed 100644 --- a/tests/testthat/test-CST_RainFARM.R +++ b/tests/testthat/test-CST_RainFARM.R @@ -26,7 +26,7 @@ test_that("Sanity checks and simple use cases", { ) expect_error( res <- CST_RainFARM(exp, nf=8, weights=array(0,dim=c(2,2))), - "The dimensions of the weights matrix" + "Parameter 'weights' must have dimension names when it is not a scalar." ) dimexp=dim(exp$data) @@ -92,7 +92,7 @@ test_that("Sanity checks and simple use cases", { # Use climatological mean of PF precipitation to generate sythetic weights w <- rfweights(rpfm, res$lon, res$lat, exp$lon, exp$lat, 8, fsmooth=FALSE ) - + names(dim(w)) <- c('lon', 'lat') res <- CST_RainFARM(exppf, nf=8, time_dim=c("ftime", "sdate", "member"), nens=2, nproc=2, fsmooth=FALSE) resw <- CST_RainFARM(exppf, nf=8, time_dim=c("ftime", "sdate", "member"), diff --git a/tests/testthat/test-CST_SplitDim.R b/tests/testthat/test-CST_SplitDim.R index 800344c192cb93ec275c815b976ef4da1f37e25b..d595b76f3d2de806f004bdda2a48a5a6d14a15a7 100644 --- a/tests/testthat/test-CST_SplitDim.R +++ b/tests/testthat/test-CST_SplitDim.R @@ -73,4 +73,20 @@ library(CSTools) result$data <- output expect_equal(CST_SplitDim(data = lonlat_data$exp, split_dim = 'ftime'), result) + + expect_equal(dim(CST_SplitDim(data = lonlat_data$exp, split_dim = 'member', + freq = 5)$data), + c(dataset = 1, member = 5, sdate = 6, ftime = 3, + lat = 22, lon = 53, index = 3)) + expect_warning(CST_SplitDim(data = lonlat_data$exp, split_dim = 'member', + freq = 5, new_dim_name = c('a', 'b')), + paste0("Parameter 'new_dim_name' has length greater than 1 ", + "and only the first elemenst is used.")) + expect_error(CST_SplitDim(data = lonlat_data$exp, split_dim = 'member', + freq = 5, new_dim_name = 3), + "Parameter 'new_dim_name' must be character string") + expect_equal(dim(CST_SplitDim(data = lonlat_data$exp, split_dim = 'member', + freq = 5, new_dim_name = 'wt')$data), + c(dataset = 1, member = 5, sdate = 6, ftime = 3, + lat = 22, lon = 53, wt = 3)) }) diff --git a/tests/testthat/test-PlotPDFsOLE.R b/tests/testthat/test-PlotPDFsOLE.R index bd1279d9df403c795910d5d9b921f00b6df38592..91e61d08c2c7c08c67d32d70410c1ee9c68f9770 100644 --- a/tests/testthat/test-PlotPDFsOLE.R +++ b/tests/testthat/test-PlotPDFsOLE.R @@ -39,6 +39,22 @@ 
test_that("Sanity checks", {
                paste0("Parameter 'plotfile' must be a character string ",
                       "indicating the path and name of output png file."))
+
+  expect_error(PlotPDFsOLE(pdf_1, pdf_2, legendPos = 1, plotfile = NULL,
+                           width = 30, height = 15,
+                           units = "cm", dpi = 300) ,
+               "Parameter 'legendPos' must be character")
+
+  expect_error(PlotPDFsOLE(pdf_1, pdf_2, legendPos = 'arriba',
+                           plotfile = "plot.png",
+                           width = 30, height = 15,
+                           units = "cm", dpi = 300) ,
+               "Parameter 'legendPos' must be equal to 'bottom', 'top', 'right' or 'left'.")
+
+  expect_error(PlotPDFsOLE(pdf_1, pdf_2, legendSize = '3', plotfile = NULL,
+                           width = 30, height = 15,
+                           units = "cm", dpi = 300) ,
+               "Parameter 'legendSize' must be numeric.")
+
   expect_error(PlotPDFsOLE(pdf_1, pdf_2, nsigma = '3', plotfile = NULL,
                            width = 30, height = 15,
                            units = "cm", dpi = 300) ,
diff --git a/vignettes/Analogs_vignette.Rmd b/vignettes/Analogs_vignette.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..5a52a05d99666c617ccf02a1a953b8c60e79c540
--- /dev/null
+++ b/vignettes/Analogs_vignette.Rmd
@@ -0,0 +1,376 @@
+---
+title: "Analogs based on large scale for downscaling"
+author: "M. Carmen Alvarez-Castro and M. del Mar Chaves-Montero (CMCC, Italy)"
+date: "November 2020"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteEngine{knitr::knitr}
+  %\VignetteIndexEntry{Analogs}
+  %\usepackage[utf8]{inputenc}
+---
+
+## Downscaling seasonal forecast data using Analogs
+
+In this example, seasonal temperature forecasts initialized in October will be
+used to downscale temperature over the Balearic Islands using the CMCC System 3
+seasonal forecasting system from the Euro-Mediterranean Center on Climate Change
+(CMCC), by computing analogs of sea level pressure (SLP) over a larger region
+(the North Atlantic). The first step is to load the data to be downscaled
+(i.e. CMCC) over the large region (i.e. the North Atlantic) for temperature
+(predictand) and SLP (predictor), together with the same variables and region
+from a higher-resolution dataset (ERA5). In a second step, the model is
+interpolated to the ERA5 resolution. In a third step, the analogs are found
+using one of the three criteria. In a fourth step, the downscaled dataset is
+obtained for the selected region (local scale, in this case the Balearic Islands).
+
+## 1. Introduction of the function
+
+For instance, if we want to perform a temperature downscaling over the Balearic
+Islands for October, we will get a daily series of temperature with one analog
+per day, the best analog. How do we define the best analog for a certain day?
+This function offers three options for that:
+
+(1) The day with the minimum Euclidean distance in a large-scale field: using,
+e.g., pressure or geopotential height as variables and the North Atlantic as
+large-scale region. The atmospheric circulation pattern in the North Atlantic
+(LargeScale) plays an important role in the climate of Spain (LocalScale).
+The function will find the day with the most similar pattern of atmospheric
+circulation in the database (obs, slp in ERA5) to the day of interest
+(exp, slp in the model). Once the date of the best analog is found, the function
+takes the temperature associated with that day (obsVar, tas in ERA5), with a
+subset over the region of interest (Balearic Islands).
+
+(2) Same as (1), but in this case the analogs are searched for at the local
+scale (Balearic Islands) instead of the large scale (North Atlantic).
+
+Once the date of the best analog is found, the function takes the temperature
+associated with that day (obsVar, t2m in ERA5), with a subset over the region of
+interest (Balearic Islands).
+
+(3) Same as (2), but here the analogs are searched for by looking for the highest
+correlation at the local scale (Balearic Islands), and t2m is used instead of SLP.
+
+
+In particular, the _Analogs Method_ uses a nonlinear approach that follows
+Yiou et al. (2013) (**Analogs**).
+
+An efficient implementation of Analogs is provided for CSTools by the
+`CST_Analogs()` function.
+
+Two datasets are used to illustrate how to use the function. The first one can be entirely run by the users since it uses data samples provided along with the package. The second one uses data that needs to be downloaded or requested.
+
+### Example 1: using data from CSTools
+
+
+After loading the **CSTools** package in the R session, the user will have access to the sample data `lonlat_data` and `lonlat_prec`.
+
+*Note: If it is the first time using CSTools, install the package by running `install.packages("CSTools")`.*
+
+```
+library(CSTools)
+```
+
+After exploring the data, the user can directly run the Analogs downscaling method using the 'Large_dist' criterion:
+
+```
+class(lonlat_data$exp)
+names(lonlat_data$obs)
+dim(lonlat_data$obs$data)
+dim(lonlat_data$exp$data)
+head(lonlat_data$exp$Dates$start)
+```
+There are 15 ensemble members available in the data set, 6 start dates and 3
+forecast times, which refer to the months of November, December and January
+following start dates on November 1st of the years 2000 to 2005.
+
+```
+down <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs)
+```
+
+The visualization of the first three time steps for the ensemble mean of the forecast initialized on the 1st of November 2000 can be done using the package **s2dv**:
+
+```
+library(s2dv)
+PlotLayout(PlotEquiMap, c('lat', 'lon'),
+           var = Reorder(MeanDims(down$data, 'member')[1,,,1,],
+                         c('time_exp', 'lat', 'lon')),
+           nrow = 1, ncol = 3,
+           lon = down$lon, lat = down$lat, filled.continents = FALSE,
+           titles = c("2000-11-01", "2000-12-01", "2001-01-01"), units = 'T(K)',
+           toptitle = 'Analogs sdate November 2000',
+           width = 10, height = 4, fileout = './Figures/Analogs1.png')
+```
+
+![](./Figures/Analogs1.png)
+
+The user can also request extra analogs and their information:
+
+```
+down <- CST_Analogs(expL = lonlat_data$exp, obsL = lonlat_data$obs,
+                    nAnalogs = 2, AnalogsInfo = TRUE)
+```
+
+Again, the user can explore the object `down`, which is of class 's2dv_cube'. The element 'data' contains in this case the metrics and the dates corresponding to the observed field:
+
+```
+class(down)
+names(down$data)
+dim(down$data$fields)
+dim(down$data$metric)
+dim(down$data$dates)
+down$data$dates[1,15,1,1]
+```
+
+The last command shows that the best analog for member 15, corresponding to the 1st of November 2000, is the 1st of November 2004:
+
+```
+PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(down$data$fields[1,,,15,1,1],
+           lonlat_data$obs$data[1,1,5,1,,]), nrow = 1, ncol = 2,
+           lon = down$lon, lat = down$lat, filled.continents = FALSE,
+           titles = c("Downscaled 2000-11-01", "Observed 2004-11-01"), units = 'T(K)',
+           width = 7, height = 4, fileout = './Figures/Analogs2.png')
+```
+
+![](./Figures/Analogs2.png)
+
+As expected, they are exactly the same.
+
+### Example 2: Load data using CST_Load
+
+In this case, the spatial field of a single forecast day will be downscaled using Analogs.
This will allow illustrating how to use CST_Load to retrieve observations separated from simulations. To explore other options, see other CSTools vignettes as well as `CST_Load` documentation. + +The simulations available for the desired model cover the period 1993-2016. Here, the 15th of October 2000 (for the simulation initialized in the 1st of October 2000), will be downscaled. +For ERA5 from 1979 to the present days. For this example we will just use October days from 2000 to 2006, so, the starting dates can be defined by running the +following lines: + + +``` +start <- as.Date(paste(2000, 10, "01", sep = ""), "%Y%m%d") +end <- as.Date(paste(2006, 10, "01", sep = ""), "%Y%m%d") +dateseq <- format(seq(start, end, by = "year"), "%Y%m%d") +``` + +Using the `CST_Load` function from **CSTool package**, the data available in our +data store can be loaded. The following lines show how this function can be +used. The experimental datasets are interpolated to the ERA5 grid by specifying the 'grid' parameter while ERA5 doesn't need to be interpolated. While parameter leadtimemax is set to 1 for the experimental dataset, it is set to 31 for the observations, returning the daily observations for October for the years requested in 'sdate' (2000-2006). +Download the data to run the recipe in the link https://downloads.cmcc.bo.it/d_chaves/ANALOGS/data_for_Analogs.Rdat or ask carmen.alvarez-castro at cmcc.it or nuria.perez at bsc.es. + +``` +exp <- list(name = 'ECMWF_system4_m1', + path = file.path("/esarchive/exp/ecmwf/system4_m1/", + "$STORE_FREQ$_mean/$VAR_NAME$_*/$VAR_NAME$_$START_DATE$.nc")) +obs <- list(name = 'ERA5', + path = file.path("/esarchive/recon/ecmwf/era5/$STORE_FREQ$_mean/", + "$VAR_NAME$_f1h-r1440x721cds/$VAR_NAME$_$YEAR$$MONTH$.nc")) + +expTAS <- CST_Load(var = 'tas', exp = list(exp), obs = NULL, + sdates = '20001001', latmin = 22, latmax = 70, + lonmin = -80, lonmax = 50, output ='lonlat', + storefreq = 'daily', nmember = 15, leadtimemin = 15, + leadtimemax = 15, method = "bilinear", grid = 'r1440x721', + nprocs = 1) +obsTAS <- CST_Load(var = 'tas', exp = NULL, obs = list(obs), + sdates = dateseq, leadtimemax = 31, + latmin = 22, latmax = 70, + lonmin = -80, lonmax = 50, output = 'lonlat', + nprocs = 1, storefreq = "daily", nmember = 1) + +expPSL <- CST_Load(var = 'psl', exp = list(exp), obs = NULL, + sdates = '20001001', latmin = 22, latmax = 70, + lonmin = -80, lonmax = 50, output ='lonlat', + storefreq = 'daily', nmember = 15, leadtimemin = 15, + leadtimemax = 15, method = "bilinear", grid = 'r1440x721', + nprocs = 1) +obsPSL <- CST_Load(var = 'psl', exp = NULL, obs = list(obs), + sdates = dateseq, leadtimemax = 31, + latmin = 22, latmax = 70, + lonmin = -80, lonmax = 50, output = 'lonlat', + nprocs = 1, storefreq = "daily", nmember = 1) + +save(expTAS, obsTAS, expPSL, obsPSL, + file = "../../data_for_Analogs.Rdat", + version = 2) + +#load(file = "./data_for_Analogs.Rdat") +``` + +*Note: `CST_Load` allows to load the data simultaneously for 'exp' and 'obs' already formatted to have the same dimensions as in this example. However, it is possible to request separated 'obs' and 'exp'. In this second case, the observations could be return in a continous time series instead of being split in start dates and forecast time.* + + +The s2dv_cube objects `expTAS`,`obsTAS`, `expPSL` and `obsPSL` are now loaded in the R enviroment. The first two elements correspond to the experimental and observed data for temperature and the other two are the equivalent for the SLP data. 
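+
+As a quick check (an illustration only, assuming the objects created by the
+`CST_Load` calls above are available in the session), it can be verified that
+the predictor and predictand pairs share the same grid, since the experiments
+were interpolated to the ERA5 grid:
+
+```
+# both experimental fields are on the same lat-lon grid
+identical(dim(expPSL$data)[c('lat', 'lon')], dim(expTAS$data)[c('lat', 'lon')])
+# and the observational fields share that grid too
+all.equal(as.vector(obsPSL$lat), as.vector(obsTAS$lat))
+all.equal(as.vector(obsPSL$lon), as.vector(obsTAS$lon))
+```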
+ +Loading the data using `CST_Load` allows to obtain two lists, one for the +experimental data and another for the observe data, with the same elements and +compatible dimensions of the data element: + + +``` +dim(expTAS$data) +dataset member sdate ftime lat lon + 1 15 1 1 193 521 +dim(obsTAS$data) +dataset member sdate ftime lat lon + 1 1 7 31 193 521 +``` + + +#### Two variables and criteria Large [scale] Distance: + +The aim is to downscale the temperature field of the simulation for the 15th of October 2000 but looking at the pressure pattern: + +``` +down1 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE, + criteria = "Large_dist", nAnalogs = 3, + obsVar = obsTAS, expVar = expTAS) +``` + +Some warnings could appear indicating information about undefining parameters. It is possible to explore the information in object `down` by runing: + +``` +names(down1$data) +dim(down1$data$field) +#nAnalogs lat lon member time +# 3 193 521 15 1 +dim(down1$data$dates) +#nAnalogs member time +# 3 15 1 +down1$data$dates[1,1,1] +#"2005-10-07 UTC" +``` + +Now, we can visualize the output and save it using library ragg (not mandatory): + +``` +library(ragg) +agg_png("/esarchive/scratch/nperez/git/cstools/vignettes/Figures/Analogs3.png", + width = 1100, height = 500, units = 'px',res = 144) +PlotLayout(PlotEquiMap, c('lat', 'lon'), + var = list(expPSL$data[1,1,1,1,,], obsPSL$data[1,1,1,15,,], + obsPSL$data[1,1,6,7,,]), + lon = obsPSL$lon, lat = obsPSL$lat, filled.continents = FALSE, + titles = c('Exp PSL 15-10-2000','Obs PSL 15-10-2000', + 'Obs PSL 7-10-2005'), + toptitle = 'First member', ncol = 3, nrow = 1) +dev.off() +agg_png("/esarchive/scratch/nperez/git/cstools/vignettes/Figures/Analogs4.png", + width = 800, height = 800, units = 'px',res = 144) +PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list( + expTAS$data[1,1,1,1,,], obsTAS$data[1,1,1,15,,], + down1$data$field[1,,,1,1], obsTAS$data[1,1,6,7,,]), + lon = obsTAS$lon, lat = obsTAS$lat, filled.continents = FALSE, + titles = c('Exp TAS 15-10-2000', 'Obs TAS 15-10-2000', + 'Analog TAS 15-10-2000', 'Obs TAS 7-10-2005'), + ncol = 2, nrow = 2) +dev.off() +``` + +![](./Figures/Analogs3.png) + +The previous figure, shows the PSL inputs and the PSL pattern for the PSL the 7th of October, 2005, which is the best analog. *Note: Analogs automatically exclude the day is being downscaled from the observations.* + +The next figure shows the input temperature fields, and the result analog which corresponds to the temperature of the 7th of October, 2005: + +![](./Figures/Analogs4.png) + + +#### Two variables and criteria Local [scale] Distance: + +The aim is to downscale the temperature simulation of the 15th of October 2000, by considering the pressure spatial pattern n the large scale and the local pressure pattern on a given region. Therefore, a region is defined providing maximum and minimum latitude and longitude coordinates, in this case, selecting the Balearic Islands: + +``` +region <- c(lonmin = 0, lonmax = 5, latmin = 38.5, latmax = 40.5) +expPSL$data <- expPSL$data[1,1,1,1,,] +expTAS$data <- expTAS$data[1,1,1,1,,] +down2 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE, + criteria = "Local_dist", nAnalogs = 50, + obsVar = obsTAS, expVar = expTAS, + region = region) +``` + +The parameter 'nAnalogs' doesn't correspond to the number of Analogs returned, but to the number of the best observations to use in the comparison between large and local scale. 
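+
+To see what 'nAnalogs' controls, the candidate information stored when
+`AnalogsInfo = TRUE` can be inspected (a minimal sketch, assuming the `down2`
+object from the previous call is still available); the first position along the
+analog dimension holds the best analog, which is the one used in the figures below:
+
+```
+str(down2$data$dates)   # dates of the candidate analogs that were considered
+str(down2$data$metric)  # their distance (or correlation) metric
+```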
+
+In this case, when looking at the large-scale pattern and also at the local-scale pattern, the best analog for the first member is the 13th of October 2001:
+
+```
+down2$data$dates[1,1]
+[1] "2001-10-13 UTC"
+```
+
+```
+library(ClimProjDiags)
+agg_png("/esarchive/scratch/nperez/git/cstools/vignettes/Figures/Analogs5.png",
+        width = 800, height = 800, units = 'px', res = 144)
+PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(
+           expTAS$data, obsTAS$data[1,1,1,15,,],
+           down2$data$field[1,,,1], SelBox(obsTAS$data[1,1,2,13,,],
+           lon = as.vector(obsTAS$lon), lat = as.vector(obsTAS$lat),
+           region)$data),
+           special_args = list(list(lon = expTAS$lon, lat = expTAS$lat),
+                               list(lon = obsTAS$lon, lat = obsTAS$lat),
+                               list(lon = down2$lon, down2$lat),
+                               list(lon = down2$lon, down2$lat)),
+           filled.continents = FALSE,
+           titles = c('Exp TAS 15-10-2000', 'Obs TAS 15-10-2000',
+                      'Analog TAS 15-10-2000', 'Obs TAS 13-10-2001'),
+           ncol = 2, nrow = 2)
+dev.off()
+```
+
+![](./Figures/Analogs5.png)
+
+The previous figure shows that the best analog field corresponds to the observed field on the 13th of October 2001.
+
+
+#### Two variables and criteria Local [scale] Correlation:
+
+```
+down3 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE,
+                     criteria = "Local_cor", nAnalogs = 50,
+                     obsVar = obsTAS, expVar = expTAS,
+                     region = region)
+```
+
+In this case, when looking at the large-scale pattern and also at the local-scale correlation, the best analog for the first member is the 10th of October 2001:
+
+```
+down3$data$dates[1,1]
+[1] "2001-10-10 UTC"
+```
+
+```
+agg_png("/esarchive/scratch/nperez/git/cstools/vignettes/Figures/Analogs6.png",
+        width = 800, height = 400, units = 'px', res = 144)
+PlotLayout(PlotEquiMap, c('lat', 'lon'), var = list(
+           down3$data$field[1,,,1], SelBox(obsTAS$data[1,1,2,10,,],
+           lon = as.vector(obsTAS$lon), lat = as.vector(obsTAS$lat),
+           region)$data), lon = down3$lon, lat = down3$lat,
+           filled.continents = FALSE,
+           titles = c('Analog TAS 15-10-2000', 'Obs TAS 10-10-2001'),
+           ncol = 2, nrow = 1)
+dev.off()
+```
+
+![](./Figures/Analogs6.png)
+
+The previous figure shows that the best analog field corresponds to the observed field on the 10th of October 2001.
+
+#### Downscaling exp$data using the excludeTime parameter
+
+`excludeTime` is set by default to the dates of `expL` (Time_expL), so that the
+day of interest is not returned as its own analog. If other dates should also be
+excluded, they can be provided in the argument 'excludeTime'.
+
+```
+down4 <- CST_Analogs(expL = expPSL, obsL = obsPSL, AnalogsInfo = TRUE,
+                     criteria = "Large_dist", nAnalogs = 20,
+                     obsVar = obsTAS, expVar = expTAS,
+                     region = region, excludeTime = obsPSL$Dates$start[10:20])
+```
+
+In this case, the best analog is still the 7th of October 2005.
+
+
+*Note: You can compute the anomaly values before applying the criteria (as in Yiou et al., 2013) using `CST_Anomaly` of the CSTools package.*
\ No newline at end of file
diff --git a/vignettes/BestEstimateIndex_vignette.Rmd b/vignettes/BestEstimateIndex_vignette.Rmd
index e549533f389fec6b3eb074a22cdf9d61109cfdf3..213499ee67e67ffba713f63e7e918e7e24508ac9 100644
--- a/vignettes/BestEstimateIndex_vignette.Rmd
+++ b/vignettes/BestEstimateIndex_vignette.Rmd
@@ -158,9 +158,9 @@ The following figures show the probabilities for lower tercile for precipitation
 - The winter precipitation (from November to March) from 1997 to 2016 over Iberia Peninsula from he ECMWF-S5 dynamical model with resolution 0.5º x 0.5º, to weighting with the previous Best Estimation of Index NAO.
-![](./Figures/BestEstimateIndex_fig1.png){width=70%}
+![](./Figures/BestEstimateIndex_fig1.png)
-![](./Figures/BestEstimateIndex_fig3.png){width=70%}
+![](./Figures/BestEstimateIndex_fig3.png)
@@ -169,6 +169,6 @@ March 2013, for example, will be in the lower tercile from ECMWF Seasonal Foreca
-![](./Figures/BestEstimateIndex_fig2.png){width=70%}
+![](./Figures/BestEstimateIndex_fig2.png)
-![](./Figures/BestEstimateIndex_fig4.png){width=70%}
\ No newline at end of file
+![](./Figures/BestEstimateIndex_fig4.png)
diff --git a/vignettes/Data_Considerations.Rmd b/vignettes/Data_Considerations.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..5a9f81e695c117b70384af6513343ac616c9439a
--- /dev/null
+++ b/vignettes/Data_Considerations.Rmd
@@ -0,0 +1,143 @@
+---
+author: "Nuria Perez"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteEngine{knitr::knitr}
+  %\VignetteIndexEntry{Data Storage and Retrieval}
+  %\usepackage[utf8]{inputenc}
+---
+
+Data Storage and Retrieval
+-----------------------------------------
+
+CSTools aims at post-processing seasonal climate forecasts with state-of-the-art methods. However, some doubts and issues may arise the first time the package is used: do I need a specific R version? How much RAM memory do I need? Where can I find the datasets? Should I format the datasets? etc. Therefore, some recommendations and key points to take into account are gathered here in order to facilitate the use of CSTools.
+
+### 1. System requirements
+
+The first question that may come to a new user is what their computer needs in order to run CSTools. Here is the list of the most frequent needs:
+- netcdf library version 4.1 or later
+- cdo (I am currently using 1.6.3)
+- R 3.4.2 or later
+
+On the other hand, the computational power of a computer could be a limitation, but it will depend on the size of the data that the users need for their analysis. For instance, they can estimate the memory they will require by multiplying the following values:
+
+- Area of the study region (km^2)
+- Area of the desired grid cell (km) (or square of the grid cell size)
+- Number of models + 1 observational dataset
+- Forecast time length (days or months)
+- Temporal resolution (1 for daily, 4 for 6-hourly or 24 for hourly data)
+- Hindcast length (years)
+- Number of start dates (or seasons)
+- Number of members
+- Extra factor for the computation of the functions (*)
+
+For example, if they want to use the hindcast of 3 different seasonal simulations with 9 members, at daily resolution, to perform a regional study in, let's say, a region of 40000 km2 with a resolution of 5 km:
+
+> 200km x 200km / (5km * 5km) * (3 + 1) models * 214 days * 30 hindcast years * 9 members x 2 start dates x 8 bytes ~ 6 GB
+
+(*) Furthermore, some of the functions need to duplicate or triplicate (or even more) the inputs for performing their analysis. Therefore, between 12 and 18 GB of RAM memory would be necessary in this example.
+
+
+### 2. Overview of CSTools structure
+
+All CSTools functions have been developed following the same guidelines. The main point, interesting for the users, is that each function is built on several nested levels, and it is possible to distinguish at least three levels:
+- `CST_FunctionName()`: this function works on 's2dv_cube' objects and is exposed to the users.
+- `FunctionName()`: this function works on N-dimensional arrays with named dimensions and is exposed to the users.
+- lower-level functions, such as `.functionname()`, which work on the minimum required elements and are not exposed to the user.
+
+A reasonably important doubt that a new user may have at this point is: what is an 's2dv_cube' object?
+'s2dv_cube' is the class of an object storing the data and metadata in several elements:
+ + $data element is an N-dimensional array with named dimensions containing the data (e.g.: temperature values),
+ + $lat($lon) element is a vector indicating the latitudinal(longitudinal) values of the region in $data,
+ + $Variable describes the variable name and its units,
+ + other elements for extra metadata information.
+
+It is possible to visualize an example of the structure of an 's2dv_cube' object by opening an R session and running:
+
+```
+library(CSTools)
+class(lonlat_data$exp) # check the class of the object lonlat_data$exp
+names(lonlat_data$exp) # shows the names of the elements in the object lonlat_data$exp
+str(lonlat_data$exp) # shows the full structure of the object lonlat_data$exp
+```
+
+### 3. Data storage recommendations
+
+CSTools' main objective is to share state-of-the-art post-processing methods with the scientific community. However, in order to facilitate its use, the CSTools package includes a function, `CST_Load`, to read the files and make the data available in 's2dv_cube' format in the R session memory to conduct the analysis. Some benefits of using this function are:
+- CST_Load can read multiple experimental or observational datasets at once,
+- CST_Load can regrid all datasets to a common grid,
+- CST_Load reformats observational datasets to the same structure as experiments (i.e. matching start dates and forecast lead times between experiments and observations) or keeps observations as usual time series (i.e. a continuous temporal dimension),
+- CST_Load can subset a region from global files,
+- CST_Load can read multiple members in monthly, daily or other resolutions,
+- CST_Load can perform spatial averages over a defined region or return the lat-lon grid, and
+- CST_Load can read from files using multiple parallel processes, among other possibilities.
+
+If you plan to use CST_Load, we have developed guidelines to download and format the data. See [CDS_Seasonal_Downloader](https://earth.bsc.es/gitlab/es/cds-seasonal-downloader).
+
+There are alternatives to the CST_Load function; for instance, the user can:
+1) use another tool to read the data from files (e.g.: the ncdf4, easyNCDF or startR packages) and then convert it to the class 's2dv_cube' with the `s2dv_cube()` function, or
+2) if they keep facing problems converting the data to that class, they can just skip it and work with the functions without the prefix 'CST_'. In this case, they will be able to work with the basic class 'array'.
+
+Independently of the tool used to read the data from your local storage into your R session, this step can be automated by giving a common structure and format to all datasets in your local storage. Here is the list of minimum requirements that CST_SaveExp follows to be able to store an experiment that could later be loaded with CST_Load:
+
+- this function creates one NetCDF file per start date with the name of the variable and the start date: `$VARNAME$_$YEAR$$MONTH$.nc`
+- each file has dimensions: lon, lat, ensemble and time.
+
+
+
+### 4. CST_Load example
+
+
+```
+library(CSTools)
+library(zeallot)
+path <- "/esarchive/exp/meteofrance/system6c3s/$STORE_FREQ$_mean/$VAR_NAME$_f6h/$VAR_NAME$_$START_DATE$.nc"
+ini <- 1993
+fin <- 2012
+month <- '05'
+start <- as.Date(paste(ini, month, "01", sep = ""), "%Y%m%d")
+end <- as.Date(paste(fin, month, "01", sep = ""), "%Y%m%d")
+dateseq <- format(seq(start, end, by = "year"), "%Y%m%d")
+c(exp, obs) %<-% CST_Load(var = 'sfcWind',
+                          exp = list(list(name = 'meteofrance/system6c3s', path = path)),
+                          obs = 'erainterim',
+                          sdates = dateseq, leadtimemin = 2, leadtimemax = 4,
+                          lonmin = -19, lonmax = 60.5, latmin = 0, latmax = 79.5,
+                          storefreq = "daily", sampleperiod = 1, nmember = 9,
+                          output = "lonlat", method = "bilinear",
+                          grid = "r360x180")
+```
+
+
+Extra lines to see the size of the objects and to visualize the data:
+
+```
+library(pryr)
+object_size(exp)
+# 27.7 MB
+object_size(obs)
+# 3.09 MB
+library(s2dv)
+PlotEquiMap(exp$data[1,1,1,1,,], lon = exp$lon, lat = exp$lat,
+            filled.continents = FALSE, fileout = "Meteofrance_r360x180.png")
+```
+
+
+![Meteofrance](../vignettes/Figures/Meteofrance_r360x180.png)
+
+### Managing big datasets and memory issues
+
+Depending on the user's needs, limitations can be found when trying to process big datasets. This may depend on the number of ensemble members, the resolution and the region that the user wants to process. CSTools has been developed to be compatible with the startR package, which covers these aims:
+
+- retrieving data from NetCDF files to RAM memory in a flexible way,
+- automatically dividing datasets into pieces to perform an analysis while avoiding memory issues, and
+- running the workflow on your local machine or submitting it to an HPC cluster.
+This is especially useful when a user doesn’t have access to an HPC and must work with a small amount of RAM:
+
+![](./Figures/CSTvsNonCST.png)
+
+There is a [video tutorial](https://earth.bsc.es/wiki/lib/exe/fetch.php?media=tools:startr_tutorial_2020.mp4) about the startR package, together with the tutorial material.
+
+The functions in CSTools (with or without the CST_ prefix) include a parameter called ‘ncores’ that allows the code to be automatically parallelized over multiple cores when the parameter is set to a value greater than one.
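+
+For instance, a minimal sketch of the difference (assuming that the function of
+interest accepts the 'ncores' argument, as `CST_RFSlope` does in this version)
+could look like this:
+
+```
+library(CSTools)
+# spectral slopes of the sample precipitation data, computed serially
+slopes <- CST_RFSlope(lonlat_prec, time_dim = c("ftime", "sdate"))
+# the same computation distributed over 4 cores
+slopes_par <- CST_RFSlope(lonlat_prec, time_dim = c("ftime", "sdate"), ncores = 4)
+```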
diff --git a/vignettes/ENSclustering_vignette.Rmd b/vignettes/ENSclustering_vignette.Rmd index af8d5c6fd2fac1820509ff1426bef452bf861379..96ff2f5e399a10b2aae0f36c37da3cfef7933104 100644 --- a/vignettes/ENSclustering_vignette.Rmd +++ b/vignettes/ENSclustering_vignette.Rmd @@ -180,7 +180,7 @@ PlotLayout(PlotEquiMap, plot_dims = c("lat", "lon"), fileout = "EnsClus_4clus_both_mem_std_Fig1.png") ``` -![Figure 1 - Representative temperature anomalies of each cluster.](./Figures/EnsClus_4clus_both_mem_std_Fig1.png){width=100%} +![Figure 1 - Representative temperature anomalies of each cluster.](./Figures/EnsClus_4clus_both_mem_std_Fig1.png) @@ -202,7 +202,7 @@ PlotLayout(PlotEquiMap, plot_dims = c("lat", "lon"), fileout = "EnsClus_4clus_both_mem_std_Fig2.png") ``` -![Figure 2 - Temperature anomalies pattern per 'member - start_date' pair with cluster to which it is assigned displayed as map title.](./Figures/EnsClus_4clus_both_mem_std_Fig2.png){width=100%} +![Figure 2 - Temperature anomalies pattern per 'member - start_date' pair with cluster to which it is assigned displayed as map title.](./Figures/EnsClus_4clus_both_mem_std_Fig2.png) ### Final notes diff --git a/vignettes/Figures/Analogs1.png b/vignettes/Figures/Analogs1.png new file mode 100644 index 0000000000000000000000000000000000000000..db966fd89c24e19585e72bce7ae9d761142eaad3 Binary files /dev/null and b/vignettes/Figures/Analogs1.png differ diff --git a/vignettes/Figures/Analogs2.png b/vignettes/Figures/Analogs2.png new file mode 100644 index 0000000000000000000000000000000000000000..976bde0a08be6062a456a931018f9e13e5e3fa31 Binary files /dev/null and b/vignettes/Figures/Analogs2.png differ diff --git a/vignettes/Figures/Analogs3.png b/vignettes/Figures/Analogs3.png new file mode 100644 index 0000000000000000000000000000000000000000..3731301e0ab8661fd1b92fc45f564edd475ee077 Binary files /dev/null and b/vignettes/Figures/Analogs3.png differ diff --git a/vignettes/Figures/Analogs4.png b/vignettes/Figures/Analogs4.png new file mode 100644 index 0000000000000000000000000000000000000000..2b1e1a364923d6da7c0cf252cc8bdf110dd1e79c Binary files /dev/null and b/vignettes/Figures/Analogs4.png differ diff --git a/vignettes/Figures/Analogs5.png b/vignettes/Figures/Analogs5.png new file mode 100644 index 0000000000000000000000000000000000000000..f8632d0eb75b390892335b72a99c0dcbf5d0b737 Binary files /dev/null and b/vignettes/Figures/Analogs5.png differ diff --git a/vignettes/Figures/Analogs6.png b/vignettes/Figures/Analogs6.png new file mode 100644 index 0000000000000000000000000000000000000000..c5319ed82d22e028b34cc9f6c1f180477882f35f Binary files /dev/null and b/vignettes/Figures/Analogs6.png differ diff --git a/vignettes/Figures/BestEstimateIndex_fig1.png b/vignettes/Figures/BestEstimateIndex_fig1.png index bd960203aaded56d21794650520c887339e0824e..384f0dd8c932b5c60919f973d8df775f204868a2 100644 Binary files a/vignettes/Figures/BestEstimateIndex_fig1.png and b/vignettes/Figures/BestEstimateIndex_fig1.png differ diff --git a/vignettes/Figures/BestEstimateIndex_fig2.png b/vignettes/Figures/BestEstimateIndex_fig2.png index 24dbceb26d63f9c71765efd849130f0302780e04..460bef5e406086f1faec6679a8a9dd10523cebd9 100644 Binary files a/vignettes/Figures/BestEstimateIndex_fig2.png and b/vignettes/Figures/BestEstimateIndex_fig2.png differ diff --git a/vignettes/Figures/BestEstimateIndex_fig3.png b/vignettes/Figures/BestEstimateIndex_fig3.png index ad43826f00abb0f204b549dff8f54bac8b3d94f2..e7cb17894a62f1db1f9235dbb239840c082c028e 100644 Binary files 
a/vignettes/Figures/BestEstimateIndex_fig3.png and b/vignettes/Figures/BestEstimateIndex_fig3.png differ diff --git a/vignettes/Figures/BestEstimateIndex_fig4.png b/vignettes/Figures/BestEstimateIndex_fig4.png index 632d5e50cff8ff93b580cf6e03ea545216f83411..22c1d5001c7a5599628bee367e4b725ee97a3b5a 100644 Binary files a/vignettes/Figures/BestEstimateIndex_fig4.png and b/vignettes/Figures/BestEstimateIndex_fig4.png differ diff --git a/vignettes/Figures/CSTvsNonCST.png b/vignettes/Figures/CSTvsNonCST.png new file mode 100644 index 0000000000000000000000000000000000000000..4088bdc6550ebbf402e0131a5737c78c1af8c1a7 Binary files /dev/null and b/vignettes/Figures/CSTvsNonCST.png differ diff --git a/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig1.png b/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig1.png index 9bfcb5f7777d671db02852407e2ba69bf1997885..123fdb7795fc91926af753592d66c21d60f2b092 100644 Binary files a/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig1.png and b/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig1.png differ diff --git a/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig2.png b/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig2.png index ed3938e4be4b1498e69302090f2ae364fb6f015f..2c05166cfa25e349166f301957fa23a47e96efcd 100644 Binary files a/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig2.png and b/vignettes/Figures/EnsClus_4clus_both_mem_std_Fig2.png differ diff --git a/vignettes/Figures/Meteofrance_r360x180.png b/vignettes/Figures/Meteofrance_r360x180.png new file mode 100644 index 0000000000000000000000000000000000000000..1438bd493288c10cf80b1796660e97ba51e5df56 Binary files /dev/null and b/vignettes/Figures/Meteofrance_r360x180.png differ diff --git a/vignettes/Figures/MostLikelyTercile_fig1.png b/vignettes/Figures/MostLikelyTercile_fig1.png new file mode 100644 index 0000000000000000000000000000000000000000..86ba94a2c47762e182e936848a2529a305143434 Binary files /dev/null and b/vignettes/Figures/MostLikelyTercile_fig1.png differ diff --git a/vignettes/Figures/MostLikelyTercile_fig2.png b/vignettes/Figures/MostLikelyTercile_fig2.png new file mode 100644 index 0000000000000000000000000000000000000000..342877bc82a89d4f355d3133332dd3b477c10296 Binary files /dev/null and b/vignettes/Figures/MostLikelyTercile_fig2.png differ diff --git a/vignettes/Figures/MostLikelyTercile_fig3.png b/vignettes/Figures/MostLikelyTercile_fig3.png new file mode 100644 index 0000000000000000000000000000000000000000..1fa8460723a07d2abb08251758a585a2c3bcf7fc Binary files /dev/null and b/vignettes/Figures/MostLikelyTercile_fig3.png differ diff --git a/vignettes/Figures/MultiModelSkill_cor_tas_1992-2012.png b/vignettes/Figures/MultiModelSkill_cor_tas_1992-2012.png index 47c35f7298bf8c9aeae6775a299d5cb1147931f7..13cdd7e1203254f071e77311df840d4e706277b0 100644 Binary files a/vignettes/Figures/MultiModelSkill_cor_tas_1992-2012.png and b/vignettes/Figures/MultiModelSkill_cor_tas_1992-2012.png differ diff --git a/vignettes/Figures/MultiModelSkill_rms_tas_1992-2012.png b/vignettes/Figures/MultiModelSkill_rms_tas_1992-2012.png index 52ededf3e3e91c4f0a7248619518b6ea02aaa461..7b10d1e9bff9b4b026a24aaf1820983b06225227 100644 Binary files a/vignettes/Figures/MultiModelSkill_rms_tas_1992-2012.png and b/vignettes/Figures/MultiModelSkill_rms_tas_1992-2012.png differ diff --git a/vignettes/Figures/MultiModelSkill_rmsss_tas_1992-2012.png b/vignettes/Figures/MultiModelSkill_rmsss_tas_1992-2012.png index 10a9d45d189382d0e077805be08ffbd8ae3bb805..607bcb675f9cae4c44129e483f5fefedab6b8a8c 100644 Binary files 
a/vignettes/Figures/MultiModelSkill_rmsss_tas_1992-2012.png and b/vignettes/Figures/MultiModelSkill_rmsss_tas_1992-2012.png differ diff --git a/vignettes/Figures/MultivarRMSE_gloseas5_tas_prlr_1992-2012.png b/vignettes/Figures/MultivarRMSE_gloseas5_tas_prlr_1992-2012.png index 0229746629fa7eae6fb51d17c7e55fa0f8689298..c652c2d2d0f6b792b6658cdb27011f6c690211a5 100644 Binary files a/vignettes/Figures/MultivarRMSE_gloseas5_tas_prlr_1992-2012.png and b/vignettes/Figures/MultivarRMSE_gloseas5_tas_prlr_1992-2012.png differ diff --git a/vignettes/Figures/Obs_Persistence.png b/vignettes/Figures/Obs_Persistence.png index 6e4c33058489ed0491441ea7d77179861cbd1ebc..82cc33967ca962ae094f9bc4f0e05a035aad6bec 100644 Binary files a/vignettes/Figures/Obs_Persistence.png and b/vignettes/Figures/Obs_Persistence.png differ diff --git a/vignettes/Figures/PlotForecastPDF_ex1.png b/vignettes/Figures/PlotForecastPDF_ex1.png index d7067133054902e6b661f828f8a89b0bb285aa68..516bd717e792da8c885d42257ae4d9fcb72291b1 100644 Binary files a/vignettes/Figures/PlotForecastPDF_ex1.png and b/vignettes/Figures/PlotForecastPDF_ex1.png differ diff --git a/vignettes/Figures/PlotForecastPDF_ex2.png b/vignettes/Figures/PlotForecastPDF_ex2.png index 67b4af1e1b523c564047fb545baecc11a4dbac2c..c7f9d9e434180700fa858117c8cc52bf5835957c 100644 Binary files a/vignettes/Figures/PlotForecastPDF_ex2.png and b/vignettes/Figures/PlotForecastPDF_ex2.png differ diff --git a/vignettes/Figures/PlotForecastPDF_ex3.png b/vignettes/Figures/PlotForecastPDF_ex3.png index 10b1e63b4766e8aff8d10419b849d8677f197c58..d8eec5bedfc2696266972d2eb61a8eae5e514764 100644 Binary files a/vignettes/Figures/PlotForecastPDF_ex3.png and b/vignettes/Figures/PlotForecastPDF_ex3.png differ diff --git a/vignettes/Figures/PlotForecastPDF_ex4.png b/vignettes/Figures/PlotForecastPDF_ex4.png index 34df30d73e73f1d1d8e5ff4c34e57c2d487f4c2f..7254ee2e5a21740739659f5ab5b87b4927f5383a 100644 Binary files a/vignettes/Figures/PlotForecastPDF_ex4.png and b/vignettes/Figures/PlotForecastPDF_ex4.png differ diff --git a/vignettes/Figures/RainFARM_fig1.png b/vignettes/Figures/RainFARM_fig1.png index 1818c6e3e2ac0f264ad07b8b73e987492a5d3a7f..8c61d083990ba9b4c27eff4c90b4fade4c81ff71 100644 Binary files a/vignettes/Figures/RainFARM_fig1.png and b/vignettes/Figures/RainFARM_fig1.png differ diff --git a/vignettes/Figures/RainFARM_fig2.png b/vignettes/Figures/RainFARM_fig2.png index a821f4ad0384de5de83b6475d6da76210568ea1c..cd8a843b067c10a7b764428ec4068869435fdacf 100644 Binary files a/vignettes/Figures/RainFARM_fig2.png and b/vignettes/Figures/RainFARM_fig2.png differ diff --git a/vignettes/Figures/observed_regimes.png b/vignettes/Figures/observed_regimes.png index 1bb134a108ea158a23e743b3d4d0ca94634ff47e..678ac72aad7fd3f7e55e01abcbd90372c838b3c0 100644 Binary files a/vignettes/Figures/observed_regimes.png and b/vignettes/Figures/observed_regimes.png differ diff --git a/vignettes/Figures/predicted_regimes.png b/vignettes/Figures/predicted_regimes.png index 354afb7db5ff6d9d219ad6445abcd0976070d6f6..9f69484f5c97a041454330247f967b5c912c2cff 100644 Binary files a/vignettes/Figures/predicted_regimes.png and b/vignettes/Figures/predicted_regimes.png differ diff --git a/vignettes/MostLikelyTercile_vignette.Rmd b/vignettes/MostLikelyTercile_vignette.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..6a5ce54758083cdbc27e82ad72cf0b9a6fa3160f --- /dev/null +++ b/vignettes/MostLikelyTercile_vignette.Rmd @@ -0,0 +1,213 @@ +--- +author: "Louis-Philippe Caron and Núria Pérez-Zanón" +date: 
"`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteEngine{knitr::knitr} + %\VignetteIndexEntry{Most Likely Terciles} + %\usepackage[utf8]{inputenc} +--- + + +Computing and displaying the most likely tercile of a seasonal forecast +======================== + +In this example, we will use CSTools to visualize a probabilistic forecast (most likely tercile) of summer near-surface temperature produced by ECMWF System5. The (re-)forecasts used are initilized on May 1st for the period 1981-2020. The target for the forecast is June-August (JJA) 2020. The forecast data are taken from the Copernicus Climate Data Store. + + +### 1. Preliminary setup + + +To run this vignette, the following R packages should be installed and loaded: + + +```r +library(CSTools) +library(s2dv) +library(s2dverification) +library(multiApply) +library(zeallot) +library(easyVerification) +``` + + +### 2. Loading the data + + +We first define a few parameters. We start by defining the region we are interested in. In this example, we focus on the Mediterranean region. + + +```r +lat_min = 25 +lat_max = 52 +lon_min = -10 +lon_max = 40 +``` + +If the boundaries are not specified, the domain will be the entire globe. + +We also define the start dates for the hindcasts/forecast (in this case, May 1st 1981-2020) and create a sequence of dates that will be required by the load function. + + +```r +ini <- 1981 +fin <- 2020 +numyears <- fin - ini +1 +mth = '05' +start <- as.Date(paste(ini, mth, "01", sep = ""), "%Y%m%d") +end <- as.Date(paste(fin, mth, "01", sep = ""), "%Y%m%d") +dateseq <- format(seq(start, end, by = "year"), "%Y%m%d") +``` + +We then define the target months for the forecast (i.e. JJA). The months are given relative to the start date (May in this case) considering that monthly simulations are being analyzed. + + +```r +mon1 <- 2 +monf <- 4 +``` + + +Finally, we define the forecast system, an observational reference, the variable of interest and the common grid onto which to interpolate. + + +```r +forecastsys <- 'system5c3s' +obs <- 'erainterim' +grid <- "256x128" +clim_var = 'tas' +``` + +Finally, the data are loaded using `CST_Load`: + + +```r +c(exp,obs) %<-% CST_Load(var = clim_var, exp = forecastsys, obs = obs, + sdates = dateseq, leadtimemin = mon1, leadtimemax = monf, + lonmin = lon_min, lonmax = lon_max, + latmin = lat_min, latmax = lat_max, + storefreq = "monthly", sampleperiod = 1, nmember = 10, + output = "lonlat", method = "bilinear", + grid = paste("r", grid, sep = "")) +``` + +Loading the data using CST_Load returns two objects, one for the experimental data and another one for the observe data, with the same elements and compatible dimensions of the data element: + + +```r +> dim(exp$data) +dataset member sdate ftime lat lon + 1 10 40 3 19 36 +> dim(obs$data) +dataset member sdate ftime lat lon + 1 1 40 3 19 36 +``` + + +The latitude and longitude are saved for later use: + + +```r +Lat <- exp$lat +Lon <- exp$lon +``` + +### 3. Computing probabilities + +First, anomalies of forecast and observations are computed using cross-validation on individual members: + + +``` +c(Ano_Exp, Ano_Obs) %<-% CST_Anomaly(exp = exp, obs = obs, cross = TRUE, memb = TRUE) +``` + + +The seasonal mean of both forecasts and observations are computed by averaging over the ftime dimension. 
+
+
+```r
+Ano_Exp$data <- MeanDims(Ano_Exp$data, 'ftime')
+Ano_Obs$data <- MeanDims(Ano_Obs$data, 'ftime')
+```
+
+
+Finally, the probabilities of each tercile are computed by evaluating which tercile is forecast by each ensemble member for the latest start date (2020), using the function `ProbBins` from **s2dverification**, and then averaging the results along the member dimension.
+
+
+```r
+PB <- ProbBins(Ano_Exp$data, fcyr = numyears, thr = c(1/3, 2/3), quantile = TRUE,
+               posdates = 3, posdim = 2, compPeriod = "Without fcyr")
+prob_map <- MeanDims(PB, c('sdate', 'member', 'dataset'))
+```
+
+
+### 4. Visualization with PlotMostLikelyQuantileMap
+
+
+We then plot the most likely tercile using the **CSTools** function `PlotMostLikelyQuantileMap`.
+
+
+```r
+PlotMostLikelyQuantileMap(probs = prob_map, lon = Lon, lat = Lat,
+                          coast_width = 1.5, legend_scale = 0.8,
+                          toptitle = paste0('Most likely tercile - ', clim_var,
+                                            ' - ECMWF System5 - JJA 2020'))
+```
+
+![](./Figures/MostLikelyTercile_fig1.png)
+
+The forecast calls for above-average temperature over most of the Mediterranean basin and near-average temperature over some smaller regions. But can this forecast be trusted?
+
+To answer this, it is useful to evaluate the skill of the system at forecasting near-surface temperature over the period for which hindcasts are available. We can then use this information to mask the regions for which the system has no skill.
+
+To do so, we first calculate the ranked probability skill score (RPSS) and then exclude/mask from the forecasts the regions for which the RPSS is smaller than or equal to 0 (no improvement with respect to climatology).
+
+
+### 5. Computing the skill score
+
+
+First, we evaluate and plot the RPSS. We use the 'rpss' metric included in `CST_MultiMetric`, which relies on the **easyVerification** package and requires removing the missing values from the latest start dates:
+
+
+```r
+Ano_Exp$data <- Subset(Ano_Exp$data, along = 'sdate', indices = 1:38)
+Ano_Obs$data <- Subset(Ano_Obs$data, along = 'sdate', indices = 1:38)
+RPSS <- CST_MultiMetric(Ano_Exp, Ano_Obs, metric = 'rpss', multimodel = FALSE)
+
+PlotEquiMap(RPSS$data[[1]], lat = Lat, lon = Lon, brks = seq(-1, 1, by = 0.1),
+            filled.continents = FALSE)
+```
+
+
+![](./Figures/MostLikelyTercile_fig2.png)
+
+
+Areas displayed in red (RPSS > 0) are areas for which the forecast system shows skill above climatology, whereas areas in blue (such as a large part of the Iberian Peninsula) are areas for which it does not. We thus want to mask the areas currently displayed in blue.
+
+### 6. Simultaneous visualization of probabilities and skill scores
+
+From the RPSS, we create a mask: regions with RPSS <= 0 will be masked (a quick check of the masked fraction is sketched at the end of this vignette).
+
+
+```r
+# 1 = masked (no skill or missing value), 0 = unmasked (positive skill)
+mask_rpss <- RPSS$data[[1]]
+mask_rpss[RPSS$data[[1]] <= 0] <- 1
+mask_rpss[is.na(RPSS$data[[1]])] <- 1
+mask_rpss[RPSS$data[[1]] > 0] <- 0
+```
+
+Finally, we plot the latest forecast, as in the previous step, but add the mask we just created.
+
+
+```r
+PlotMostLikelyQuantileMap(probs = prob_map, lon = Lon, lat = Lat, coast_width = 1.5,
+                          legend_scale = 0.8, mask = t(mask_rpss),
+                          toptitle = paste('Most likely tercile -', clim_var,
+                                           '- ECMWF System5 - JJA 2020'))
+```
+
+![](./Figures/MostLikelyTercile_fig3.png)
+
+We obtain the same figure as before, but this time we only display the areas for which the model has skill at forecasting the correct tercile. The gray regions represent areas where the system doesn't have sufficient skill over the verification period.
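+As a quick, optional check (a minimal sketch, assuming the `mask_rpss` array built in step 6), the fraction of grid points that ends up masked can be inspected:
+
+```r
+# Proportion of grid points flagged as masked (RPSS <= 0 or missing)
+mean(mask_rpss == 1)
+```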
+ + diff --git a/vignettes/MultiModelSkill_vignette.Rmd b/vignettes/MultiModelSkill_vignette.Rmd index 734652fe67fdd3416825c40a7cf9d81673dd740f..e44be8529629d822878c0cf1987049433fe53533 100644 --- a/vignettes/MultiModelSkill_vignette.Rmd +++ b/vignettes/MultiModelSkill_vignette.Rmd @@ -145,9 +145,12 @@ While other relevant data is being stored in the corresponding element of the ob ```r -> dim(AnomDJF$data) - dataset dataset statistics lat lon - 4 1 4 35 43 +> str(AnomDJF$data) +List of 4 + $ corr : num [1:4, 1, 1:35, 1:64] 0.586 0.614 0.143 0.501 0.419 ... + $ p.val : num [1:4, 1, 1:35, 1:64] 0.0026 0.00153 0.26805 0.01036 0.02931 ... + $ conf.lower: num [1:4, 1, 1:35, 1:64] 0.2073 0.2485 -0.3076 0.0883 -0.0154 ... + $ conf.upper: num [1:4, 1, 1:35, 1:64] 0.812 0.827 0.541 0.767 0.72 ... > names(AnomDJF) [1] "data" "lon" "lat" "Variable" "Datasets" "Dates" [7] "when" "source_files" "load_parameters" @@ -155,26 +158,19 @@ While other relevant data is being stored in the corresponding element of the ob [1] "glosea5" "ecmwf/system4_m1" "meteofrance/system5_m1" "erainterim" ``` -In the element $data of the `AnomDJF` object, the third dimension contains the lower limit of the 95% confidence interval, the correlation, the upper limit of the 95% confidence interval and the 95% significance level given by a one-sided T-test. - - -```r -corre <- AnomDJF$data[ , , 2, , ] -names(dim(corre)) <- c("maps", "lat", "lon") -``` - +In the element $data of the `AnomDJF` object is a list of object for the metric and its statistics: correlation, p-value, the lower limit of the 95% confidence interval and the upper limit of the 95% confidence interval and the 95% significance level given by a one-sided T-test. To obtain a spatial plot with a scale from -1 to 1 value of correlation for the model with the highest correlation for each grid point, the following lines should be run: ```r -PlotCombinedMap(corre, lon = Lon, lat = Lat, map_select_fun = max, - display_range = c(0, 1), map_dim = 'maps', +PlotCombinedMap(AnomDJF$data$corr[,1,,], lon = Lon, lat = Lat, map_select_fun = max, + display_range = c(0, 1), map_dim = 'nexp', legend_scale = 0.5, brks = 11, - cols = list(c('white', 'darkblue'), + cols = list(c('white', 'black'), + c('white', 'darkblue'), c('white', 'darkred'), - c('white', 'darkorange'), - c('white', 'black')), - bar_titles = c(names(AnomDJF$Datasets)[-4], "MMM"), + c('white', 'darkorange')), + bar_titles = c("MMM", names(AnomDJF$Datasets)), fileout = "./vignettes/Figures/MultiModelSkill_cor_tas_1992-2012.png", width = 14, height = 8) ``` @@ -200,14 +196,14 @@ The following lines are necessary to obtain the plot which visualizes the best m ```r names(dim(RMS)) <- c("maps", "lat", "lon") -PlotCombinedMap(RMS, lon = Lon, lat = Lat, map_select_fun = min, - display_range = c(0, ceiling(max(abs(RMS)))), map_dim = 'maps', +PlotCombinedMap(AnomDJF$data$rms[,1,,], lon = Lon, lat = Lat, map_select_fun = min, + display_range = c(0, ceiling(max(abs(AnomDJF$data$rms)))), map_dim = 'nexp', legend_scale = 0.5, brks = 11, - cols = list(c('darkblue', 'white'), + cols = list(c('black', 'white'), + c('darkblue', 'white'), c('darkred', 'white'), - c('darkorange', 'white'), - c('black', 'white')), - bar_titles = c(names(AnomDJF$Datasets)[-4], "MMM"), + c('darkorange', 'white')), + bar_titles = c("MMM", names(AnomDJF$Datasets)), fileout = "./vignettes/Figures/MultiModelSkill_rms_tas_1992-2012.png", width = 14, height = 8) ``` @@ -226,19 +222,17 @@ Notice that the perfect RMSSS is 1 and the parameter `map_select_fun` 
from `Plo ```r AnomDJF <- CST_MultiMetric(exp = ano_exp, obs = ano_obs, metric = 'rmsss', multimodel = TRUE) -RMSSS <- AnomDJF$data[ , , 1, , ] -names(dim(RMSSS)) <- c("maps", "lat", "lon") -PlotCombinedMap(RMSSS, lon = Lon, lat = Lat, +PlotCombinedMap(AnomDJF$data$rmsss[,1,,], lon = Lon, lat = Lat, map_select_fun = function(x) {x[which.min(abs(x - 1))]}, display_range = c(0, - ceiling(max(abs(RMSSS)))), map_dim = 'maps', + ceiling(max(abs(AnomDJF$data$rmsss)))), map_dim = 'nexp', legend_scale = 0.5, brks = 11, - cols = list(c('white', 'darkblue'), + cols = list(c('white', 'black'), + c('white', 'darkblue'), c('white', 'darkred'), - c('white', 'darkorange'), - c('white', 'black')), - bar_titles = c(names(AnomDJF$Datasets)[-4], "MMM"), + c('white', 'darkorange')), + bar_titles = c("MMM", names(AnomDJF$Datasets)), fileout = "./vignettes/Figures/MultiModelSkill_rmsss_tas_1992-2012.png", width = 14, height = 8) ``` diff --git a/vignettes/PlotForecastPDF.Rmd b/vignettes/PlotForecastPDF.Rmd index 98b2ae144003e7d876689a827325cdf0eadf9fdc..757f4047440b7ad59f25aaaa310557767214f1fc 100644 --- a/vignettes/PlotForecastPDF.Rmd +++ b/vignettes/PlotForecastPDF.Rmd @@ -30,20 +30,28 @@ PlotForecastPDF(fcst, tercile.limits = c(20, 26)) ![Example 1](./Figures/PlotForecastPDF_ex1.png) -### 2.- Some useful parameters -Changing the title, the forecast labels or the units will be needed in most cases. +Input data can also be provided as an two-dimensional array, as far as one of the dimensions is named 'members': + +```{r,fig.show = 'hide',warning=F} +fcst <- array(rnorm(mean=25, sd=2, n=90), dim=c(members=30, 3)) +PlotForecastPDF(fcst, tercile.limits = c(23, 27)) +``` + +### 2.- Customizing the appearance of your plots +Some parameters allow to customize your plot by changing the title, the forecast labels, the variable name and units, or the colors. ```{r,fig.show = 'hide',warning=F} fcst <- data.frame(fcst1 = rnorm(mean = 25, sd = 3, n = 30), fcst2 = rnorm(mean = 23, sd = 4.5, n = 30)) PlotForecastPDF(fcst, tercile.limits = c(20, 26), var.name = "Temperature (ºC)", - title = "Forecasts valid on 2019-01-01 at Sunny Hills", - fcst.names = c("model a", "model b")) + title = "Forecasts valid for 2019-01-01 at Sunny Hills", + fcst.names = c("model a", "model b"), + color.set = "s2s4e") ``` ![Example 2](./Figures/PlotForecastPDF_ex2.png) ### 3.- Adding extremes and observed values -We can add the probability of extreme values and the observed values. The tercile and extreme limits can be specified for each panel separately, as well as the observed values. +Optionally, we can include the probability of extreme values or the actually observed values. The tercile limits, extreme limits and observation values can be specified for each panel separately. ```{r,fig.show = 'hide',warning=F} fcst <- data.frame(fcst1 = rnorm(mean = 25, sd = 3, n = 30), @@ -51,30 +59,31 @@ fcst <- data.frame(fcst1 = rnorm(mean = 25, sd = 3, n = 30), PlotForecastPDF(fcst, tercile.limits = rbind(c(20, 26), c(22, 28), c(15, 22)), var.name = "Temperature (ºC)", title = "Forecasts at Sunny Hills", fcst.names = c("January", "February", "March"), obs = c(21, 24, 17), - extreme.limits = rbind(c(18, 28), c(20, 30), c(12, 24))) + extreme.limits = rbind(c(18, 28), c(20, 30), c(12, 24)), + color.set="s2s4e") ``` -The same example using a forecast in array format is provided. 
-```{r,fig.show = 'hide',warning=F} -fcst <- array(cbind(cbind(rnorm(mean = 25, sd = 3, n = 30), - rnorm(mean = 23, sd = 4.5, n = 30)), rnorm(mean = 17, sd = 3, n = 30)), - dim = c(members = 30, 3)) -PlotForecastPDF(fcst, tercile.limits = rbind(c(20, 26), c(22, 28), c(15, 22)), - var.name = "Temperature (ºC)", title = "Forecasts at Sunny Hills", - fcst.names = c("January", "February", "March"), obs = c(21, 24, 17), - extreme.limits = rbind(c(18, 28), c(20, 30), c(12, 24))) -``` ![Example 3](./Figures/PlotForecastPDF_ex3.png) -### 4.- Example using lonlat_data -An example using the lonlat_data from CSTools is provided. +### 4.- Saving your plot to a file +PlotForecastPDF uses ggplot2, so you can save the output of the function to a variable and operate with it as a ggplot2 object. For instance, you can save it to a file: + +``` +library(ggplot2) +fcst <- array(rnorm(mean=25, sd=2, n=90), dim=c(members=30, 3)) +plot <-PlotForecastPDF(fcst, tercile.limits = c(23, 27)) +ggsave("outfile.pdf", plot, width=7, height=5) +``` + +### 5.- A reproducible example using lonlat_data +This final example uses the sample lonlat data from CSTools. It is suitable for checking reproducibility of results. ```{r,fig.show = 'hide',warning=F} fcst <- data.frame(fcst1 = lonlat_data$exp$data[1,,1,1,1,1] - 273.15, fcst2 = lonlat_data$exp$data[1,,1,2,1,1] - 273.15) PlotForecastPDF(fcst, tercile.limits = c(5, 7), extreme.limits = c(4, 8), var.name = "Temperature (ºC)", - title = "Forecasts valid on 2000-11 at sample mediterranean region", + title = "Forecasts initialized on Nov 2000 at sample Mediterranean region", fcst.names = c("November", "December")) ``` ![Example 4](./Figures/PlotForecastPDF_ex4.png) diff --git a/vignettes/RainFARM_vignette.Rmd b/vignettes/RainFARM_vignette.Rmd index 41ba8f2b8b9b5d0911427514b8d2dc2ddb044af2..dbcb48a47bee31c92b80c4b879d55d989132843b 100644 --- a/vignettes/RainFARM_vignette.Rmd +++ b/vignettes/RainFARM_vignette.Rmd @@ -110,7 +110,7 @@ title(main = "pr 17/03/2010 downscaled") ``` -![Comparison between an original precipitation field at 1-degree resolution (left) and the same field downscaled with RainFARM to 0.05 degree resolution (right)](Figures/RainFARM_fig1.png){width=420px} +![Comparison between an original precipitation field at 1-degree resolution (left) and the same field downscaled with RainFARM to 0.05 degree resolution (right)](Figures/RainFARM_fig1.png) RainFARM has downscaled the original field with a realistic fine-scale correlation structure. Precipitation is conserved in an average sense (in this case smoothing both the original and the downscaled fields with a circular kernel with a diameter equal to the original field grid spacing would lead to the same results). The downscaled field presents more extreme precipitation peaks. @@ -118,7 +118,7 @@ RainFARM has downscaled the original field with a realistic fine-scale correlati The area of interest in our example presents a complex orography, but the basic RainFARM algorithm used does not consider topographic elevation in deciding how to distribute fine-scale precipitation. A long term climatology of the downscaled fields would have a resolution comparable to that of the original coarse fields and would not resemble the fine-scale structure of an observed climatology. If an external fine-scale climatology of precipitation is available, we can use the method discussed in Terzago et al. 
(2018) to change the distribution of precipitation by RainFARM for each timestep, so that the long-term average is close to this reference climatology in terms of precipitation distribution (while the total precipitation amount of the original fields to downscale is preserved). -Suitable climatology files could be for example a fine-scale precipitation climatology from a high-resolution regional climate model (see e.g. Terzago et al. 2018), a local high-resolution gridded climatology from observations, or a reconstruction such as those which can be downloaded from the WORLDCLIM (http://www.worldclim.org) or CHELSA (http://chelsa-climate.org) websites. The latter data will need to be converted to NetCDF format before being used (see for example the GDAL tools (https://www.gdal.org). +Suitable climatology files could be for example a fine-scale precipitation climatology from a high-resolution regional climate model (see e.g. Terzago et al. 2018), a local high-resolution gridded climatology from observations, or a reconstruction such as those which can be downloaded from the WORLDCLIM (https://www.worldclim.org) or CHELSA (https://chelsa-climate.org) websites. The latter data will need to be converted to NetCDF format before being used (see for example the GDAL tools (https://gdal.org). We will assume that a copy of the WORLDCLIM precipitation climatology at 30 arcseconds (about 1km resolution) is available in the local file `medscope.nc`. From this file we can derive suitable weights to be used with RainFARM using the `CST_RFWeights` functions as follows: ```{r} ww <- CST_RFWeights("./worldclim.nc", nf = 20, lon = exp$lon, lat = exp$lat) @@ -168,7 +168,7 @@ dev.off() ``` --> -![The same field as in Fig. 1 downscaled using climatological weights from the WORLDCLIM dataset. (left panel). The center and right panel show the climatology of the downscaled fields over all ensemble members, realizations and forecast times without and with climatological weights respectively, for the first starting date. The center and right panels use a different colorscale than the left panel.](Figures/RainFARM_fig2.png){width=640px} +![The same field as in Fig. 1 downscaled using climatological weights from the WORLDCLIM dataset. (left panel). The center and right panel show the climatology of the downscaled fields over all ensemble members, realizations and forecast times without and with climatological weights respectively, for the first starting date. The center and right panels use a different colorscale than the left panel.](Figures/RainFARM_fig2.png) ### Determining the spectral slopes diff --git a/vignettes/WeatherRegimes_vignette.Rmd b/vignettes/WeatherRegimes_vignette.Rmd index 62e4883db6b55406f5100f7d6cb4e9187859f3d2..d9272678f27f4cbfaaa131aaaddc479f32fa21d2 100644 --- a/vignettes/WeatherRegimes_vignette.Rmd +++ b/vignettes/WeatherRegimes_vignette.Rmd @@ -30,7 +30,7 @@ library(zeallot) The data employed in this example are described below. - Sea level pressure (psl): this has been selected as the circulation variable, however other variables such as geopotential at 500 hPa can be also used. - Region: Euro-Atlantic domain [85.5ºW-45ºE; 27-81ºN]. -- Datasets: seasonal predictions from ECMWF System 4 ([**Molteni et al. 2011**] (https://www.ecmwf.int/sites/default/files/elibrary/2011/11209-new-ecmwf-seasonal-forecast-system-system-4.pdf)) and ERA-Interim reanalysis ([**Dee et al. 2011**] (http://onlinelibrary.wiley.com/doi/10.1002/qj.828/pdf)) as a reference dataset. 
+- Datasets: seasonal predictions from ECMWF System 4 ([**Molteni et al. 2011**] (https://www.ecmwf.int/sites/default/files/elibrary/2011/11209-new-ecmwf-seasonal-forecast-system-system-4.pdf)) and ERA-Interim reanalysis ([**Dee et al. 2011**] (https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/qj.828)) as a reference dataset. - Period: 1991-2010. Only 20 years have been selected for illustrative purposes, but the full hindcast period could be used for the analysis.