From ed6a5fabcc0f71bf212b4768d5c3431fd43b4118 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 7 Apr 2022 09:11:38 +0200 Subject: [PATCH 01/29] Add grid description files and systems to the archive --- conf/archive.yml | 25 ++++++++++++++++---- conf/grid_description/griddes_eccc1.txt | 19 +++++++++++++++ conf/grid_description/griddes_system2c3s.txt | 19 +++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 conf/grid_description/griddes_eccc1.txt create mode 100644 conf/grid_description/griddes_system2c3s.txt diff --git a/conf/archive.yml b/conf/archive.yml index 57574919..73379d5d 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -16,20 +16,37 @@ archive: reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: src: "exp/meteofrance/system7c3s/" - monthly_mean: {"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f6h/", "tasmin": "_f6h/"} nmember: fcst: 51 hcst: 25 reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt" system35c3s: src: "exp/cmcc/system35c3s/" - monthly_mean: {"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/"} nmember: fcst: 50 hcst: 40 reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" + system2c3s: + src: "exp/jma/system2c3s/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" + eccc1: + src: "exp/eccc/eccc1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_eccc1.txt" Reference: era5: src: "recon/ecmwf/era5/" diff --git a/conf/grid_description/griddes_eccc1.txt b/conf/grid_description/griddes_eccc1.txt new file mode 100644 index 00000000..58a9810c --- /dev/null +++ b/conf/grid_description/griddes_eccc1.txt @@ -0,0 +1,19 @@ +# +# Grid description file for ECCC1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt new file mode 100644 index 00000000..cf1e3515 --- /dev/null +++ b/conf/grid_description/griddes_system2c3s.txt @@ -0,0 +1,19 @@ +# +# Grid description file for JMA System3c3s +# +# gridID 2 +# +gridtype = lonlat +gridsize = 10512 +xsize = 144 +ysize = 73 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0 +xinc = 2.5 +yfirst = 90 +yinc = -2.5 -- GitLab From 68ccbd7cb91f26b9d54de10296665971f6b1cc8d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 7 Apr 2022 13:06:35 +0200 Subject: [PATCH 02/29] Fix for issue #11 --- modules/data_load/load.R | 2 +- .../data_load/testing_recipes/recipe_4.yml | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 modules/data_load/testing_recipes/recipe_4.yml diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 0083e9a9..1fbf623d 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -230,7 +230,7 @@ load_datasets <- function(recipe_file){ split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_year" + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_syear" fcst <- startR_to_s2dv(fcst) } else { diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml new file mode 100644 index 00000000..128560d4 --- /dev/null +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: no + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2020' + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '2016' + leadtimemin: 2 + leadtimemax: 4 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: SBC + Skill: + metric: RPSS + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ -- GitLab From 2254f0786b6597a35b49ef05c35960731cfc9742 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 12 Apr 2022 16:32:46 +0200 Subject: [PATCH 03/29] format fixes --- modules/data_load/load.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 1fbf623d..6aec0614 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -19,12 +19,12 @@ attr2array <- function(attr){ startR_to_s2dv <- function(startR_array){ dates_dims <- dim(Subset(startR_array, - along=c('dat','var', + along=c('dat', 'var', 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) #sdates_dims <- dim(Subset(startR_array, - # along=c('dat','var','time','sweek','sday', + # along=c('dat', 'var', 'time', 'sweek', 'sday', # 'latitude', 'longitude', 'ensemble'), # list(1,1,1,1,1,1,1,1), drop="selected")) @@ -44,18 +44,18 @@ startR_to_s2dv <- function(startR_array){ 'Variables')$dat1$longitude), lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), - Variable = list(varName=names(attr(startR_array, - 'Variables')$common)[2], + Variable = list(varName=names(attr(startR_array, + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), #sdate = sdates), - time_dims = time_dims, + time_dims = time_dims, when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) - #Datasets=list(exp1=list(InitializationsDates=list(Member_1="01011990", - # Members="Member_1"))) - ) + # Datasets=list(exp1=list(InitializationsDates=list( Member_1="01011990", + # Members="Member_1"))) + ) return(s2dv_object) } -- GitLab From 5f98c0dd0319a258343e6ba7e68603622d53c96b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 12 Apr 2022 16:33:27 +0200 Subject: [PATCH 04/29] Add testing recipe for meteofrance system 7 monthly --- modules/data_load/testing_recipes/recipe_4.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml index 128560d4..a82c4f2c 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -30,7 +30,7 @@ Analysis: type: to_system Workflow: Calibration: - method: SBC + method: mse_min Skill: metric: RPSS Indicators: -- GitLab From cf240f215e897c6a82f9e6f0b98716ec42c520ef Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 19 Apr 2022 14:47:05 +0200 Subject: [PATCH 05/29] added file to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5faf5e63..752f5cf9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ out-logs/ *.swp *.swo +/modules/Calibration/test_victoria.R -- GitLab From 73ecc687f21033b16f1833e64391fddf36e98006 Mon Sep 17 00:00:00 2001 From: lpalma Date: Tue, 19 Apr 2022 16:10:29 +0200 Subject: [PATCH 06/29] fixed leadtime selection, now the 0 leadmonth corresponds to the month starting when the forecast is initialized --- modules/data_load/dates2load.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index 28d7905e..f6a5e27f 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -97,8 +97,8 @@ get_timeidx <- function(sdates, ltmin, ltmax, } else if (time_freq == "monthly_mean") { - idx_min <- ltmin - idx_max <- ltmax + idx_min <- ltmin + 1 + idx_max <- ltmax + 1 indxs <- indices(idx_min:idx_max) } -- GitLab From 8b99aa6d4d486b5721a546aa577242d960580d29 Mon Sep 17 00:00:00 2001 From: lpalma Date: Tue, 19 Apr 2022 18:06:03 +0200 Subject: [PATCH 07/29] Added @aho suggestions --- modules/data_load/load.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 6aec0614..25a3b82b 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -244,8 +244,7 @@ load_datasets <- function(recipe_file){ # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(Subset(hcst$data, along=c('dat','var', 'latitude', 'longitude', 'ensemble'), @@ -254,8 +253,6 @@ load_datasets <- function(recipe_file){ obs <- Start(dat = obs.path, var = variable, file_date = dates_file, - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, latitude = values(list(lats.min, lats.max)), latitude_reorder = Sort(), longitude = values(list(lons.min, lons.max)), -- GitLab From e36ae023b518d9e8e44d070ad7ce52593c3a288b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 21 Apr 2022 09:29:27 +0200 Subject: [PATCH 08/29] Adapt hcst calibration script to work with s2dv_cube --- modules/Calibration/Calibration.R | 55 ++++++----- modules/Calibration/test_calib.R | 94 +++++++++++++++++++ .../data_load/testing_recipes/recipe_4.yml | 2 +- 3 files changed, 124 insertions(+), 27 deletions(-) create mode 100644 modules/Calibration/test_calib.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 295bb406..c57e4867 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -48,12 +48,12 @@ #'@param obs A numeric array with named dimensions, #' representing the observational data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param hcst A numeric array with named dimensions, #' representing the system hindcast data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fun bias adjustment function @@ -69,7 +69,7 @@ #'@return A numeric array with the bias adjusted hindcast, #' -CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") +CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, split_factor=1, ncores=32) @@ -77,10 +77,11 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, # Replicates observations for bias adjusting each # system of the multi-model separately + ## TODO: Adapt code for multi-model case if(mm){ obs.mm <- obs for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ - obs.mm <- abind(obs2,obs, + obs.mm <- abind(obs2, obs, along = which(names(dim(obs)) == 'dat')) } names(dim(obs.mm)) <- names(dim(obs)) @@ -91,7 +92,7 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, if (method %in% CST_CALIB_METHODS) { # Hcst calibration - hcst <- CSTools::CST_Calibration(hcst,obs, + hcst <- CSTools::CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -99,32 +100,34 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, na.rm = na.rm, apply_to = NULL, alpha = NULL, - memb_dim = "member", + memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) } else { - #error(logger, - # "Calibration method is not implemented in CSTools") + ## TODO: Implement other calibration methods + error(logger, + "Calibration method is not implemented in CSTools") } - hcst[!is.finite(hcst)] <- NA - remove(hcst) + hcst$data[!is.finite(hcst$data)] <- NA + ## TODO: Is this line necessary? + # remove(hcst) # Merges all the ensembles from the different systems into # one single ensemble if(mm){ hcst <- MergeDims(hcst, - merge_dims=c('dat','member'), - rename_dim='member') - hcst <- drop_na_dim(hcst,'member') + merge_dims = c('dat', 'ensemble'), + rename_dim = 'ensemble') + hcst <- drop_na_dim(hcst, 'ensemble') } - # removes dat and var dimensions if needed - try(hcst<-Subset(hcst, - c('var'),list(1),drop='selected')) - try(hcst<-Subset(hcst, - c('dat'),list(1),drop='selected')) +# # removes dat and var dimensions if needed +# try(hcst<-Subset(hcst, +# c('var'), list(1), drop='selected')) +# try(hcst<-Subset(hcst, +# c('dat'), list(1), drop='selected')) return(hcst) @@ -138,17 +141,17 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, #'@param obs A numeric array with named dimensions, #' representing the observational data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param hcst A numeric array with named dimensions, #' representing the system hindcast data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fcst A numeric array with named dimensions, #' representing the system forecast to be #' bias-adjusted. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fun bias adjustment function @@ -168,8 +171,8 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, ## needs and update: look at hcst version #fcst_calib <- function(obs, hcst, fcst, fun, mm=F, # na.rm=T, split_factor=1,ncores=32, -# target_dims=c('sday','syear','member'), -# output_dims=c('member')) +# target_dims=c('sday','syear','ensemble'), +# output_dims=c('ensemble')) #{ # # # Replicates observations for bias adjusting each @@ -200,9 +203,9 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, # # one single ensemble # if(mm){ # calibrated_fcst <- MergeDims(calibrated_fcst, -# merge_dims=c('dat','member'), -# rename_dim='member') -# calibrated_fcst <- drop_na_dim(calibrated_fcst,'member') +# merge_dims=c('dat','ensemble'), +# rename_dim='ensemble') +# calibrated_fcst <- drop_na_dim(calibrated_fcst,'ensemble') # } # # # removes dat and var dimensions if needed diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R new file mode 100644 index 00000000..18ee110f --- /dev/null +++ b/modules/Calibration/test_calib.R @@ -0,0 +1,94 @@ + + +recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" + +source("modules/data_load/load.R") + +data <- load_datasets(recipe_file) + +## Entry params data and recipe? +## TODO: Instead of reading the recipe at each module, do it at the beginning? +recipe <- read_yaml(recipe_file) + +calibrate_datasets <- function(data, recipe) { + # hcst <- data$hcst + # obs <- data$obs + # fcst <- data$fcst + + # Calibration function params + method <- recipe$Analysis$Workflow$Calibration$method + ## TODO: Make sure 'Multimodel' param in recipe is a boolean + mm <- recipe$Analysis$Datasets$Multimodel + ncores <- 4 + na.rm <- T + + # Hcst calibration + hcst_calib <- CSTools::CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + + ## Alba's version + library(s2dv) + library(ClimProjDiags) + library(multiApply) + source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + + ## In the new CST_Calibration function, the fcst must have the same number of + ## dimensions as the hcst + dim(data$fcst$data) <- c(dim(data$fcst$data)[1:2], "sday" = 1, "sweek" = 1, + dim(data$fcst$data)[3:7]) + ## In the new CST_Calibration function, memb_dim and sdate_dim must be the same + ## in the hcst and the fcst. + names(dim(data$fcst$data))[which(names(dim(data$fcst$data)) == + 'fcst_syear')] <- "syear" + + fcst_alba <- CST_Calibration(data$hcst, data$obs, data$fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + + ## Lluís' version: + ## Note: memb_dim is hardcoded as 'ensemble'. + source("/esarchive/scratch/vagudets/repos/auto-s2s/modules/Calibration/Calibration.R") + hcst_lluis <- hcst_calib(data$obs, data$hcst, + method = method, + mm = mm, + na.rm = na.rm, + split_factor = 1, + ncores = ncores) + ## TODO: Include Lluís' forecast version and test it + # For daily + # hcst <- CSTools::CST_QuantileMapping(data$hcst, + # data$obs, + # exp_cor = NULL, + # sample_dims = c("syear", "time", "ensemble"), + # sample_length = NULL, + # method = "QUANT", + # ncores = ncores, + # na.rm = na.rm) diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml index a82c4f2c..421888b1 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -9,7 +9,7 @@ Analysis: Datasets: System: name: system7c3s - Multimodel: no + Multimodel: False Reference: name: era5 Time: -- GitLab From 5dd360ab980355a37bb4c0e06df586f17eb39f16 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 22 Apr 2022 12:01:45 +0200 Subject: [PATCH 09/29] First version of wrapper fun using CST_Calibration, add some useful scripts --- modules/Calibration/Calibration.R | 91 ++++++------- modules/Calibration/Calibration_fcst3.R | 155 ++++++++++++++++++++++ modules/Calibration/test_calib.R | 165 ++++++++++++------------ modules/test_victoria.R | 14 ++ 4 files changed, 300 insertions(+), 125 deletions(-) create mode 100644 modules/Calibration/Calibration_fcst3.R create mode 100644 modules/test_victoria.R diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index c57e4867..8f21eecb 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -169,52 +169,55 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, ## TODO: ## needs and update: look at hcst version -#fcst_calib <- function(obs, hcst, fcst, fun, mm=F, -# na.rm=T, split_factor=1,ncores=32, -# target_dims=c('sday','syear','ensemble'), -# output_dims=c('ensemble')) -#{ -# -# # Replicates observations for bias adjusting each -# # system of the multi-model separately -# if(mm){ -# obs.mm <- obs -# for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ -# obs.mm <- abind(obs2,obs, -# along=which(names(dim(obs)) == 'dat')) -# } -# names(dim(obs.mm)) <- names(dim(obs)) -# obs <- obs.mm -# remove(obs.mm) -# } -# -# # Fcst Calibration -# calibrated_fcst <-Apply(data=list(obs=obs,hcst=hcst,fcst=fcst), -# extra_info=list(na.rm=na.rm), -# target_dims=target_dims, -# output_dims=output_dims, -# na.rm=na.rm, -# ncores=ncores, -# fun = .fcstcal)[[1]] -# -# calibrated_fcst[!is.finite(calibrated_fcst)] <- NA -# -# # Merges all the ensembles from the different systems into -# # one single ensemble -# if(mm){ -# calibrated_fcst <- MergeDims(calibrated_fcst, -# merge_dims=c('dat','ensemble'), -# rename_dim='ensemble') -# calibrated_fcst <- drop_na_dim(calibrated_fcst,'ensemble') -# } -# +fcst_calib <- function(obs, hcst, fcst, fun, mm=F, + na.rm=T, split_factor=1, ncores=32, + ## TODO: remove these? + target_dims=c('sday', 'syear', 'ensemble'), + output_dims=c('ensemble')) +{ + + # Replicates observations for bias adjusting each + # system of the multi-model separately + ## TODO: Adapt multi-model case + if(mm){ + obs.mm <- obs + for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ + obs.mm <- abind(obs2,obs, + along=which(names(dim(obs)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs)) + obs <- obs.mm + remove(obs.mm) + } + + # Fcst Calibration + calibrated_fcst <- Apply(data=list(obs = obs, hcst = hcst, fcst = fcst), + extra_info = list(na.rm = na.rm), + target_dims = target_dims, + output_dims = output_dims, + na.rm = na.rm, + ncores = ncores, + fun = .fcstcal)[[1]] ## Need to source this fun? + + calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA + + # Merges all the ensembles from the different systems into + # one single ensemble + ## TODO: Adapt multi-model case + if(mm){ + calibrated_fcst <- MergeDims(calibrated_fcst, + merge_dims=c('dat', 'ensemble'), + rename_dim = 'ensemble') + calibrated_fcst <- drop_na_dim(calibrated_fcst, 'ensemble') + } + # # removes dat and var dimensions if needed # try(calibrated_fcst<-Subset(calibrated_fcst, # c('var'),list(1),drop='selected')) # try(calibrated_fcst<-Subset(calibrated_fcst, # c('dat'),list(1),drop='selected')) -# -# return(calibrated_fcst) -# -# -#} + + return(calibrated_fcst) + + +} diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R new file mode 100644 index 00000000..530081f2 --- /dev/null +++ b/modules/Calibration/Calibration_fcst3.R @@ -0,0 +1,155 @@ +library(ClimProjDiags) + +# Performs the hcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. +.cal <- function(var_obs, var_exp, na.rm=F) { + + ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] + nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] + ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + climObs <- sapply(ind, function(x) {mean( + Subset(var_obs, along='syear',indices=-x), na.rm=na.rm)}) + climPred <- sapply(ind, function(x) {mean( + Subset(var_exp, along='syear',indices=-x), na.rm=na.rm)}) + + var_obs.ano <- var_obs - climObs + var_exp.ano <- var_exp - climPred + + calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index,0)) + # loop over the hindcast years + for (t in 1 : ntime) { + + # defining forecast,hindcast in cross-validation + fcst.ano <- Subset(var_exp.ano, along=c('syear','sday'), + indices=list(t,fcst.index)) + hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) + + obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(var_exp)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) + # Appends calibrated sdate to the hcst array + calib.hcst <- abind(calib.hcst, calib.syear, + along=which(names(dim(var_exp)) == 'syear'), + hier.names=F) + } + + names(dim(calib.hcst)) <- names(dim(calib.syear)) + # removes NAs added in array init + calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) + + return(calib.hcst) + +} + +# Performs the fcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. +.fcstcal <- function(obs, hcst, fcst, na.rm=F) { + + ntime <- dim(hcst)['syear'] + nmembers <- dim(hcst)['ensemble'] + fcst.nmembers <- dim(fcst)['ensemble'] + ndays <- dim(hcst)['sday'] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + climObs <- mean(obs, na.rm=na.rm) + climPred <- mean(hcst, na.rm=na.rm) + + obs.ano <- obs - climObs + hcst.ano <- hcst - climPred + fcst.ano <- fcst - climPred + + calib.fcst <- NA * fcst + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs allowed + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(hcst)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) + return(Subset(fcst.calib, c('sday','syear'), list(1,1), drop='selected')) + +} + + diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index 18ee110f..c24bcecf 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -1,19 +1,16 @@ - -recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" - -source("modules/data_load/load.R") - -data <- load_datasets(recipe_file) +## TODO: Replace with library(CSTools) once Alba's fun is merged +library(s2dv) +library(ClimProjDiags) +library(multiApply) +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") ## Entry params data and recipe? -## TODO: Instead of reading the recipe at each module, do it at the beginning? -recipe <- read_yaml(recipe_file) - calibrate_datasets <- function(data, recipe) { - # hcst <- data$hcst - # obs <- data$obs - # fcst <- data$fcst + hcst <- data$hcst + obs <- data$obs + fcst <- data$fcst + ## TODO: Daily case # Calibration function params method <- recipe$Analysis$Workflow$Calibration$method @@ -22,73 +19,79 @@ calibrate_datasets <- function(data, recipe) { ncores <- 4 na.rm <- T - # Hcst calibration - hcst_calib <- CSTools::CST_Calibration(data$hcst, data$obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - - ## Alba's version - library(s2dv) - library(ClimProjDiags) - library(multiApply) - source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") - hcst_calibrated <- CST_Calibration(data$hcst, data$obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - - ## In the new CST_Calibration function, the fcst must have the same number of - ## dimensions as the hcst - dim(data$fcst$data) <- c(dim(data$fcst$data)[1:2], "sday" = 1, "sweek" = 1, - dim(data$fcst$data)[3:7]) - ## In the new CST_Calibration function, memb_dim and sdate_dim must be the same - ## in the hcst and the fcst. - names(dim(data$fcst$data))[which(names(dim(data$fcst$data)) == - 'fcst_syear')] <- "syear" - - fcst_alba <- CST_Calibration(data$hcst, data$obs, data$fcst, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) + ## TODO: Change or move this check elsewhere so the error shows up earlier + CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") + if (method %in% CST_CALIB_METHODS) { + ## Alba's version + hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + ## TODO: Resolve these workarounds + ## In the new CST_Calibration function, the fcst must have the same number of + ## dimensions as the hcst + dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1, + dim(fcst$data)[3:7]) + ## In the new CST_Calibration function, memb_dim and sdate_dim must be the + ## same in the hcst and the fcst. + names(dim(fcst$data))[which(names(dim(fcst$data)) == + 'fcst_syear')] <- "syear" + + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + stop("Calibration method is not implemented in CSTools") + } + print("###### CALIBRATION COMPLETE ######") + return(list(hcst_calibrated, fcst_calibrated)) +} + +# ## Lluís' version: +# ## Note: memb_dim is hardcoded as 'ensemble'. +# source("modules/Calibration/Calibration.R") +# ## TODO: Change path +# # source("https://earth.bsc.es/gitlab/api/v4/projects/446/repository/blobs/bfe5848c06536218f1fbed6e9356cc6aebe27762/raw?ref=TFM&private_token=pExJt3V4FYwj4yzvy9ps") +# source("modules/Calibration/Calibration_fcst3.R") +# ## TODO: move this elsewhere +# library(abind) +# hcst_lluis <- hcst_calib(obs, hcst, +# method = method, +# mm = mm, +# na.rm = na.rm, +# split_factor = 1, +# ncores = ncores) +# +# fcst_lluis <- fcst_calib(obs$data, hcst$data, fcst$data, +# # method = method, +# fun = .fcstcal, ## ? +# mm = mm, +# na.rm = na.rm, +# split_factor = 1, +# ncores = ncores) - ## Lluís' version: - ## Note: memb_dim is hardcoded as 'ensemble'. - source("/esarchive/scratch/vagudets/repos/auto-s2s/modules/Calibration/Calibration.R") - hcst_lluis <- hcst_calib(data$obs, data$hcst, - method = method, - mm = mm, - na.rm = na.rm, - split_factor = 1, - ncores = ncores) - ## TODO: Include Lluís' forecast version and test it - # For daily - # hcst <- CSTools::CST_QuantileMapping(data$hcst, - # data$obs, - # exp_cor = NULL, - # sample_dims = c("syear", "time", "ensemble"), - # sample_length = NULL, - # method = "QUANT", - # ncores = ncores, - # na.rm = na.rm) + ## TODO: Include Lluís' forecast version and test it + # For daily + # hcst <- CSTools::CST_QuantileMapping(data$hcst, + # data$obs, + # exp_cor = NULL, + # sample_dims = c("syear", "time", "ensemble"), + # sample_length = NULL, + # method = "QUANT", + # ncores = ncores, + # na.rm = na.rm) diff --git a/modules/test_victoria.R b/modules/test_victoria.R new file mode 100644 index 00000000..e04c8a6f --- /dev/null +++ b/modules/test_victoria.R @@ -0,0 +1,14 @@ + + +recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" + +source("modules/data_load/load.R") +source("modules/Calibration/test_calib.R") +data <- load_datasets(recipe_file) +## TODO: Instead of reading the recipe at each module, do it at the beginning? +recipe <- read_yaml(recipe_file) +calibrated_data <- calibrate_datasets(data, recipe) + +## Entry params data and recipe? +## TODO: Instead of reading the recipe at each module, do it at the beginning? +calibrated_data <- calibrate_datasets(data, recipe) -- GitLab From 379f2b6a154b4822ef40de0b6956f55f16e43ea3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 22 Apr 2022 17:03:58 +0200 Subject: [PATCH 10/29] Add condition in case fcst is NULL --- modules/Calibration/test_calib.R | 43 +++++++++++++++++--------------- modules/test_victoria.R | 4 --- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index c24bcecf..09dc19b9 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -10,7 +10,7 @@ calibrate_datasets <- function(data, recipe) { hcst <- data$hcst obs <- data$obs fcst <- data$fcst - ## TODO: Daily case + ## TODO: Implement daily case # Calibration function params method <- recipe$Analysis$Workflow$Calibration$method @@ -21,9 +21,10 @@ calibrate_datasets <- function(data, recipe) { ## TODO: Change or move this check elsewhere so the error shows up earlier CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") + ## TODO: Revise output variable names if (method %in% CST_CALIB_METHODS) { ## Alba's version - hcst_calibrated <- CST_Calibration(data$hcst, data$obs, + hcst_calibrated <- CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -37,24 +38,26 @@ calibrate_datasets <- function(data, recipe) { ## TODO: Resolve these workarounds ## In the new CST_Calibration function, the fcst must have the same number of ## dimensions as the hcst - dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1, - dim(fcst$data)[3:7]) - ## In the new CST_Calibration function, memb_dim and sdate_dim must be the - ## same in the hcst and the fcst. - names(dim(fcst$data))[which(names(dim(fcst$data)) == - 'fcst_syear')] <- "syear" - - fcst_calibrated <- CST_Calibration(hcst, obs, fcst, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) + if (!is.null(fcst)) { + dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1, + dim(fcst$data)[3:7]) + ## In the new CST_Calibration function, memb_dim and sdate_dim must be the + ## same in the hcst and the fcst. + names(dim(fcst$data))[which(names(dim(fcst$data)) == + 'fcst_syear')] <- "syear" + + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } } else { stop("Calibration method is not implemented in CSTools") } diff --git a/modules/test_victoria.R b/modules/test_victoria.R index e04c8a6f..686b1f13 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -8,7 +8,3 @@ data <- load_datasets(recipe_file) ## TODO: Instead of reading the recipe at each module, do it at the beginning? recipe <- read_yaml(recipe_file) calibrated_data <- calibrate_datasets(data, recipe) - -## Entry params data and recipe? -## TODO: Instead of reading the recipe at each module, do it at the beginning? -calibrated_data <- calibrate_datasets(data, recipe) -- GitLab From 96d384f478146fcdc0f74b40f94515d0e0102972 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Apr 2022 11:20:11 +0200 Subject: [PATCH 11/29] Change path to grid description files from absolute to relative --- conf/archive.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 73379d5d..43750a1b 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -22,7 +22,7 @@ archive: nmember: fcst: 51 hcst: 25 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" system35c3s: src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", @@ -30,7 +30,7 @@ archive: nmember: fcst: 50 hcst: 40 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" system2c3s: src: "exp/jma/system2c3s/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", @@ -38,7 +38,7 @@ archive: nmember: fcst: 10 hcst: 10 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" eccc1: src: "exp/eccc/eccc1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", @@ -46,7 +46,7 @@ archive: nmember: fcst: 10 hcst: 10 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_eccc1.txt" + reference_grid: "conf/grid_description/griddes_eccc1.txt" Reference: era5: src: "recon/ecmwf/era5/" -- GitLab From b3520bcb9c47edd811a02743f6ca2071861d80f2 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Apr 2022 12:02:24 +0200 Subject: [PATCH 12/29] Adapt forecast calibration to work with s2dv_cube objects --- modules/Calibration/Calibration.R | 23 +++++++++++++---------- modules/Calibration/Calibration_fcst3.R | 18 ++++++++++-------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 8f21eecb..4945c333 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -88,9 +88,8 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, obs <- obs.mm remove(obs.mm) } - + if (method %in% CST_CALIB_METHODS) { - # Hcst calibration hcst <- CSTools::CST_Calibration(hcst, obs, cal.method = method, @@ -105,9 +104,10 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, ncores = ncores) } else { - ## TODO: Implement other calibration methods - error(logger, - "Calibration method is not implemented in CSTools") + ## TODO: Implement other calibration methods, implement logger + stop("Calibration method is not implemented in CSTools") + # error(logger, + # "Calibration method is not implemented in CSTools") } hcst$data[!is.finite(hcst$data)] <- NA @@ -171,9 +171,9 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, ## needs and update: look at hcst version fcst_calib <- function(obs, hcst, fcst, fun, mm=F, na.rm=T, split_factor=1, ncores=32, - ## TODO: remove these? + ## TODO: establish target dims and output dims target_dims=c('sday', 'syear', 'ensemble'), - output_dims=c('ensemble')) + output_dims=c('sday', 'syear', 'ensemble')) { # Replicates observations for bias adjusting each @@ -191,15 +191,18 @@ fcst_calib <- function(obs, hcst, fcst, fun, mm=F, } # Fcst Calibration - calibrated_fcst <- Apply(data=list(obs = obs, hcst = hcst, fcst = fcst), + ## TODO: Return s2dv_cube + calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data), extra_info = list(na.rm = na.rm), target_dims = target_dims, output_dims = output_dims, na.rm = na.rm, ncores = ncores, fun = .fcstcal)[[1]] ## Need to source this fun? - - calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA + + ## TODO: Restore this line + # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA + calibrated_fcst[!is.finite(calibrated_fcst)] <- NA # Merges all the ensembles from the different systems into # one single ensemble diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R index 530081f2..e57f05c4 100644 --- a/modules/Calibration/Calibration_fcst3.R +++ b/modules/Calibration/Calibration_fcst3.R @@ -19,20 +19,20 @@ library(ClimProjDiags) ind <- 1:ntime climObs <- sapply(ind, function(x) {mean( - Subset(var_obs, along='syear',indices=-x), na.rm=na.rm)}) + Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) climPred <- sapply(ind, function(x) {mean( - Subset(var_exp, along='syear',indices=-x), na.rm=na.rm)}) + Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) var_obs.ano <- var_obs - climObs var_exp.ano <- var_exp - climPred - calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index,0)) + calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index, 0)) # loop over the hindcast years for (t in 1 : ntime) { # defining forecast,hindcast in cross-validation - fcst.ano <- Subset(var_exp.ano, along=c('syear','sday'), - indices=list(t,fcst.index)) + fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'), + indices=list(t, fcst.index)) hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) @@ -40,7 +40,7 @@ library(ClimProjDiags) # computes ensemble mean of the fcst & hcst em_fcst <- mean(fcst.ano, na.rm=na.rm) em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, - na.rm=na.rm)$output1 + na.rm=na.rm)$output1 # Changes the correlation method in case of NAs if (na.rm){ @@ -117,7 +117,7 @@ library(ClimProjDiags) em_fcst <- mean(fcst.ano, na.rm=na.rm) em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, na.rm=na.rm)$output1 - + # Changes the correlation method in case of NAs allowed if (na.rm){ corr.method <- "pairwise.complete.obs" @@ -148,7 +148,9 @@ library(ClimProjDiags) # Calibrates hcst sdate fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) - return(Subset(fcst.calib, c('sday','syear'), list(1,1), drop='selected')) + # fcst_cube$data <- fcst.calib + return(fcst.calib) + # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) } -- GitLab From 397a6ce59eb8fe56c33917e51805347a365b4811 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Apr 2022 16:24:50 +0200 Subject: [PATCH 13/29] testing --- modules/data_load/testing_recipes/recipe_3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml index ed82f43c..b71f428d 100644 --- a/modules/data_load/testing_recipes/recipe_3.yml +++ b/modules/data_load/testing_recipes/recipe_3.yml @@ -30,7 +30,7 @@ Analysis: type: to_system Workflow: Calibration: - method: SBC + method: mse_min Skill: metric: RPSS Indicators: -- GitLab From 05cfd85abe4811acfe5034e00cbe3ae76854811e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 26 Apr 2022 16:25:58 +0200 Subject: [PATCH 14/29] Make dimensions of fcst object the same as hcst object --- modules/Calibration/test_calib.R | 21 ++++------ modules/data_load/dates2load.R | 13 +++--- modules/data_load/load.R | 71 ++++++++++++++++++-------------- 3 files changed, 55 insertions(+), 50 deletions(-) diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index 09dc19b9..d4d4259c 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -14,16 +14,15 @@ calibrate_datasets <- function(data, recipe) { # Calibration function params method <- recipe$Analysis$Workflow$Calibration$method - ## TODO: Make sure 'Multimodel' param in recipe is a boolean mm <- recipe$Analysis$Datasets$Multimodel ncores <- 4 na.rm <- T - ## TODO: Change or move this check elsewhere so the error shows up earlier - CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") ## TODO: Revise output variable names if (method %in% CST_CALIB_METHODS) { - ## Alba's version + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast hcst_calibrated <- CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", @@ -35,17 +34,8 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) - ## TODO: Resolve these workarounds - ## In the new CST_Calibration function, the fcst must have the same number of - ## dimensions as the hcst if (!is.null(fcst)) { - dim(fcst$data) <- c(dim(fcst$data)[1:2], "sday" = 1, "sweek" = 1, - dim(fcst$data)[3:7]) - ## In the new CST_Calibration function, memb_dim and sdate_dim must be the - ## same in the hcst and the fcst. - names(dim(fcst$data))[which(names(dim(fcst$data)) == - 'fcst_syear')] <- "syear" - + # Calibrate the forecast fcst_calibrated <- CST_Calibration(hcst, obs, fcst, cal.method = method, eval.method = "leave-one-out", @@ -57,7 +47,10 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + } else { + fcst_calibrated <- NULL } + ## TODO: Implement other calibration methods } else { stop("Calibration method is not implemented in CSTools") } diff --git a/modules/data_load/dates2load.R b/modules/data_load/dates2load.R index f6a5e27f..d9e5fa0c 100644 --- a/modules/data_load/dates2load.R +++ b/modules/data_load/dates2load.R @@ -49,12 +49,13 @@ dates2load <- function(recipe, logger){ # adds the correspondent dims to each sdate array .add_dims <- function(data, type){ - if (type == "hcst"){ - default_dims <- c(sday = 1, sweek = 1, - syear = length(data)) - } else { - default_dims <- c(fcst_syear = length(data)) - } +# if (type == "hcst"){ +# default_dims <- c(sday = 1, sweek = 1, +# syear = length(data)) +# } else { +# default_dims <- c(fcst_syear = length(data)) +# } + default_dims <- c(sday = 1, sweek = 1, syear = length(data)) default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 25a3b82b..68588156 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -37,7 +37,7 @@ startR_to_s2dv <- function(startR_array){ #dim(sdates) <- sdates_dims ## TODO: change this line when time attributes work correctly? - time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") + time_dims <- c("time", "sday", "sweek", "syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), lon = attr2array(attr(startR_array, @@ -193,8 +193,8 @@ load_datasets <- function(recipe_file){ # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims @@ -218,23 +218,34 @@ load_datasets <- function(recipe_file){ longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), ensemble = indices(1:fcst.nmember), return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), + longitude = 'dat', + time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_syear" + + ## TODO: Add 'sday' and 'sweek' dims to fcst + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + } + # names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" fcst <- startR_to_s2dv(fcst) } else { - fcst <- NULL + fcst <- NULL } # Load reference @@ -251,23 +262,23 @@ load_datasets <- function(recipe_file){ list(1,1,1,1,1), drop="selected")) obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, - retrieve = TRUE) + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } else if (store.freq == "daily_mean") { @@ -307,8 +318,8 @@ load_datasets <- function(recipe_file){ # TODO: Reorder obs dims to match hcst dims? # Adds ensemble dim to obs (for consistency with hcst/fcst) - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims -- GitLab From e8b6f53cb18149322070ce4827986c7ab9af598e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Apr 2022 13:25:29 +0200 Subject: [PATCH 15/29] Get hcst calibration (Lluis version) to work with s2dv_cube --- modules/Calibration/Calibration.R | 30 +++++++++++++++++-------- modules/Calibration/Calibration_fcst3.R | 8 ++++--- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 4945c333..2463fcbd 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -103,14 +103,26 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, sdate_dim = "syear", ncores = ncores) + hcst$data[!is.finite(hcst$data)] <- NA + + } else if (method %in% c("s2s4e")) { + hcst <- Apply(data=list(obs = obs$data, hcst = hcst$data), + extra_info = list(na.rm = na.rm), + target_dims = c('sday', 'syear', 'ensemble'), + output_dims = c('sday', 'syear', 'ensemble'), + na.rm = na.rm, + ncores = ncores, + fun = .cal)[[1]] + + hcst[!is.finite(hcst)] <- NA } else { ## TODO: Implement other calibration methods, implement logger stop("Calibration method is not implemented in CSTools") - # error(logger, - # "Calibration method is not implemented in CSTools") + # error(logger, + # "Calibration method is not implemented in CSTools") } - hcst$data[!is.finite(hcst$data)] <- NA + # hcst$data[!is.finite(hcst$data)] <- NA ## TODO: Is this line necessary? # remove(hcst) @@ -193,12 +205,12 @@ fcst_calib <- function(obs, hcst, fcst, fun, mm=F, # Fcst Calibration ## TODO: Return s2dv_cube calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data), - extra_info = list(na.rm = na.rm), - target_dims = target_dims, - output_dims = output_dims, - na.rm = na.rm, - ncores = ncores, - fun = .fcstcal)[[1]] ## Need to source this fun? + extra_info = list(na.rm = na.rm), + target_dims = target_dims, + output_dims = output_dims, + na.rm = na.rm, + ncores = ncores, + fun = .fcstcal)[[1]] ## TODO: Restore this line # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R index e57f05c4..c67aae4c 100644 --- a/modules/Calibration/Calibration_fcst3.R +++ b/modules/Calibration/Calibration_fcst3.R @@ -18,15 +18,17 @@ library(ClimProjDiags) } ind <- 1:ntime + climObs <- sapply(ind, function(x) {mean( - Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) + Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) climPred <- sapply(ind, function(x) {mean( - Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) + Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) var_obs.ano <- var_obs - climObs var_exp.ano <- var_exp - climPred - calib.hcst <- NA * Subset(var_exp, along=c('sday','syear'), c(fcst.index, 0)) + calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), + c(fcst.index, 0)) # loop over the hindcast years for (t in 1 : ntime) { -- GitLab From 8c59d0170f40458aaaca47e5d9c5f201f187d526 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Apr 2022 13:25:58 +0200 Subject: [PATCH 16/29] Add quantile mapping as calibration method for daily data --- modules/Calibration/test_calib.R | 81 ++++++++++++++++++++++---------- modules/test_victoria.R | 2 +- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index d4d4259c..20f067ce 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -17,26 +17,18 @@ calibrate_datasets <- function(data, recipe) { mm <- recipe$Analysis$Datasets$Multimodel ncores <- 4 na.rm <- T + + if (recipe$Analysis$Variables$freq == "monthly_mean") { - CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") - ## TODO: Revise output variable names - if (method %in% CST_CALIB_METHODS) { - ## Alba's version of CST_Calibration (pending merge) is being used - # Calibrate the hindcast - hcst_calibrated <- CST_Calibration(hcst, obs, - cal.method = method, - eval.method = "leave-one-out", - multi.model = mm, - na.fill = TRUE, - na.rm = na.rm, - apply_to = NULL, - alpha = NULL, - memb_dim = "ensemble", - sdate_dim = "syear", - ncores = ncores) - if (!is.null(fcst)) { - # Calibrate the forecast - fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + ## TODO: Restructure the code? + if (!(method %in% CST_CALIB_METHODS)) { + stop("Calibration method is not implemented in CSTools") + } else { + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast and forecast + hcst_calibrated <- CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -47,15 +39,57 @@ calibrate_datasets <- function(data, recipe) { memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + if (!is.null(fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + + } + + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + warning("Data is at daily frequency. Quantile Mapping will be used", + " as the calibration method.") + hcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = NULL, + ## TODO: Tidy up + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) + + if (!is.null(fcst)) { + fcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = fcst, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) } else { fcst_calibrated <- NULL } - ## TODO: Implement other calibration methods - } else { - stop("Calibration method is not implemented in CSTools") } + print("###### CALIBRATION COMPLETE ######") - return(list(hcst_calibrated, fcst_calibrated)) + return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated)) } # ## Lluís' version: @@ -81,7 +115,6 @@ calibrate_datasets <- function(data, recipe) { # split_factor = 1, # ncores = ncores) - ## TODO: Include Lluís' forecast version and test it # For daily # hcst <- CSTools::CST_QuantileMapping(data$hcst, # data$obs, diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 686b1f13..3f394e28 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" +recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" source("modules/data_load/load.R") source("modules/Calibration/test_calib.R") -- GitLab From 4b374f9d13643fbea26d7427b4f9c842f47a1c85 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 28 Apr 2022 16:29:32 +0200 Subject: [PATCH 17/29] Add second version of legacy fcst calibration --- modules/Calibration/Calibration_fcst4.R | 158 ++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 modules/Calibration/Calibration_fcst4.R diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R new file mode 100644 index 00000000..f1866dcb --- /dev/null +++ b/modules/Calibration/Calibration_fcst4.R @@ -0,0 +1,158 @@ +library(ClimProjDiags) + +# Performs the hcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. +.cal <- function(var_obs, var_exp, na.rm=F) { + + ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] + nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] + ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + + climObs <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) + climPred <- sapply(ind, function(x) {mean( + Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) + + var_obs.ano <- var_obs - climObs + var_exp.ano <- var_exp - climPred + + calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), + c(fcst.index, 0)) + # loop over the hindcast years + for (t in 1 : ntime) { + + # defining forecast,hindcast in cross-validation + fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'), + indices=list(t, fcst.index)) + hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) + + obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(var_exp)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) + # Appends calibrated sdate to the hcst array + calib.hcst <- abind(calib.hcst, calib.syear, + along=which(names(dim(var_exp)) == 'syear'), + hier.names=F) + } + + names(dim(calib.hcst)) <- names(dim(calib.syear)) + # removes NAs added in array init + calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) + + return(calib.hcst) + +} + +# Performs the fcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. +.fcstcal <- function(obs, hcst, fcst, na.rm=F) { + + ntime <- dim(hcst)['syear'] + nmembers <- dim(hcst)['ensemble'] + fcst.nmembers <- dim(fcst)['ensemble'] + ndays <- dim(hcst)['sday'] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + climObs <- mean(obs, na.rm=na.rm) + climPred <- mean(hcst, na.rm=na.rm) + + obs.ano <- obs - climObs + hcst.ano <- hcst - climPred + fcst.ano <- fcst - climPred + + calib.fcst <- NA * fcst + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs allowed + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(hcst)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) + # fcst_cube$data <- fcst.calib + return(fcst.calib) + # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) + +} + + -- GitLab From 0dd29335d02315c8c7888705fec2063d68680216 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 29 Apr 2022 16:55:38 +0200 Subject: [PATCH 18/29] Move loading of libraries to tools/libs.R --- modules/Calibration/Calibration_fcst3.R | 1 - modules/Calibration/Calibration_fcst4.R | 1 - modules/Calibration/test_calib.R | 39 ++----------------------- tools/libs.R | 3 +- 4 files changed, 4 insertions(+), 40 deletions(-) diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R index c67aae4c..babc1922 100644 --- a/modules/Calibration/Calibration_fcst3.R +++ b/modules/Calibration/Calibration_fcst3.R @@ -1,4 +1,3 @@ -library(ClimProjDiags) # Performs the hcst calibration for a centered time # window of sdays. Objects must have the dimensions; diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R index f1866dcb..9359c6e8 100644 --- a/modules/Calibration/Calibration_fcst4.R +++ b/modules/Calibration/Calibration_fcst4.R @@ -1,4 +1,3 @@ -library(ClimProjDiags) # Performs the hcst calibration for a centered time # window of sdays. Objects must have the dimensions; diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index 20f067ce..38bc08bd 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -1,8 +1,5 @@ -## TODO: Replace with library(CSTools) once Alba's fun is merged -library(s2dv) -library(ClimProjDiags) -library(multiApply) +## TODO: Remove once Alba's fun is merged in CSTools source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") ## Entry params data and recipe? @@ -10,7 +7,6 @@ calibrate_datasets <- function(data, recipe) { hcst <- data$hcst obs <- data$obs fcst <- data$fcst - ## TODO: Implement daily case # Calibration function params method <- recipe$Analysis$Workflow$Calibration$method @@ -92,35 +88,4 @@ calibrate_datasets <- function(data, recipe) { return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated)) } -# ## Lluís' version: -# ## Note: memb_dim is hardcoded as 'ensemble'. -# source("modules/Calibration/Calibration.R") -# ## TODO: Change path -# # source("https://earth.bsc.es/gitlab/api/v4/projects/446/repository/blobs/bfe5848c06536218f1fbed6e9356cc6aebe27762/raw?ref=TFM&private_token=pExJt3V4FYwj4yzvy9ps") -# source("modules/Calibration/Calibration_fcst3.R") -# ## TODO: move this elsewhere -# library(abind) -# hcst_lluis <- hcst_calib(obs, hcst, -# method = method, -# mm = mm, -# na.rm = na.rm, -# split_factor = 1, -# ncores = ncores) -# -# fcst_lluis <- fcst_calib(obs$data, hcst$data, fcst$data, -# # method = method, -# fun = .fcstcal, ## ? -# mm = mm, -# na.rm = na.rm, -# split_factor = 1, -# ncores = ncores) - - # For daily - # hcst <- CSTools::CST_QuantileMapping(data$hcst, - # data$obs, - # exp_cor = NULL, - # sample_dims = c("syear", "time", "ensemble"), - # sample_length = NULL, - # method = "QUANT", - # ncores = ncores, - # na.rm = na.rm) + diff --git a/tools/libs.R b/tools/libs.R index 7b010f54..483eda5d 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -3,12 +3,13 @@ library(startR) library(ClimProjDiags) library(multiApply) library(yaml) +library(s2dv) # library(s2dverification) # library(ncdf4) # library(abind) # library(easyVerification) # library(easyNCDF) - library(CSTools) +library(CSTools) # # library(parallel) # library(pryr) # To check mem usage. -- GitLab From 8672e2d2089e7dae6d197759bfc64f3368b32d1d Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 4 May 2022 14:30:14 +0200 Subject: [PATCH 19/29] Add multi-model calilbration --- modules/Calibration/test_calib.R | 14 ++++++++++++-- tools/libs.R | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R index 38bc08bd..fa3f1359 100644 --- a/modules/Calibration/test_calib.R +++ b/modules/Calibration/test_calib.R @@ -14,6 +14,18 @@ calibrate_datasets <- function(data, recipe) { ncores <- 4 na.rm <- T + # Replicate observation array for the multi-model case + if (mm) { + obs.mm <- obs$data + for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, obs$data, + along=which(names(dim(obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + obs$data <- obs.mm + remove(obs.mm) + } + if (recipe$Analysis$Variables$freq == "monthly_mean") { CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") @@ -87,5 +99,3 @@ calibrate_datasets <- function(data, recipe) { print("###### CALIBRATION COMPLETE ######") return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated)) } - - diff --git a/tools/libs.R b/tools/libs.R index 483eda5d..3ee4c40a 100644 --- a/tools/libs.R +++ b/tools/libs.R @@ -4,9 +4,9 @@ library(ClimProjDiags) library(multiApply) library(yaml) library(s2dv) +library(abind) # library(s2dverification) # library(ncdf4) -# library(abind) # library(easyVerification) # library(easyNCDF) library(CSTools) -- GitLab From a2d8abe8f5b5011b09a473d9becd59c3ccdfd145 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 12 May 2022 14:35:33 +0200 Subject: [PATCH 20/29] Modify the way the cross-validation is done to reproduce the results from CST_Calibration() --- modules/Calibration/Calibration_fcst3.R | 46 ++++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R index babc1922..3d24226b 100644 --- a/modules/Calibration/Calibration_fcst3.R +++ b/modules/Calibration/Calibration_fcst3.R @@ -20,23 +20,30 @@ climObs <- sapply(ind, function(x) {mean( Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) + + climObs2 <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) + climPred <- sapply(ind, function(x) {mean( Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) - var_obs.ano <- var_obs - climObs - var_exp.ano <- var_exp - climPred - + climPred2 <- sapply(ind, function(x) {mean(var_exp, na.rm=na.rm)}) + + var_obs.ano <- var_obs - climObs2 + var_fcst.ano <- var_exp - climPred + var_exp.ano <- var_exp - climPred2 + calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), c(fcst.index, 0)) # loop over the hindcast years for (t in 1 : ntime) { # defining forecast,hindcast in cross-validation - fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'), - indices=list(t, fcst.index)) + fcst.ano <- Subset(var_fcst.ano, along=c('syear', 'sday'), + indices=list(t, fcst.index)) + hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) - obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) + obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) # computes ensemble mean of the fcst & hcst em_fcst <- mean(fcst.ano, na.rm=na.rm) @@ -66,9 +73,20 @@ } sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) - + a <- abs(corr) * (sd_obs / sd_em_hcst) b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + # print(paste0("climObs: ", climObs[t])) # + # print(paste0("sd_obs :", sd_obs)) # + # print(paste0("sd_hcst_diff :", sd_hcst_diff)) + # print(paste0("sd_em_hcst :", sd_em_hcst)) # + # print(paste0("em_hcst :", em_hcst)) + # print(paste0("Alpha - Lluís:", a)) # + # print(paste0("Beta - Lluís:", b)) # + # print(paste0("fcst_diff: ", fcst_diff)) + # print(paste0("climPred: ", climPred)) + # print(paste0("climCross: ", climCross[t])) + # print(paste0("em_fcst :", em_fcst)) # Calibrates hcst sdate calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) @@ -143,10 +161,20 @@ } sd_hcst_diff <- sd(hcst_diff, na.rm) - + # print(paste0("Sigma e:", sd_hcst_diff)) a <- abs(corr) * (sd_obs / sd_em_hcst) b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) - + ## print(paste0("climObs: ", climObs)) + ## print(paste0("sd_obs :", sd_obs)) + # print(paste0("sd_hcst_diff :", sd_hcst_diff)) + # print(paste0("sd_em_hcst :", sd_em_hcst)) + # print(paste0("em_hcst :", em_hcst)) + ## print(paste0("Alpha - Lluís:", a)) + ## print(paste0("Beta - Lluís:", b)) + # print(paste0("climObs: ", climObs)) + # print(paste0("fcst_diff: ", fcst_diff)) + # print(paste0("em_fcst :", em_fcst)) + # Calibrates hcst sdate fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) # fcst_cube$data <- fcst.calib -- GitLab From 56d1e99cbd6ba4acb4656125a89f12bafbb7cd77 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 12 May 2022 16:55:00 +0200 Subject: [PATCH 21/29] formatting --- modules/Skill/s2s.metrics.R | 242 ++++++++++++++++++++++++++++++++++++ modules/Skill/s2s.probs.R | 42 +++++++ 2 files changed, 284 insertions(+) create mode 100644 modules/Skill/s2s.metrics.R create mode 100644 modules/Skill/s2s.probs.R diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R new file mode 100644 index 00000000..5497a885 --- /dev/null +++ b/modules/Skill/s2s.metrics.R @@ -0,0 +1,242 @@ + +source("s2s.probs.R") + + +# MERGES verification dimns int single sdate dim along which the +# verification metrics will be computed +mergedims <- function(data, indims, outdim) { + + old.dims <- names(dim(data)) + new.dims <- c(indims, old.dims[! old.dims %in% indims]) + data <- Reorder(data, new.dims) + + for (step in 1:(length(indims) - 1)) { + if (step == 1) { + data <- MergeDims(data, + merge_dims=c(indims[step], indims[step+1]), + rename_dim=outdim) + } else { + data <- MergeDims(data, + merge_dims=c(outdim, indims[step+1]), + rename_dim=outdim) + } + } + + return(data) + +} + +Compute_verif_metrics <- function(exp, obs, skill_metrics, + verif.dims=c("syear", "sday", "sweek"), + na.rm=T,ncores=1) { + + exp <- mergedims(exp, verif.dims, 'sdate') + obs <- mergedims(obs, verif.dims, 'sdate') + ## obs already contains ensemble dimension + # obs <- InsertDim(obs, 2, 1, name='member') + + ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE + if (na.rm) { + nsdates <- dim(exp)[which(names(dim(exp)) == 'sdate')][] + not_missing_dates <- c() + for (sdate in 1:nsdates){ + if(!all(is.na(Subset(exp,along='sdate', list(sdate), drop=F)))){ + not_missing_dates <- c(not_missing_dates, sdate) + } + } + obs <- Subset(obs, along='sdate', not_missing_dates, drop=F) + exp <- Subset(exp, along='sdate', not_missing_dates, drop=F) + } + + return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm)) + +} + +.compute_metrics <- function(exp, obs, metrics, ncores=1, split_factor=1, na.rm=FALSE) { + + veriUnwrap <- easyVerification:::veriUnwrap + + ## SPECS VERIFICATION PARAMETERS + SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") + + FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", + bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr") + + PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), + bss10=c(1/10), bss90=c(9/10), enscorr=NULL) + + metrics_data <- list() + for (metric in metrics) { + + if (metric %in% SPECSVER_METRICS) { + + data <- Apply(data=list(exp, obs), + target_dims=list(c('sdate', 'ensemble'), + c('sdate', 'ensemble')), + fun="veriApply", + verifun=FUN[[metric]], + prob=PROB[[metric]], + ensdim=2, + ncores=ncores, + split_factor=split_factor, + tdim=1, + na.rm=na.rm)[[1]] #* 100 + + data <- Subset(data, c('member'),list(1),drop='selected') + data[!is.finite(data)] <- NaN + metrics_data[[ metric ]] <- list(data) + + } else if (metric == "corr_eno") { + # computes ensemble mean + data <- multiApply::Apply(data = exp, + target_dims = 'ensemble', + fun = mean, ncores = ncores)$output1 + + data <- multiApply::Apply(data = list(exp = data, + obs = Subset(obs, c('ensemble'), + list(1), drop='selected')), + target_dims = 'sdate', fun = .correlation_eno, + time_dim = 'sdate', ncores = ncores) + + # append both corr_r and corr_sign to metrics list + for (i in 1:length(data)) { + coeff <- data[[i]] + coeff[!is.finite(coeff)] <- NaN + if (names(data)[i] %in% c("r", "sign")){ + metrics_data[[ paste0("corr_", names(data)[i]) ]] <- list(coeff) + } + } + + } else if (metric == "frpss_sign") { + + terciles_obs <- Compute_probs(obs, c(1/3,2/3), + quantile_dims=c('sdate'), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + terciles_exp <- Compute_probs(exp, c(1/3,2/3), + quantile_dims=c('sdate', 'ensemble'), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) + + frps <- NULL + n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] + frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, + probs_obs = terciles_obs$probs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, ncores = ncores)$output1 + + frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, + probs_obs = terciles_obs$probs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members=n_members, + Fair = TRUE, ncores = ncores)$output1 + + frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate', + fun=mean, ncores=ncores)$output1 + frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims='sdate', + fun=mean, ncores=ncores)$output1 + + frpss_respect2clim <- NULL + frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean + frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp, + skill_B=frps$clim, + time_dim='sdate', + ncores=ncores)$signif + + frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN + frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN + + frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, + c('ensemble'), list(1), + drop='selected') + frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign, + c('ensemble'), list(1), + drop='selected') + + metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss) + metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign) + + } + + } + + return(metrics_data) + +} + +.correlation_eno = function(exp, obs, time_dim = 'syear', alpha = 0.05, ncores = 1){ + + cor = NULL + cor$r = cor(exp, obs) # Correlation coefficient + + n_eff = s2dv::Eno(data = obs, time_dim = time_dim, + na.action = na.pass, ncores = ncores) + + t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE) + t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2) + + if (anyNA(c(t, t_alpha2_n2)) == FALSE & t >= t_alpha2_n2) { + cor$sign = TRUE + } else { + cor$sign = FALSE + } + + z_a2 = qnorm(p = alpha/2, lower.tail = FALSE) + conf_int = c() + conf_int[1] = tanh(atanh(cor$r) - z_a2 / sqrt(n_eff-3)) + conf_int[2] = tanh(atanh(cor$r) + z_a2 / sqrt(n_eff-3)) + cor$conf_int = conf_int + + cor$n_eff <- n_eff + + return(cor) +} + + +.rps_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members = NULL){ + + ## Checkings + if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){ + stop('The number of probabilities has to be the same as the number of categories') + } + if (!is.numeric(n_categories)){ + stop('n_categories must be an integer') + } + if (!is.numeric(n_members) & Fair == TRUE){ + stop('n_members must be an integer') + } + + ## RPS (Wilks 2011, pp.348-350) + rps <- NULL + for (i in 1:n_categories){ + rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 + } + rps <- sum(rps) + + ## FairRPS + if (Fair == TRUE){ + + ## formula of SpecsVerification::EnsRps + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + R <- n_members + R_new <- Inf + probs_cum <- cumsum(probs_exp) + adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum) + adjustment <- sum(adjustment) + + rps <- rps + adjustment + + } + + return(rps) +} diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R new file mode 100644 index 00000000..650a4bc5 --- /dev/null +++ b/modules/Skill/s2s.probs.R @@ -0,0 +1,42 @@ + + +Compute_probs <- function(data, thresholds, + ncores=1, quantile_dims=c('syear', 'member'), + probs_dims=list('member', 'bin'), + split_factor=1, na.rm=FALSE) { + + quantiles <- Apply(data, + quantile_dims, + function(x, na.rm) {quantile(as.vector(x), + thresholds,na.rm=na.rm)}, + output_dims="bin", + ncores=ncores, + na.rm=na.rm, + split_factor=split_factor)[[1]] + + if (na.rm == FALSE) { + c2p <- function(x, t) { + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } + } else { + c2p <- function(x, t) { + if (any(!is.na(x))){ + colMeans(convert2prob(as.vector(x), threshold = as.vector(t))) + } else { + c(NA, NA, NA) + } + } + } + + probs <- Apply(data = list(x = data, t = quantiles), + target_dims = probs_dims, + c2p, + output_dims = "bin", + split_factor = split_factor, + ncores = ncores)[[1]] + + return(list(probs=probs, quantiles=quantiles)) + +} + + -- GitLab From c4c7b1786791055e857d5bebc2ae2caee67e4b9e Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 12 May 2022 17:04:17 +0200 Subject: [PATCH 22/29] Slight modification for testing --- modules/data_load/testing_recipes/recipe_4.yml | 2 +- modules/test_victoria.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml index 421888b1..2cfcff37 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -19,7 +19,7 @@ Analysis: hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 - leadtimemax: 4 + leadtimemax: 2 Region: latmin: -10 latmax: 10 diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 3f394e28..686b1f13 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" +recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" source("modules/data_load/load.R") source("modules/Calibration/test_calib.R") -- GitLab From 1694df406e1e8fd93d5363884c76e7529a31af1f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 13 May 2022 12:02:01 +0200 Subject: [PATCH 23/29] Change ensemble dim name, formatting --- modules/Skill/s2s.metrics.R | 44 ++++++++++++++++++++----------------- modules/Skill/s2s.probs.R | 4 ++-- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 5497a885..32243310 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -1,5 +1,5 @@ -source("s2s.probs.R") +source("modules/Skill/s2s.probs.R") # MERGES verification dimns int single sdate dim along which the @@ -52,7 +52,9 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } -.compute_metrics <- function(exp, obs, metrics, ncores=1, split_factor=1, na.rm=FALSE) { +.compute_metrics <- function(exp, obs, metrics, + ncores=1, split_factor=1, + na.rm=FALSE) { veriUnwrap <- easyVerification:::veriUnwrap @@ -71,18 +73,18 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, if (metric %in% SPECSVER_METRICS) { data <- Apply(data=list(exp, obs), - target_dims=list(c('sdate', 'ensemble'), - c('sdate', 'ensemble')), - fun="veriApply", - verifun=FUN[[metric]], - prob=PROB[[metric]], - ensdim=2, - ncores=ncores, - split_factor=split_factor, - tdim=1, - na.rm=na.rm)[[1]] #* 100 + target_dims=list(c('sdate', 'ensemble'), + c('sdate', 'ensemble')), + fun="veriApply", + verifun=FUN[[metric]], + prob=PROB[[metric]], + ensdim=2, + ncores=ncores, + split_factor=split_factor, + tdim=1, + na.rm=na.rm)[[1]] #* 100 - data <- Subset(data, c('member'),list(1),drop='selected') + data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN metrics_data[[ metric ]] <- list(data) @@ -127,19 +129,21 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, probs_obs = terciles_obs$probs), - target_dims = 'bin', - fun = .rps_from_probs, - n_categories = 3, - n_members = n_members, - Fair = TRUE, ncores = ncores)$output1 + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, probs_obs = terciles_obs$probs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, - n_members=n_members, - Fair = TRUE, ncores = ncores)$output1 + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate', fun=mean, ncores=ncores)$output1 diff --git a/modules/Skill/s2s.probs.R b/modules/Skill/s2s.probs.R index 650a4bc5..a8cab57c 100644 --- a/modules/Skill/s2s.probs.R +++ b/modules/Skill/s2s.probs.R @@ -1,8 +1,8 @@ Compute_probs <- function(data, thresholds, - ncores=1, quantile_dims=c('syear', 'member'), - probs_dims=list('member', 'bin'), + ncores=1, quantile_dims=c('syear', 'ensemble'), + probs_dims=list('ensemble', 'bin'), split_factor=1, na.rm=FALSE) { quantiles <- Apply(data, -- GitLab From 5f51a06e873e64f2a71dead9a014888dac1a171c Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 17 May 2022 14:57:58 +0200 Subject: [PATCH 24/29] Add wrapper fun for skill computations, small changes for testing --- modules/Skill/Skill.R | 109 +++++++++++++----- modules/Skill/s2s.metrics.R | 15 ++- .../data_load/testing_recipes/recipe_4.yml | 4 +- modules/test_victoria.R | 7 ++ 4 files changed, 95 insertions(+), 40 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index e7cad0cb..9976d622 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -7,37 +7,82 @@ # - reliability diagram # - ask Carlos which decadal metrics he is currently using +source("modules/Skill/s2s.metrics.R") +## TODO: Remove once the new version of s2dv is released +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPS.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPSS.R") + +## TODO: Implement this in the future ## Which parameter are required? -if (!("obs" %in% ls()) || is.null(obs)) { - error(logger, - "There is no object 'obs' in the global environment or it is NULL") -} -if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { - error(logger, - "There is no object 'fcst' in the global environment or it is NULL") -} -if (!("hcst" %in% ls()) || is.null(hcst)) { - error(logger, - "There is no object 'hcst' in the global environment or it is NULL") -} -if (!("metric" %in% ls()) || is.null(metric)) { - warn(logger, - "Verification metric not found and it is set as 'EnsCorr'.") - metric <- 'EnsCorr' -} -if (metric %in% c('FRPSS', 'RPSS')) { - metric_fun <- "veriApply" - metric_method <- "FairRpss" -} else if (metric %in% c("FCRPSS", "CRPSS")) { - metric_fun <- "veriApply" -} else if (metric %in% c("EnsCorr", "EnsCor")) { - metric_fun <- "veriApply" - metric_method <- "EnsCorr" -#... -} else { - error(logger, "Unknown verification metric defined in the recipe.") - metric_fun <- 'NotFound' +# if (!("obs" %in% ls()) || is.null(obs)) { +# error(logger, +# "There is no object 'obs' in the global environment or it is NULL") +# } +# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { +# error(logger, +# "There is no object 'fcst' in the global environment or it is NULL") +# } +# if (!("hcst" %in% ls()) || is.null(hcst)) { +# error(logger, +# "There is no object 'hcst' in the global environment or it is NULL") +# } +# if (!("metric" %in% ls()) || is.null(metric)) { +# warn(logger, +# "Verification metric not found and it is set as 'EnsCorr'.") +# metric <- 'EnsCorr' +# } +# if (metric %in% c('FRPSS', 'RPSS')) { +# metric_fun <- "veriApply" +# metric_method <- "FairRpss" +# } else if (metric %in% c("FCRPSS", "CRPSS")) { +# metric_fun <- "veriApply" +# } else if (metric %in% c("EnsCorr", "EnsCor")) { +# metric_fun <- "veriApply" +# metric_method <- "EnsCorr" +# #... +# } else { +# error(logger, "Unknown verification metric defined in the recipe.") +# metric_fun <- 'NotFound' +# } +# info(logger, paste("#-------------------------- ", "\n", +# " running Skill module ", "\n", +# " it can call ", metric_fun )) + +compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { + # exp: s2dv_cube containing the hindcast + # obs: s2dv_cube containing the observations + # recipe: auto-s2s recipe as provided by read_yaml + + ## TODO: Adapt time_dims to subseasonal case? + time_dim <- 'syear' + memb_dim <- 'ensemble' + metric <- recipe$Analysis$Workflow$Skill$metric + # Whether the fair version of the metric is to be computed + if (metric %in% c('FRPS', 'FRPSS', 'BSS10', 'BSS90')) { + Fair <- T + } else { + Fair <- F + } + + if (metric %in% c('RPS', 'FRPS')) { + skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + + } else if (metric %in% c('RPSS', 'FRPSS')) { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + } else if (metric == 'BSS10') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.1, Fair = Fair, ncores = ncores) + } else if (metric == 'BSS90') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + } + + return(skill) + } -info(logger, paste("#-------------------------- ", "\n", - " running Skill module ", "\n", - " it can call ", metric_fun )) + + ## TODO: Add specsVerification funs + diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 32243310..1f5b8a9d 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -26,6 +26,8 @@ mergedims <- function(data, indims, outdim) { } +## TODO: New function to split sdate dim back into 'sday', 'sweek' and 'syear'? + Compute_verif_metrics <- function(exp, obs, skill_metrics, verif.dims=c("syear", "sday", "sweek"), na.rm=T,ncores=1) { @@ -83,15 +85,16 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, split_factor=split_factor, tdim=1, na.rm=na.rm)[[1]] #* 100 - + + ## TODO: Keep 'ensemble' dimension to maintain flexibility? data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN - metrics_data[[ metric ]] <- list(data) + metrics_data[[ metric ]] <- data ## previously: list(data) } else if (metric == "corr_eno") { # computes ensemble mean data <- multiApply::Apply(data = exp, - target_dims = 'ensemble', + target_dims = 'ensemble', fun = mean, ncores = ncores)$output1 data <- multiApply::Apply(data = list(exp = data, @@ -105,19 +108,19 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, coeff <- data[[i]] coeff[!is.finite(coeff)] <- NaN if (names(data)[i] %in% c("r", "sign")){ - metrics_data[[ paste0("corr_", names(data)[i]) ]] <- list(coeff) + metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff) } } } else if (metric == "frpss_sign") { - terciles_obs <- Compute_probs(obs, c(1/3,2/3), + terciles_obs <- Compute_probs(obs, c(1/3, 2/3), quantile_dims=c('sdate'), ncores=ncores, split_factor=1, na.rm=na.rm) - terciles_exp <- Compute_probs(exp, c(1/3,2/3), + terciles_exp <- Compute_probs(exp, c(1/3, 2/3), quantile_dims=c('sdate', 'ensemble'), ncores=ncores, split_factor=1, diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml index 2cfcff37..33c3a74c 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -19,7 +19,7 @@ Analysis: hcst_start: '1993' hcst_end: '2016' leadtimemin: 2 - leadtimemax: 2 + leadtimemax: 3 Region: latmin: -10 latmax: 10 @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: RPSS + metric: BSS90 Indicators: index: no Output_format: S2S4E diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 686b1f13..a38edbc2 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -4,7 +4,14 @@ recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" source("modules/data_load/load.R") source("modules/Calibration/test_calib.R") +source("modules/Skill/Skill.R") + data <- load_datasets(recipe_file) ## TODO: Instead of reading the recipe at each module, do it at the beginning? recipe <- read_yaml(recipe_file) +# Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) +# Compute skill metric +skill_metric <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs, + recipe, na.rm = T, ncores = 4) + -- GitLab From aef22ebcda44b819957f53f7cfdce9323259688b Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 17 May 2022 16:33:17 +0200 Subject: [PATCH 25/29] Add some systems and grid description files to archive --- conf/archive.yml | 20 ++++++++++++++------ conf/grid_description/griddes_ncep-cfsv2.txt | 18 ++++++++++++++++++ conf/grid_description/griddes_system2c3s.txt | 16 +++++++--------- 3 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 conf/grid_description/griddes_ncep-cfsv2.txt diff --git a/conf/archive.yml b/conf/archive.yml index 43750a1b..0caa78bf 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -5,11 +5,11 @@ archive: System: system5c3s: src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/", - "tasmin":"_f24h/","tasmax":"_f24h/"} + daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", + "prlr":"_s0-24h/", "sfcWind":"_f6h/"} + monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", + "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: fcst: 51 hcst: 25 @@ -38,7 +38,7 @@ archive: nmember: fcst: 10 hcst: 10 - reference_grid: "conf/grid_description/griddes_system35c3s.txt" + reference_grid: "conf/grid_description/griddes_system2c3s.txt" eccc1: src: "exp/eccc/eccc1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", @@ -47,6 +47,14 @@ archive: fcst: 10 hcst: 10 reference_grid: "conf/grid_description/griddes_eccc1.txt" + ncep-cfsv2: + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 20 + hcst: 20 + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: src: "recon/ecmwf/era5/" diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt new file mode 100644 index 00000000..6d8abe86 --- /dev/null +++ b/conf/grid_description/griddes_ncep-cfsv2.txt @@ -0,0 +1,18 @@ +# +# Grid description file for NCEP CFSv2 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = Longitude +xunits = degrees_east +yname = lat +ylongname = Latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 +# diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt index cf1e3515..0827b7c0 100644 --- a/conf/grid_description/griddes_system2c3s.txt +++ b/conf/grid_description/griddes_system2c3s.txt @@ -1,18 +1,16 @@ # -# Grid description file for JMA System3c3s -# -# gridID 2 +# Grid description file for JMA System 2 C3S # gridtype = lonlat gridsize = 10512 -xsize = 144 -ysize = 73 xname = lon -xlongname = "longitude" -xunits = "degrees_east" +xlongname = longitude +xunits = degrees_east yname = lat -ylongname = "latitude" -yunits = "degrees_north" +ylongname = latitude +yunits = degrees_north +xsize = 144 +ysize = 73 xfirst = 0 xinc = 2.5 yfirst = 90 -- GitLab From 506018beeab7ec5c06c5b0b0c0430bf5fa656b96 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 18 May 2022 12:27:07 +0200 Subject: [PATCH 26/29] Improvements to archive --- conf/archive.yml | 33 ++++++++++++------- conf/grid_description/griddes_system21_m1.txt | 16 ++++----- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 0caa78bf..e869cb9c 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -23,10 +23,19 @@ archive: fcst: 51 hcst: 25 reference_grid: "conf/grid_description/griddes_system7c3s.txt" + system21_m1: + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", + "tasmin":"_f24h/", "tasmax":"_f24h/"} + nmember: + fcst: 50 + hcst: 30 + reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: src: "exp/cmcc/system35c3s/" monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f24h/", "tasmin":"_f24h"} nmember: fcst: 50 hcst: 40 @@ -48,24 +57,24 @@ archive: hcst: 10 reference_grid: "conf/grid_description/griddes_eccc1.txt" ncep-cfsv2: - src: "exp/ncep/cfs-v2/" - monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", - "tasmax":"_f6h/", "tasmin":"_f6h/"} - nmember: - fcst: 20 - hcst: 20 + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 20 + hcst: 20 reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/"} reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" - daily_mean: {"tas":"_f1h/","rsds":"_f1h/", - "prlr":"_f1h/","sfcWind":"_f1h/"} + daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", + "prlr":"_f1h/", "sfcWind":"_f1h/"} monthly_mean: {"tas":"_f1h/"} reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: diff --git a/conf/grid_description/griddes_system21_m1.txt b/conf/grid_description/griddes_system21_m1.txt index bf9ac52b..954438f8 100644 --- a/conf/grid_description/griddes_system21_m1.txt +++ b/conf/grid_description/griddes_system21_m1.txt @@ -1,16 +1,16 @@ -# Grid description file for DWD GCFS2.1 (CDS) -# gridID 2 +# +# Grid description file for DWD GCFS2.1 # gridtype = lonlat gridsize = 64800 -xsize = 360 -ysize = 180 xname = lon -xlongname = "longitude" -xunits = "degrees_east" +xlongname = longitude +xunits = degrees_east yname = lat -ylongname = "latitude" -yunits = "degrees_north" +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 xfirst = 0.5 xinc = 1 yfirst = 89.5 -- GitLab From e74dc6027cd6ae86356b57cd762f70c0a1099ac1 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 19 May 2022 17:29:47 +0200 Subject: [PATCH 27/29] Add SpecsVerification metrics, add testing recipes --- modules/Skill/Skill.R | 37 ++++++++++++---- .../data_load/testing_recipes/recipe_3.yml | 2 +- .../data_load/testing_recipes/recipe_5.yml | 43 +++++++++++++++++++ modules/test_victoria.R | 2 +- 4 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 modules/data_load/testing_recipes/recipe_5.yml diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9976d622..9ed0b9ae 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -57,29 +57,50 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { ## TODO: Adapt time_dims to subseasonal case? time_dim <- 'syear' memb_dim <- 'ensemble' - metric <- recipe$Analysis$Workflow$Skill$metric + metric <- tolower(recipe$Analysis$Workflow$Skill$metric) # Whether the fair version of the metric is to be computed - if (metric %in% c('FRPS', 'FRPSS', 'BSS10', 'BSS90')) { + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { Fair <- T } else { Fair <- F } - - if (metric %in% c('RPS', 'FRPS')) { + # Ranked Probability Score and Fair version + if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - - } else if (metric %in% c('RPSS', 'FRPSS')) { + # Ranked Probability Skill Score and Fair version + } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - } else if (metric == 'BSS10') { + # Brier Skill Score - 10th percentile + } else if (metric == 'bss10') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - } else if (metric == 'BSS90') { + # Brier Skill Score - 90th percentile + } else if (metric == 'bss90') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + # Ensemble mean correlation + } else if (metric == 'enscorr') { + ## TODO: Implement option for Kendall and Spearman methods? + skill <- s2dv::Corr(exp$data, obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + ncores = ncores) + } else if (grepl("specs", metric, fixed = TRUE)) { + # Compute SpecsVerification version of the metrics + metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name + skill <- Compute_verif_metrics(exp$data, obs$data, + skill_metrics = metric, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) } + print("##### SKILL METRIC COMPUTATION COMPLETE #####") + return(skill) } diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml index b71f428d..da1bd78a 100644 --- a/modules/data_load/testing_recipes/recipe_3.yml +++ b/modules/data_load/testing_recipes/recipe_3.yml @@ -17,7 +17,7 @@ Analysis: fcst_syear: '2020' fcst_sday: '1101' hcst_start: '1993' - hcst_end: '1995' + hcst_end: '2016' leadtimemin: 0 leadtimemax: 0 Region: diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml new file mode 100644 index 00000000..5e02eb1f --- /dev/null +++ b/modules/data_load/testing_recipes/recipe_5.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: prlr + freq: monthly_mean + Datasets: + System: + name: system2c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: + fcst_syear: # + fcst_sday: '0301' + hcst_start: '1993' + hcst_end: '2015' + leadtimemin: 0 + leadtimemax: 1 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: evmos + Skill: + metric: RPSS_Specs + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/test_victoria.R b/modules/test_victoria.R index a38edbc2..4383eb6d 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" +recipe_file <- "modules/data_load/testing_recipes/recipe_5.yml" source("modules/data_load/load.R") source("modules/Calibration/test_calib.R") -- GitLab From 568d414745ffde33cd065010ce9ef214d7ca766a Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 25 May 2022 12:20:55 +0200 Subject: [PATCH 28/29] Return list with multiple skill metric arrays; testing --- modules/Skill/Skill.R | 95 ++++++++++--------- .../data_load/testing_recipes/recipe_5.yml | 4 +- modules/test_victoria.R | 4 +- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 9ed0b9ae..f641fa6a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -57,53 +57,58 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { ## TODO: Adapt time_dims to subseasonal case? time_dim <- 'syear' memb_dim <- 'ensemble' - metric <- tolower(recipe$Analysis$Workflow$Skill$metric) - # Whether the fair version of the metric is to be computed - if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { - Fair <- T - } else { - Fair <- F + metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + skill_metrics <- list() + for (metric in strsplit(metrics, " ")[[1]]) { + # Whether the fair version of the metric is to be computed + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { + Fair <- T + } else { + Fair <- F + } + # Ranked Probability Score and Fair version + if (metric %in% c('rps', 'frps')) { + skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Ranked Probability Skill Score and Fair version + } else if (metric %in% c('rpss', 'frpss')) { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Brier Skill Score - 10th percentile + } else if (metric == 'bss10') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.1, Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Brier Skill Score - 90th percentile + } else if (metric == 'bss90') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Ensemble mean correlation + } else if (metric == 'enscorr') { + ## TODO: Implement option for Kendall and Spearman methods? + skill <- s2dv::Corr(exp$data, obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + ncores = ncores) + skill_metrics[[ metric ]] <- skill + } else if (grepl("specs", metric, fixed = TRUE)) { + # Compute SpecsVerification version of the metrics + metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name + skill <- Compute_verif_metrics(exp$data, obs$data, + skill_metrics = metric, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + skill_metrics[[ metric ]] <- skill + } } - # Ranked Probability Score and Fair version - if (metric %in% c('rps', 'frps')) { - skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) - # Ranked Probability Skill Score and Fair version - } else if (metric %in% c('rpss', 'frpss')) { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - Fair = Fair, ncores = ncores) - # Brier Skill Score - 10th percentile - } else if (metric == 'bss10') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - # Brier Skill Score - 90th percentile - } else if (metric == 'bss90') { - skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, - prob_thresholds = 0.9, Fair = Fair, ncores = ncores) - # Ensemble mean correlation - } else if (metric == 'enscorr') { - ## TODO: Implement option for Kendall and Spearman methods? - skill <- s2dv::Corr(exp$data, obs$data, - dat_dim = 'dat', - time_dim = time_dim, - method = 'pearson', - memb_dim = memb_dim, - ncores = ncores) - } else if (grepl("specs", metric, fixed = TRUE)) { - # Compute SpecsVerification version of the metrics - metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name - skill <- Compute_verif_metrics(exp$data, obs$data, - skill_metrics = metric, - verif.dims=c("syear", "sday", "sweek"), - na.rm = na.rm, - ncores = ncores) - } - print("##### SKILL METRIC COMPUTATION COMPLETE #####") - return(skill) + return(skill_metrics) } - - ## TODO: Add specsVerification funs - diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml index 5e02eb1f..ccfebb34 100644 --- a/modules/data_load/testing_recipes/recipe_5.yml +++ b/modules/data_load/testing_recipes/recipe_5.yml @@ -19,7 +19,7 @@ Analysis: hcst_start: '1993' hcst_end: '2015' leadtimemin: 0 - leadtimemax: 1 + leadtimemax: 0 Region: latmin: -10 latmax: 10 @@ -32,7 +32,7 @@ Analysis: Calibration: method: evmos Skill: - metric: RPSS_Specs + metric: BSS90 FRPS RPSS_Specs Indicators: index: no Output_format: S2S4E diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 4383eb6d..96a0e6bb 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -12,6 +12,6 @@ recipe <- read_yaml(recipe_file) # Calibrate datasets calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metric -skill_metric <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs, - recipe, na.rm = T, ncores = 4) +skill_metrics <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs, + recipe, na.rm = T, ncores = 4) -- GitLab From 1571a454962e78c8a23dba261d6b25c27a119272 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 25 May 2022 12:21:33 +0200 Subject: [PATCH 29/29] Add option not to merge time dimensions, format fixes --- modules/Skill/s2s.metrics.R | 263 +++++++++++++++++++----------------- 1 file changed, 138 insertions(+), 125 deletions(-) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 1f5b8a9d..b0069730 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -30,24 +30,30 @@ mergedims <- function(data, indims, outdim) { Compute_verif_metrics <- function(exp, obs, skill_metrics, verif.dims=c("syear", "sday", "sweek"), - na.rm=T,ncores=1) { + merge.dims=F, + na.rm=T, ncores=1) { - exp <- mergedims(exp, verif.dims, 'sdate') - obs <- mergedims(obs, verif.dims, 'sdate') + if (merge.dims) { + exp <- mergedims(exp, verif.dims, 'sdate') + obs <- mergedims(obs, verif.dims, 'sdate') + time.dim <- 'sdate' + } else { + time.dim <- 'syear' + } ## obs already contains ensemble dimension # obs <- InsertDim(obs, 2, 1, name='member') ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE if (na.rm) { - nsdates <- dim(exp)[which(names(dim(exp)) == 'sdate')][] + nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][] not_missing_dates <- c() for (sdate in 1:nsdates){ - if(!all(is.na(Subset(exp,along='sdate', list(sdate), drop=F)))){ + if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))){ not_missing_dates <- c(not_missing_dates, sdate) } } - obs <- Subset(obs, along='sdate', not_missing_dates, drop=F) - exp <- Subset(exp, along='sdate', not_missing_dates, drop=F) + obs <- Subset(obs, along=time.dim, not_missing_dates, drop=F) + exp <- Subset(exp, along=time.dim, not_missing_dates, drop=F) } return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm)) @@ -56,128 +62,134 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, .compute_metrics <- function(exp, obs, metrics, ncores=1, split_factor=1, - na.rm=FALSE) { - - veriUnwrap <- easyVerification:::veriUnwrap - - ## SPECS VERIFICATION PARAMETERS - SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") - - FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", - bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr") - - PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), - bss10=c(1/10), bss90=c(9/10), enscorr=NULL) - - metrics_data <- list() - for (metric in metrics) { - - if (metric %in% SPECSVER_METRICS) { - - data <- Apply(data=list(exp, obs), - target_dims=list(c('sdate', 'ensemble'), - c('sdate', 'ensemble')), - fun="veriApply", - verifun=FUN[[metric]], - prob=PROB[[metric]], - ensdim=2, - ncores=ncores, - split_factor=split_factor, - tdim=1, - na.rm=na.rm)[[1]] #* 100 - - ## TODO: Keep 'ensemble' dimension to maintain flexibility? - data <- Subset(data, c('ensemble'), list(1), drop='selected') - data[!is.finite(data)] <- NaN - metrics_data[[ metric ]] <- data ## previously: list(data) - - } else if (metric == "corr_eno") { - # computes ensemble mean - data <- multiApply::Apply(data = exp, - target_dims = 'ensemble', - fun = mean, ncores = ncores)$output1 - - data <- multiApply::Apply(data = list(exp = data, - obs = Subset(obs, c('ensemble'), - list(1), drop='selected')), - target_dims = 'sdate', fun = .correlation_eno, - time_dim = 'sdate', ncores = ncores) - - # append both corr_r and corr_sign to metrics list - for (i in 1:length(data)) { - coeff <- data[[i]] - coeff[!is.finite(coeff)] <- NaN - if (names(data)[i] %in% c("r", "sign")){ - metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff) - } - } - - } else if (metric == "frpss_sign") { - - terciles_obs <- Compute_probs(obs, c(1/3, 2/3), - quantile_dims=c('sdate'), - ncores=ncores, - split_factor=1, - na.rm=na.rm) - - terciles_exp <- Compute_probs(exp, c(1/3, 2/3), - quantile_dims=c('sdate', 'ensemble'), - ncores=ncores, - split_factor=1, - na.rm=na.rm) - - probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) - - frps <- NULL - n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] - frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, - probs_obs = terciles_obs$probs), - target_dims = 'bin', - fun = .rps_from_probs, - n_categories = 3, - n_members = n_members, - Fair = TRUE, - ncores = ncores)$output1 - - frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, - probs_obs = terciles_obs$probs), - target_dims = 'bin', - fun = .rps_from_probs, - n_categories = 3, - n_members = n_members, - Fair = TRUE, - ncores = ncores)$output1 - - frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims='sdate', - fun=mean, ncores=ncores)$output1 - frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims='sdate', - fun=mean, ncores=ncores)$output1 - - frpss_respect2clim <- NULL - frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean - frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp, - skill_B=frps$clim, - time_dim='sdate', - ncores=ncores)$signif - - frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN - frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN - - frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, - c('ensemble'), list(1), - drop='selected') - frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign, - c('ensemble'), list(1), - drop='selected') - - metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss) - metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign) + merge.dims=FALSE, na.rm=FALSE) { + if (merge.dims) { + time.dim <- 'sdate' + } else { + time.dim <- 'syear' + } + veriUnwrap <- easyVerification:::veriUnwrap + + ## SPECS VERIFICATION PARAMETERS + SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") + + FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", + bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr") + + PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3), + bss10=c(1/10), bss90=c(9/10), enscorr=NULL) + + metrics_data <- list() + for (metric in metrics) { + + if (metric %in% SPECSVER_METRICS) { + + data <- Apply(data=list(exp, obs), + target_dims=list(c(time.dim, 'ensemble'), + c(time.dim, 'ensemble')), + fun="veriApply", + verifun=FUN[[metric]], + prob=PROB[[metric]], + ensdim=2, + ncores=ncores, + split_factor=split_factor, + tdim=1, + na.rm=na.rm)[[1]] #* 100 + + ## TODO: Keep 'ensemble' dimension to maintain flexibility? + data <- Subset(data, c('ensemble'), list(1), drop='selected') + data[!is.finite(data)] <- NaN + metric <- paste0(metric, "_specs") + metrics_data[[ metric ]] <- data ## previously: list(data) + + } else if (metric == "corr_eno") { + # computes ensemble mean + data <- multiApply::Apply(data = exp, + target_dims = 'ensemble', + fun = mean, ncores = ncores)$output1 + + data <- multiApply::Apply(data = list(exp = data, + obs = Subset(obs, c('ensemble'), + list(1), drop='selected')), + target_dims = time.dim, fun = .correlation_eno, + time_dim = time.dim, ncores = ncores) + + # append both corr_r and corr_sign to metrics list + for (i in 1:length(data)) { + coeff <- data[[i]] + coeff[!is.finite(coeff)] <- NaN + if (names(data)[i] %in% c("r", "sign")){ + metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff) + } } + } else if (metric == "frpss_sign") { + + terciles_obs <- Compute_probs(obs, c(1/3, 2/3), + quantile_dims=c(time.dim), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + terciles_exp <- Compute_probs(exp, c(1/3, 2/3), + quantile_dims=c(time.dim, 'ensemble'), + ncores=ncores, + split_factor=1, + na.rm=na.rm) + + probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) + + frps <- NULL + n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] + frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, + probs_obs = terciles_obs$probs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 + + frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, + probs_obs = terciles_obs$probs), + target_dims = 'bin', + fun = .rps_from_probs, + n_categories = 3, + n_members = n_members, + Fair = TRUE, + ncores = ncores)$output1 + + frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim, + fun=mean, ncores=ncores)$output1 + frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim, + fun=mean, ncores=ncores)$output1 + + frpss_respect2clim <- NULL + frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean + frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp, + skill_B=frps$clim, + time_dim=time.dim, + ncores=ncores)$signif + + frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN + frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN + + frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss, + c('ensemble'), list(1), + drop='selected') + frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign, + c('ensemble'), list(1), + drop='selected') + + metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss) + metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign) + } - - return(metrics_data) + + } + + return(metrics_data) } @@ -246,4 +258,5 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } return(rps) + } -- GitLab