diff --git a/.gitignore b/.gitignore index 5faf5e636238b164637385c3484c0eb98785ec63..752f5cf97a2e597b4d8863bf8cfd55046423eb0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ out-logs/ *.swp *.swo +/modules/Calibration/test_victoria.R diff --git a/conf/archive.yml b/conf/archive.yml index 57574919711c876d1306e0ef5cf90375a424374d..e869cb9c0f2bf84a97efe48c75ff288a91b367ba 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -5,42 +5,76 @@ archive: System: system5c3s: src: "exp/ecmwf/system5c3s/" - daily_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/"} - monthly_mean: {"tas":"_f6h/","rsds":"_s0-24h/", - "prlr":"_s0-24h/","sfcWind":"_f6h/", - "tasmin":"_f24h/","tasmax":"_f24h/"} + daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", + "prlr":"_s0-24h/", "sfcWind":"_f6h/"} + monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", + "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: fcst: 51 hcst: 25 reference_grid: "/esarchive/exp/ecmwf/system5c3s/monthly_mean/tas_f6h/tas_20180501.nc" system7c3s: src: "exp/meteofrance/system7c3s/" - monthly_mean: {"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f6h/", "tasmin": "_f6h/"} nmember: fcst: 51 hcst: 25 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system7c3s.txt" + reference_grid: "conf/grid_description/griddes_system7c3s.txt" + system21_m1: + src: "exp/dwd/system21_m1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f24h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} + nmember: + fcst: 50 + hcst: 30 + reference_grid: "conf/grid_description/griddes_system21_m1.txt" system35c3s: src: "exp/cmcc/system35c3s/" - monthly_mean: {"tas":"_f6h/","g500":"_f12h/", - "prlr":"_f24h/", "sfcWind": "_f6h/"} + monthly_mean: {"tas":"_f6h/", "g500":"_f12h/", + "prlr":"_f24h/", "sfcWind": "_f6h/", + "tasmax":"_f24h/", 
"tasmin":"_f24h/"} nmember: fcst: 50 hcst: 40 - reference_grid: "/esarchive/scratch/vagudets/repos/auto-s2s/conf/grid_description/griddes_system35c3s.txt" + reference_grid: "conf/grid_description/griddes_system35c3s.txt" + system2c3s: + src: "exp/jma/system2c3s/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + reference_grid: "conf/grid_description/griddes_system2c3s.txt" + eccc1: + src: "exp/eccc/eccc1/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 10 + hcst: 10 + reference_grid: "conf/grid_description/griddes_eccc1.txt" + ncep-cfsv2: + src: "exp/ncep/cfs-v2/" + monthly_mean: {"tas":"_f6h/", "prlr":"_f6h/", + "tasmax":"_f6h/", "tasmin":"_f6h/"} + nmember: + fcst: 20 + hcst: 20 + reference_grid: "conf/grid_description/griddes_ncep-cfsv2.txt" Reference: era5: src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/","rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/"} reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" - daily_mean: {"tas":"_f1h/","rsds":"_f1h/", - "prlr":"_f1h/","sfcWind":"_f1h/"} + daily_mean: {"tas":"_f1h/", "rsds":"_f1h/", + "prlr":"_f1h/", "sfcWind":"_f1h/"} monthly_mean: {"tas":"_f1h/"} reference_grid: "/esarchive/recon/ecmwf/era5land/daily_mean/tas_f1h/tas_201805.nc" uerra: diff --git a/conf/grid_description/griddes_eccc1.txt b/conf/grid_description/griddes_eccc1.txt new file mode 100644 index 0000000000000000000000000000000000000000..58a9810cdb73c21c967f607eb7c8b771020cfa6a --- /dev/null +++ b/conf/grid_description/griddes_eccc1.txt @@ 
-0,0 +1,19 @@ +# +# Grid description file for ECCC1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d8abe8607ab6988436e6e25d6565228f2661db5 --- /dev/null +++ b/conf/grid_description/griddes_ncep-cfsv2.txt @@ -0,0 +1,18 @@ +# +# Grid description file for NCEP CFSv2 +# +gridtype = lonlat +gridsize = 64800 +xname = lon +xlongname = Longitude +xunits = degrees_east +yname = lat +ylongname = Latitude +yunits = degrees_north +xsize = 360 +ysize = 180 +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 +# diff --git a/conf/grid_description/griddes_system21_m1.txt b/conf/grid_description/griddes_system21_m1.txt index bf9ac52b4971bf02f2bdd932f3be4f1d866e47ac..954438f81443cc54dc2595edd4af35115615341a 100644 --- a/conf/grid_description/griddes_system21_m1.txt +++ b/conf/grid_description/griddes_system21_m1.txt @@ -1,16 +1,16 @@ -# Grid description file for DWD GCFS2.1 (CDS) -# gridID 2 +# +# Grid description file for DWD GCFS2.1 # gridtype = lonlat gridsize = 64800 -xsize = 360 -ysize = 180 xname = lon -xlongname = "longitude" -xunits = "degrees_east" +xlongname = longitude +xunits = degrees_east yname = lat -ylongname = "latitude" -yunits = "degrees_north" +ylongname = latitude +yunits = degrees_north +xsize = 360 +ysize = 180 xfirst = 0.5 xinc = 1 yfirst = 89.5 diff --git a/conf/grid_description/griddes_system2c3s.txt b/conf/grid_description/griddes_system2c3s.txt new file mode 100644 index 0000000000000000000000000000000000000000..0827b7c047f5d59ea975a43a1b22bc6b84e071df --- /dev/null +++ b/conf/grid_description/griddes_system2c3s.txt @@ -0,0 +1,17 @@ +# +# Grid description file for JMA System 2 
C3S +# +gridtype = lonlat +gridsize = 10512 +xname = lon +xlongname = longitude +xunits = degrees_east +yname = lat +ylongname = latitude +yunits = degrees_north +xsize = 144 +ysize = 73 +xfirst = 0 +xinc = 2.5 +yfirst = 90 +yinc = -2.5 diff --git a/modules/Calibration/Calibration.R b/modules/Calibration/Calibration.R index 295bb40667cb07d5ebb942240046b3b6b950d741..2463fcbd1e22ae49883bcf290aee93d70635ede9 100644 --- a/modules/Calibration/Calibration.R +++ b/modules/Calibration/Calibration.R @@ -48,12 +48,12 @@ #'@param obs A numeric array with named dimensions, #' representing the observational data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param hcst A numeric array with named dimensions, #' representing the system hindcast data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fun bias adjustment function @@ -69,7 +69,7 @@ #'@return A numeric array with the bias adjusted hindcast, #' -CST_CALIB_METHODS <- c("bias","evmos","mse_min","crps_min","rpc-based") +CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, split_factor=1, ncores=32) @@ -77,21 +77,21 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, # Replicates observations for bias adjusting each # system of the multi-model separately + ## TODO: Adapt code for multi-model case if(mm){ obs.mm <- obs for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ - obs.mm <- abind(obs2,obs, + obs.mm <- abind(obs2, obs, along = which(names(dim(obs)) == 'dat')) } names(dim(obs.mm)) <- names(dim(obs)) obs <- obs.mm remove(obs.mm) } - + if (method %in% CST_CALIB_METHODS) { - # Hcst calibration - hcst <- CSTools::CST_Calibration(hcst,obs, + hcst <- CSTools::CST_Calibration(hcst, obs, cal.method = method, eval.method = "leave-one-out", multi.model = mm, @@ -99,32 
+99,47 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, na.rm = na.rm, apply_to = NULL, alpha = NULL, - memb_dim = "member", + memb_dim = "ensemble", sdate_dim = "syear", ncores = ncores) + hcst$data[!is.finite(hcst$data)] <- NA + + } else if (method %in% c("s2s4e")) { + hcst <- Apply(data=list(obs = obs$data, hcst = hcst$data), + extra_info = list(na.rm = na.rm), + target_dims = c('sday', 'syear', 'ensemble'), + output_dims = c('sday', 'syear', 'ensemble'), + na.rm = na.rm, + ncores = ncores, + fun = .cal)[[1]] + + hcst[!is.finite(hcst)] <- NA } else { - #error(logger, - # "Calibration method is not implemented in CSTools") + ## TODO: Implement other calibration methods, implement logger + stop("Calibration method is not implemented in CSTools") + # error(logger, + # "Calibration method is not implemented in CSTools") } - hcst[!is.finite(hcst)] <- NA - remove(hcst) + # hcst$data[!is.finite(hcst$data)] <- NA + ## TODO: Is this line necessary? + # remove(hcst) # Merges all the ensembles from the different systems into # one single ensemble if(mm){ hcst <- MergeDims(hcst, - merge_dims=c('dat','member'), - rename_dim='member') - hcst <- drop_na_dim(hcst,'member') + merge_dims = c('dat', 'ensemble'), + rename_dim = 'ensemble') + hcst <- drop_na_dim(hcst, 'ensemble') } - # removes dat and var dimensions if needed - try(hcst<-Subset(hcst, - c('var'),list(1),drop='selected')) - try(hcst<-Subset(hcst, - c('dat'),list(1),drop='selected')) +# # removes dat and var dimensions if needed +# try(hcst<-Subset(hcst, +# c('var'), list(1), drop='selected')) +# try(hcst<-Subset(hcst, +# c('dat'), list(1), drop='selected')) return(hcst) @@ -138,17 +153,17 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, #'@param obs A numeric array with named dimensions, #' representing the observational data used #' in the bias-adjustment method. 
-#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param hcst A numeric array with named dimensions, #' representing the system hindcast data used #' in the bias-adjustment method. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fcst A numeric array with named dimensions, #' representing the system forecast to be #' bias-adjusted. -#' 'sday','syear','member' are mandatory dims +#' 'sday','syear','ensemble' are mandatory dims #'@param fun bias adjustment function @@ -166,52 +181,58 @@ hcst_calib <- function(obs, hcst, method, mm=F, na.rm=T, ## TODO: ## needs and update: look at hcst version -#fcst_calib <- function(obs, hcst, fcst, fun, mm=F, -# na.rm=T, split_factor=1,ncores=32, -# target_dims=c('sday','syear','member'), -# output_dims=c('member')) -#{ -# -# # Replicates observations for bias adjusting each -# # system of the multi-model separately -# if(mm){ -# obs.mm <- obs -# for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ -# obs.mm <- abind(obs2,obs, -# along=which(names(dim(obs)) == 'dat')) -# } -# names(dim(obs.mm)) <- names(dim(obs)) -# obs <- obs.mm -# remove(obs.mm) -# } -# -# # Fcst Calibration -# calibrated_fcst <-Apply(data=list(obs=obs,hcst=hcst,fcst=fcst), -# extra_info=list(na.rm=na.rm), -# target_dims=target_dims, -# output_dims=output_dims, -# na.rm=na.rm, -# ncores=ncores, -# fun = .fcstcal)[[1]] -# -# calibrated_fcst[!is.finite(calibrated_fcst)] <- NA -# -# # Merges all the ensembles from the different systems into -# # one single ensemble -# if(mm){ -# calibrated_fcst <- MergeDims(calibrated_fcst, -# merge_dims=c('dat','member'), -# rename_dim='member') -# calibrated_fcst <- drop_na_dim(calibrated_fcst,'member') -# } -# +fcst_calib <- function(obs, hcst, fcst, fun, mm=F, + na.rm=T, split_factor=1, ncores=32, + ## TODO: establish target dims and output dims + target_dims=c('sday', 'syear', 'ensemble'), + output_dims=c('sday', 'syear', 'ensemble')) +{ 
+ + # Replicates observations for bias adjusting each + # system of the multi-model separately + ## TODO: Adapt multi-model case + if(mm){ + obs.mm <- obs + for(dat in 1:(dim(hcst)['dat'][[1]]-1)){ + obs.mm <- abind(obs2,obs, + along=which(names(dim(obs)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs)) + obs <- obs.mm + remove(obs.mm) + } + + # Fcst Calibration + ## TODO: Return s2dv_cube + calibrated_fcst <- Apply(data=list(obs = obs$data, hcst = hcst$data, fcst = fcst$data), + extra_info = list(na.rm = na.rm), + target_dims = target_dims, + output_dims = output_dims, + na.rm = na.rm, + ncores = ncores, + fun = .fcstcal)[[1]] + + ## TODO: Restore this line + # calibrated_fcst$data[!is.finite(calibrated_fcst$data)] <- NA + calibrated_fcst[!is.finite(calibrated_fcst)] <- NA + + # Merges all the ensembles from the different systems into + # one single ensemble + ## TODO: Adapt multi-model case + if(mm){ + calibrated_fcst <- MergeDims(calibrated_fcst, + merge_dims=c('dat', 'ensemble'), + rename_dim = 'ensemble') + calibrated_fcst <- drop_na_dim(calibrated_fcst, 'ensemble') + } + # # removes dat and var dimensions if needed # try(calibrated_fcst<-Subset(calibrated_fcst, # c('var'),list(1),drop='selected')) # try(calibrated_fcst<-Subset(calibrated_fcst, # c('dat'),list(1),drop='selected')) -# -# return(calibrated_fcst) -# -# -#} + + return(calibrated_fcst) + + +} diff --git a/modules/Calibration/Calibration_fcst3.R b/modules/Calibration/Calibration_fcst3.R new file mode 100644 index 0000000000000000000000000000000000000000..3d24226be716b015f994ae4830612606206489f0 --- /dev/null +++ b/modules/Calibration/Calibration_fcst3.R @@ -0,0 +1,186 @@ + +# Performs the hcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. 
+.cal <- function(var_obs, var_exp, na.rm=F) { + + ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] + nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] + ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + + climObs <- sapply(ind, function(x) {mean( + Subset(var_obs, along='syear', indices=-x), na.rm=na.rm)}) + + climObs2 <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) + + climPred <- sapply(ind, function(x) {mean( + Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) + + climPred2 <- sapply(ind, function(x) {mean(var_exp, na.rm=na.rm)}) + + var_obs.ano <- var_obs - climObs2 + var_fcst.ano <- var_exp - climPred + var_exp.ano <- var_exp - climPred2 + + calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), + c(fcst.index, 0)) + # loop over the hindcast years + for (t in 1 : ntime) { + + # defining forecast,hindcast in cross-validation + fcst.ano <- Subset(var_fcst.ano, along=c('syear', 'sday'), + indices=list(t, fcst.index)) + + hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) + + obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) 
{ + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(var_exp)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + # print(paste0("climObs: ", climObs[t])) # + # print(paste0("sd_obs :", sd_obs)) # + # print(paste0("sd_hcst_diff :", sd_hcst_diff)) + # print(paste0("sd_em_hcst :", sd_em_hcst)) # + # print(paste0("em_hcst :", em_hcst)) + # print(paste0("Alpha - Lluís:", a)) # + # print(paste0("Beta - Lluís:", b)) # + # print(paste0("fcst_diff: ", fcst_diff)) + # print(paste0("climPred: ", climPred)) + # print(paste0("climCross: ", climCross[t])) + # print(paste0("em_fcst :", em_fcst)) + + # Calibrates hcst sdate + calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) + # Appends calibrated sdate to the hcst array + calib.hcst <- abind(calib.hcst, calib.syear, + along=which(names(dim(var_exp)) == 'syear'), + hier.names=F) + } + + names(dim(calib.hcst)) <- names(dim(calib.syear)) + # removes NAs added in array init + calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) + + return(calib.hcst) + +} + +# Performs the fcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. 
+.fcstcal <- function(obs, hcst, fcst, na.rm=F) { + + ntime <- dim(hcst)['syear'] + nmembers <- dim(hcst)['ensemble'] + fcst.nmembers <- dim(fcst)['ensemble'] + ndays <- dim(hcst)['sday'] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + climObs <- mean(obs, na.rm=na.rm) + climPred <- mean(hcst, na.rm=na.rm) + + obs.ano <- obs - climObs + hcst.ano <- hcst - climPred + fcst.ano <- fcst - climPred + + calib.fcst <- NA * fcst + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs allowed + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(hcst)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm) + # print(paste0("Sigma e:", sd_hcst_diff)) + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + ## print(paste0("climObs: ", climObs)) + ## print(paste0("sd_obs :", sd_obs)) + # print(paste0("sd_hcst_diff :", sd_hcst_diff)) + # print(paste0("sd_em_hcst :", sd_em_hcst)) + # print(paste0("em_hcst :", em_hcst)) + ## print(paste0("Alpha - Lluís:", a)) + ## print(paste0("Beta - Lluís:", b)) + # print(paste0("climObs: ", climObs)) + # print(paste0("fcst_diff: ", fcst_diff)) + 
# print(paste0("em_fcst :", em_fcst)) + + # Calibrates hcst sdate + fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) + # fcst_cube$data <- fcst.calib + return(fcst.calib) + # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) + +} + + diff --git a/modules/Calibration/Calibration_fcst4.R b/modules/Calibration/Calibration_fcst4.R new file mode 100644 index 0000000000000000000000000000000000000000..9359c6e81ff94aaf83c3ed716fc5f9c672579241 --- /dev/null +++ b/modules/Calibration/Calibration_fcst4.R @@ -0,0 +1,157 @@ + +# Performs the hcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. +.cal <- function(var_obs, var_exp, na.rm=F) { + + ntime <- dim(var_exp)[which(names(dim(var_exp)) == 'syear')][] + nmembers <- dim(var_exp)[which(names(dim(var_exp)) == 'ensemble')][] + ndays <- dim(var_exp)[which(names(dim(var_exp)) == 'sday')][] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + + climObs <- sapply(ind, function(x) {mean(var_obs, na.rm=na.rm)}) + climPred <- sapply(ind, function(x) {mean( + Subset(var_exp, along='syear', indices=-x), na.rm=na.rm)}) + + var_obs.ano <- var_obs - climObs + var_exp.ano <- var_exp - climPred + + calib.hcst <- NA * Subset(var_exp, along=c('sday', 'syear'), + c(fcst.index, 0)) + # loop over the hindcast years + for (t in 1 : ntime) { + + # defining forecast,hindcast in cross-validation + fcst.ano <- Subset(var_exp.ano, along=c('syear', 'sday'), + indices=list(t, fcst.index)) + hcst.ano <- Subset(var_exp.ano, along=c('syear'), indices=list(-t)) + + obs.ano <- Subset(var_obs.ano, along='syear', indices=-t) + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + 
# Changes the correlation method in case of NAs + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime-1,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(var_exp)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm=na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + calib.syear <- ((a * em_fcst) + (b * fcst_diff) + climObs[t]) + # Appends calibrated sdate to the hcst array + calib.hcst <- abind(calib.hcst, calib.syear, + along=which(names(dim(var_exp)) == 'syear'), + hier.names=F) + } + + names(dim(calib.hcst)) <- names(dim(calib.syear)) + # removes NAs added in array init + calib.hcst <- Subset(calib.hcst, along='syear', indices=-1) + + return(calib.hcst) + +} + +# Performs the fcst calibration for a centered time +# window of sdays. Objects must have the dimensions; +# sday, ensemble & syear. +# If sday dim = 1; no time window calibration is performed. 
+.fcstcal <- function(obs, hcst, fcst, na.rm=F) { + + ntime <- dim(hcst)['syear'] + nmembers <- dim(hcst)['ensemble'] + fcst.nmembers <- dim(fcst)['ensemble'] + ndays <- dim(hcst)['sday'] + + # selects only the centered day of time window + if ( ndays == 1 ){ + fcst.index <- 1 + } else { + fcst.index <- (ndays + 1)/2 + } + + ind <- 1:ntime + climObs <- mean(obs, na.rm=na.rm) + climPred <- mean(hcst, na.rm=na.rm) + + obs.ano <- obs - climObs + hcst.ano <- hcst - climPred + fcst.ano <- fcst - climPred + + calib.fcst <- NA * fcst + + # computes ensemble mean of the fcst & hcst + em_fcst <- mean(fcst.ano, na.rm=na.rm) + em_hcst <- Apply(hcst.ano, target_dims=c('ensemble'), fun=mean, + na.rm=na.rm)$output1 + + # Changes the correlation method in case of NAs allowed + if (na.rm){ + corr.method <- "pairwise.complete.obs" + } else { + corr.method <- "everything" + } + + corr <- cor(as.vector(em_hcst), obs.ano, use = corr.method) + sd_obs <- sd(obs.ano, na.rm=na.rm) + sd_em_hcst <- sd(as.vector(em_hcst), na.rm=na.rm) + + # Computes difference between fcst anomaly and ensemble mean + fcst_diff <- fcst.ano - em_fcst + hcst_diff <- array(numeric(), c(ndays,ntime,0)) + names(dim(hcst_diff)) <- names(dim(hcst.ano)) + for (n in 1 : nmembers) { + diff <- Subset(hcst.ano, along='ensemble', indices=n, + drop='selected') - em_hcst + hcst_diff <- abind(hcst_diff, diff, + along=which(names(dim(hcst)) == 'ensemble'), + hier.names=F) + } + + sd_hcst_diff <- sd(hcst_diff, na.rm) + + a <- abs(corr) * (sd_obs / sd_em_hcst) + b <- (sd_obs / sd_hcst_diff) * sqrt(1 - (corr ^ 2)) + + # Calibrates hcst sdate + fcst.calib <- ((a * em_fcst) + (b * fcst_diff) + climObs) + # fcst_cube$data <- fcst.calib + return(fcst.calib) + # return(Subset(fcst.calib, c('sday', 'syear'), list(1,1), drop='selected')) + +} + + diff --git a/modules/Calibration/test_calib.R b/modules/Calibration/test_calib.R new file mode 100644 index 0000000000000000000000000000000000000000..fa3f1359ae8b39fb2485d0097ae039e3ede092f4 
--- /dev/null +++ b/modules/Calibration/test_calib.R @@ -0,0 +1,101 @@ + +## TODO: Remove once Alba's fun is merged in CSTools +source("https://earth.bsc.es/gitlab/external/cstools/-/raw/develop-CalibrationForecast/R/CST_Calibration.R") + +## Entry params data and recipe? +calibrate_datasets <- function(data, recipe) { + hcst <- data$hcst + obs <- data$obs + fcst <- data$fcst + + # Calibration function params + method <- recipe$Analysis$Workflow$Calibration$method + mm <- recipe$Analysis$Datasets$Multimodel + ncores <- 4 + na.rm <- T + + # Replicate observation array for the multi-model case + if (mm) { + obs.mm <- obs$data + for(dat in 1:(dim(hcst$data)['dat'][[1]]-1)) { + obs.mm <- abind(obs.mm, obs$data, + along=which(names(dim(obs$data)) == 'dat')) + } + names(dim(obs.mm)) <- names(dim(obs$data)) + obs$data <- obs.mm + remove(obs.mm) + } + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + + CST_CALIB_METHODS <- c("bias", "evmos", "mse_min", "crps_min", "rpc-based") + ## TODO: implement other calibration methods + ## TODO: Restructure the code? 
+ if (!(method %in% CST_CALIB_METHODS)) { + stop("Calibration method is not implemented in CSTools") + } else { + ## Alba's version of CST_Calibration (pending merge) is being used + # Calibrate the hindcast and forecast + hcst_calibrated <- CST_Calibration(hcst, obs, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + if (!is.null(fcst)) { + # Calibrate the forecast + fcst_calibrated <- CST_Calibration(hcst, obs, fcst, + cal.method = method, + eval.method = "leave-one-out", + multi.model = mm, + na.fill = TRUE, + na.rm = na.rm, + apply_to = NULL, + alpha = NULL, + memb_dim = "ensemble", + sdate_dim = "syear", + ncores = ncores) + } else { + fcst_calibrated <- NULL + } + + } + + } else if (recipe$Analysis$Variables$freq == "daily_mean") { + # Daily data calibration using Quantile Mapping + warning("Data is at daily frequency. Quantile Mapping will be used", + " as the calibration method.") + hcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = NULL, + ## TODO: Tidy up + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) + + if (!is.null(fcst)) { + fcst_calibrated <- CST_QuantileMapping(hcst, obs, + exp_cor = fcst, + sample_dims = c("syear", + "time", + "ensemble"), + sample_length = NULL, + method = "QUANT", + ncores = ncores, + na.rm = na.rm) + } else { + fcst_calibrated <- NULL + } + } + + print("###### CALIBRATION COMPLETE ######") + return(list(hcst_calib = hcst_calibrated, fcst_calib = fcst_calibrated)) +} diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index e7cad0cb45a182571a383bbd00c9985c7886ae4f..f641fa6a57edc3281f5fd256736f07f1b996c32a 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -7,37 +7,108 @@ # - reliability diagram # - ask Carlos which decadal metrics he is currently using 
+source("modules/Skill/s2s.metrics.R") +## TODO: Remove once the new version of s2dv is released +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/master/R/RandomWalkTest.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPS.R") +source("https://earth.bsc.es/gitlab/es/s2dv/-/raw/develop-RPSS/R/RPSS.R") + +## TODO: Implement this in the future ## Which parameter are required? -if (!("obs" %in% ls()) || is.null(obs)) { - error(logger, - "There is no object 'obs' in the global environment or it is NULL") -} -if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { - error(logger, - "There is no object 'fcst' in the global environment or it is NULL") -} -if (!("hcst" %in% ls()) || is.null(hcst)) { - error(logger, - "There is no object 'hcst' in the global environment or it is NULL") -} -if (!("metric" %in% ls()) || is.null(metric)) { - warn(logger, - "Verification metric not found and it is set as 'EnsCorr'.") - metric <- 'EnsCorr' -} -if (metric %in% c('FRPSS', 'RPSS')) { - metric_fun <- "veriApply" - metric_method <- "FairRpss" -} else if (metric %in% c("FCRPSS", "CRPSS")) { - metric_fun <- "veriApply" -} else if (metric %in% c("EnsCorr", "EnsCor")) { - metric_fun <- "veriApply" - metric_method <- "EnsCorr" -#... 
-} else { - error(logger, "Unknown verification metric defined in the recipe.") - metric_fun <- 'NotFound' +# if (!("obs" %in% ls()) || is.null(obs)) { +# error(logger, +# "There is no object 'obs' in the global environment or it is NULL") +# } +# if (stream == "fcst" && (!("fcst" %in% ls()) || is.null(fcst))) { +# error(logger, +# "There is no object 'fcst' in the global environment or it is NULL") +# } +# if (!("hcst" %in% ls()) || is.null(hcst)) { +# error(logger, +# "There is no object 'hcst' in the global environment or it is NULL") +# } +# if (!("metric" %in% ls()) || is.null(metric)) { +# warn(logger, +# "Verification metric not found and it is set as 'EnsCorr'.") +# metric <- 'EnsCorr' +# } +# if (metric %in% c('FRPSS', 'RPSS')) { +# metric_fun <- "veriApply" +# metric_method <- "FairRpss" +# } else if (metric %in% c("FCRPSS", "CRPSS")) { +# metric_fun <- "veriApply" +# } else if (metric %in% c("EnsCorr", "EnsCor")) { +# metric_fun <- "veriApply" +# metric_method <- "EnsCorr" +# #... +# } else { +# error(logger, "Unknown verification metric defined in the recipe.") +# metric_fun <- 'NotFound' +# } +# info(logger, paste("#-------------------------- ", "\n", +# " running Skill module ", "\n", +# " it can call ", metric_fun )) + +compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { + # exp: s2dv_cube containing the hindcast + # obs: s2dv_cube containing the observations + # recipe: auto-s2s recipe as provided by read_yaml + + ## TODO: Adapt time_dims to subseasonal case? 
+ time_dim <- 'syear' + memb_dim <- 'ensemble' + metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) + skill_metrics <- list() + for (metric in strsplit(metrics, " ")[[1]]) { + # Whether the fair version of the metric is to be computed + if (metric %in% c('frps', 'frpss', 'bss10', 'bss90')) { + Fair <- T + } else { + Fair <- F + } + # Ranked Probability Score and Fair version + if (metric %in% c('rps', 'frps')) { + skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Ranked Probability Skill Score and Fair version + } else if (metric %in% c('rpss', 'frpss')) { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Brier Skill Score - 10th percentile + } else if (metric == 'bss10') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.1, Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Brier Skill Score - 90th percentile + } else if (metric == 'bss90') { + skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, + prob_thresholds = 0.9, Fair = Fair, ncores = ncores) + skill_metrics[[ metric ]] <- skill + # Ensemble mean correlation + } else if (metric == 'enscorr') { + ## TODO: Implement option for Kendall and Spearman methods? 
+ skill <- s2dv::Corr(exp$data, obs$data, + dat_dim = 'dat', + time_dim = time_dim, + method = 'pearson', + memb_dim = memb_dim, + ncores = ncores) + skill_metrics[[ metric ]] <- skill + } else if (grepl("specs", metric, fixed = TRUE)) { + # Compute SpecsVerification version of the metrics + metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name + skill <- Compute_verif_metrics(exp$data, obs$data, + skill_metrics = metric, + verif.dims=c("syear", "sday", "sweek"), + na.rm = na.rm, + ncores = ncores) + skill_metrics[[ metric ]] <- skill + } + } + print("##### SKILL METRIC COMPUTATION COMPLETE #####") + + return(skill_metrics) + } -info(logger, paste("#-------------------------- ", "\n", - " running Skill module ", "\n", - " it can call ", metric_fun )) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..b00697306790fa454cc8f40ba066bfcf0db114ea --- /dev/null +++ b/modules/Skill/s2s.metrics.R @@ -0,0 +1,262 @@ + +source("modules/Skill/s2s.probs.R") + + +# MERGES verification dimns int single sdate dim along which the +# verification metrics will be computed +mergedims <- function(data, indims, outdim) { + + old.dims <- names(dim(data)) + new.dims <- c(indims, old.dims[! old.dims %in% indims]) + data <- Reorder(data, new.dims) + + for (step in 1:(length(indims) - 1)) { + if (step == 1) { + data <- MergeDims(data, + merge_dims=c(indims[step], indims[step+1]), + rename_dim=outdim) + } else { + data <- MergeDims(data, + merge_dims=c(outdim, indims[step+1]), + rename_dim=outdim) + } + } + + return(data) + +} + +## TODO: New function to split sdate dim back into 'sday', 'sweek' and 'syear'? 
# Computes the requested verification metrics for a hindcast against its
# corresponding observations.
#
# exp:           hindcast data array; assumed to carry named dims including
#                'ensemble' and the verification dims — TODO confirm shape
#                against the caller in Skill.R.
# obs:           observations array; the comment below indicates it already
#                carries an 'ensemble' dimension.
# skill_metrics: character vector of metric names (e.g. "frpss", "corr_eno").
# verif.dims:    dimensions that jointly index the verified start dates.
# merge.dims:    if TRUE, collapse verif.dims into a single 'sdate' dimension
#                before computing; otherwise verify along 'syear' only.
# na.rm:         if TRUE, drop start dates whose exp field is entirely NA so
#                that obs/exp pairs stay aligned.
# ncores:        number of cores forwarded to the metric computations.
#
# Returns the named list produced by .compute_metrics().
Compute_verif_metrics <- function(exp, obs, skill_metrics,
                                  verif.dims=c("syear", "sday", "sweek"),
                                  merge.dims=F,
                                  na.rm=T, ncores=1) {

  if (merge.dims) {
    # Collapse (syear, sday, sweek) into a single 'sdate' verification dim
    exp <- mergedims(exp, verif.dims, 'sdate')
    obs <- mergedims(obs, verif.dims, 'sdate')
    time.dim <- 'sdate'
  } else {
    time.dim <- 'syear'
  }
  ## obs already contains ensemble dimension
  # obs <- InsertDim(obs, 2, 1, name='member')

  ## REMOVE VALUES IN CASE PAIR OBS-EXP IS NOT COMPLETE
  if (na.rm) {
    # Keep only the start dates for which exp has at least one non-NA value;
    # obs is subset identically so the pairing stays consistent.
    nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][]
    not_missing_dates <- c()
    for (sdate in 1:nsdates){
      if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))){
        not_missing_dates <- c(not_missing_dates, sdate)
      }
    }
    obs <- Subset(obs, along=time.dim, not_missing_dates, drop=F)
    exp <- Subset(exp, along=time.dim, not_missing_dates, drop=F)
  }

  return(.compute_metrics(exp, obs, skill_metrics, ncores=ncores, na.rm=na.rm))

}

# Internal dispatcher: computes each requested metric and accumulates the
# results in a named list.
#
# Three families are handled:
#  - SPECSVER_METRICS: delegated to easyVerification::veriApply via
#    multiApply::Apply; stored under "<metric>_specs".
#  - "corr_eno": ensemble-mean correlation with effective-N significance,
#    stored as "corr_r" / "corr_sign".
#  - "frpss_sign": Fair RPSS vs. climatology plus a RandomWalkTest
#    significance field, stored as "frpss_values" / "frpss_sign".
.compute_metrics <- function(exp, obs, metrics,
                             ncores=1, split_factor=1,
                             merge.dims=FALSE, na.rm=FALSE) {
  if (merge.dims) {
    time.dim <- 'sdate'
  } else {
    time.dim <- 'syear'
  }

  # NOTE(review): unexported helper pulled via `:::` — presumably needed so
  # veriApply can resolve it inside Apply workers; verify against
  # easyVerification's requirements.
  veriUnwrap <- easyVerification:::veriUnwrap

  ## SPECS VERIFICATION PARAMETERS
  SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss")

  # Metric name -> easyVerification verification function
  FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss",
              bss10="FairRpss", bss90="FairRpss", enscorr="EnsCorr")

  # Metric name -> category probability thresholds (terciles for RPS-family;
  # 10th/90th percentile for the Brier-type scores; none for correlation)
  PROB <- list(frps=c(1/3, 2/3), rpss=c(1/3, 2/3), frpss=c(1/3, 2/3),
               bss10=c(1/10), bss90=c(9/10), enscorr=NULL)

  metrics_data <- list()
  for (metric in metrics) {

    if (metric %in% SPECSVER_METRICS) {

      # ensdim/tdim refer to positions within target_dims:
      # 1 = time.dim, 2 = 'ensemble'
      data <- Apply(data=list(exp, obs),
                    target_dims=list(c(time.dim, 'ensemble'),
                                     c(time.dim, 'ensemble')),
                    fun="veriApply",
                    verifun=FUN[[metric]],
                    prob=PROB[[metric]],
                    ensdim=2,
                    ncores=ncores,
                    split_factor=split_factor,
                    tdim=1,
                    na.rm=na.rm)[[1]] #* 100

      ## TODO: Keep 'ensemble' dimension to maintain flexibility?
      data <- Subset(data, c('ensemble'), list(1), drop='selected')
      # Normalize Inf/-Inf/NA to NaN for downstream handling
      data[!is.finite(data)] <- NaN
      metric <- paste0(metric, "_specs")
      metrics_data[[ metric ]] <- data ## previously: list(data)

    } else if (metric == "corr_eno") {
      # computes ensemble mean
      data <- multiApply::Apply(data = exp,
                                target_dims = 'ensemble',
                                fun = mean, ncores = ncores)$output1

      # Correlate the ensemble mean with the (first-member) observations
      data <- multiApply::Apply(data = list(exp = data,
                                            obs = Subset(obs, c('ensemble'),
                                                         list(1), drop='selected')),
                                target_dims = time.dim, fun = .correlation_eno,
                                time_dim = time.dim, ncores = ncores)

      # append both corr_r and corr_sign to metrics list
      # (.correlation_eno also returns conf_int and n_eff, which are dropped)
      for (i in 1:length(data)) {
        coeff <- data[[i]]
        coeff[!is.finite(coeff)] <- NaN
        if (names(data)[i] %in% c("r", "sign")){
          metrics_data[[ paste0("corr_", names(data)[i]) ]] <- coeff ## list(coeff)
        }
      }

    } else if (metric == "frpss_sign") {

      # Tercile probabilities of the observations (quantiles over time only)
      terciles_obs <- Compute_probs(obs, c(1/3, 2/3),
                                    quantile_dims=c(time.dim),
                                    ncores=ncores,
                                    split_factor=1,
                                    na.rm=na.rm)

      # Tercile probabilities of the hindcast (quantiles over time + members)
      terciles_exp <- Compute_probs(exp, c(1/3, 2/3),
                                    quantile_dims=c(time.dim, 'ensemble'),
                                    ncores=ncores,
                                    split_factor=1,
                                    na.rm=na.rm)

      # Climatological forecast: equal 1/3 probability for each tercile
      probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs))

      frps <- NULL
      n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][]
      # Fair RPS of the climatological forecast
      frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim,
                                                 probs_obs = terciles_obs$probs),
                                     target_dims = 'bin',
                                     fun = .rps_from_probs,
                                     n_categories = 3,
                                     n_members = n_members,
                                     Fair = TRUE,
                                     ncores = ncores)$output1

      # Fair RPS of the hindcast
      frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs,
                                                probs_obs = terciles_obs$probs),
                                    target_dims = 'bin',
                                    fun = .rps_from_probs,
                                    n_categories = 3,
                                    n_members = n_members,
                                    Fair = TRUE,
                                    ncores = ncores)$output1

      # Time-mean RPS for skill-score numerator/denominator
      frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim,
                                          fun=mean, ncores=ncores)$output1
      frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim,
                                         fun=mean,
                                         ncores=ncores)$output1

      frpss_respect2clim <- NULL
      # FRPSS = 1 - FRPS_exp / FRPS_clim
      frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean
      # Significance of the skill difference via DelSole & Tippett random walk
      frpss_respect2clim$sign <- s2dv::RandomWalkTest(skill_A=frps$exp,
                                                      skill_B=frps$clim,
                                                      time_dim=time.dim,
                                                      ncores=ncores)$signif

      frpss_respect2clim$rpss[!is.finite(frpss_respect2clim$rpss)] <- NaN
      frpss_respect2clim$sign[!is.finite(frpss_respect2clim$sign)] <- NaN

      frpss_respect2clim$rpss <- Subset(frpss_respect2clim$rpss,
                                        c('ensemble'), list(1),
                                        drop='selected')
      frpss_respect2clim$sign <- Subset(frpss_respect2clim$sign,
                                        c('ensemble'), list(1),
                                        drop='selected')

      # NOTE(review): these two entries are wrapped in list() while the other
      # branches store bare arrays — confirm downstream consumers expect this
      # asymmetry.
      metrics_data[[ "frpss_values" ]] <- list(frpss_respect2clim$rpss)
      metrics_data[[ "frpss_sign" ]] <- list(frpss_respect2clim$sign)

    }

  }

  return(metrics_data)

}

# Pearson correlation between two series with significance based on the
# effective number of independent samples (s2dv::Eno).
#
# Returns a list with:
#   r        — correlation coefficient
#   sign     — TRUE if |t| exceeds the two-sided critical value at `alpha`
#   conf_int — Fisher z-transform confidence interval for r
#   n_eff    — effective sample size estimated from obs
.correlation_eno = function(exp, obs, time_dim = 'syear', alpha = 0.05, ncores = 1){

  cor = NULL
  cor$r = cor(exp, obs) # Correlation coefficient

  # Effective degrees of freedom accounting for autocorrelation in obs
  n_eff = s2dv::Eno(data = obs, time_dim = time_dim,
                    na.action = na.pass, ncores = ncores)

  # Two-sided t-test with n_eff - 2 degrees of freedom
  t_alpha2_n2 = qt(p=alpha/2, df = n_eff-2, lower.tail = FALSE)
  t = abs(cor$r) * sqrt(n_eff-2) / sqrt(1-cor$r^2)

  # Any NA in the statistics means the test is inconclusive -> not significant
  if (anyNA(c(t, t_alpha2_n2)) == FALSE & t >= t_alpha2_n2) {
    cor$sign = TRUE
  } else {
    cor$sign = FALSE
  }

  # Confidence interval via Fisher z-transform (atanh/tanh)
  z_a2 = qnorm(p = alpha/2, lower.tail = FALSE)
  conf_int = c()
  conf_int[1] = tanh(atanh(cor$r) - z_a2 / sqrt(n_eff-3))
  conf_int[2] = tanh(atanh(cor$r) + z_a2 / sqrt(n_eff-3))
  cor$conf_int = conf_int

  cor$n_eff <- n_eff

  return(cor)
}


# Ranked Probability Score for a single forecast/observation pair expressed as
# category probabilities, with an optional "Fair" adjustment for finite
# ensembles (following SpecsVerification::EnsRps with R.new = Inf).
#
# probs_exp:    forecast probabilities per category (length n_categories)
# probs_obs:    observed outcome probabilities per category
# n_categories: number of categories (3 for terciles)
# Fair:         if TRUE, apply the finite-ensemble adjustment
# n_members:    ensemble size; required when Fair is TRUE
.rps_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members = NULL){

  ## Checkings
  if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){
    stop('The number of probabilities has to be the same as the number of categories')
  }
  if (!is.numeric(n_categories)){
    stop('n_categories must be an integer')
  }
  if (!is.numeric(n_members) & Fair == TRUE){
    stop('n_members must be an integer')
  }

  ## RPS (Wilks 2011, pp.348-350)
  # Sum of squared differences of the cumulative probability distributions
  rps <- NULL
  for (i in 1:n_categories){
    rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2
  }
  rps <- sum(rps)

  ## FairRPS
  if (Fair == TRUE){

    ## formula of SpecsVerification::EnsRps
    ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1))
    R <- n_members
    R_new <- Inf
    probs_cum <- cumsum(probs_exp)
    adjustment <- -1 / (R-1) * probs_cum * (1 - probs_cum)
    adjustment <- sum(adjustment)

    rps <- rps + adjustment

  }

  return(rps)

}

# ---- modules/Skill/s2s.probs.R ----

# Computes category quantiles of `data` and the per-sample probability of
# falling into each category (via easyVerification::convert2prob).
#
# data:          array with named dimensions.
# thresholds:    probability thresholds defining the categories
#                (e.g. c(1/3, 2/3) for terciles).
# quantile_dims: dimensions over which the quantiles are estimated.
# probs_dims:    target dims for the probability computation; 'bin' is the
#                category dimension produced by the quantile step.
# na.rm:         if TRUE, tolerate all-NA slices by returning NA probabilities.
#
# Returns list(probs=<category probabilities>, quantiles=<threshold values>).
Compute_probs <- function(data, thresholds,
                          ncores=1, quantile_dims=c('syear', 'ensemble'),
                          probs_dims=list('ensemble', 'bin'),
                          split_factor=1, na.rm=FALSE) {

  # Category boundaries estimated from the data itself
  quantiles <- Apply(data,
                     quantile_dims,
                     function(x, na.rm) {quantile(as.vector(x),
                                                  thresholds, na.rm=na.rm)},
                     output_dims="bin",
                     ncores=ncores,
                     na.rm=na.rm,
                     split_factor=split_factor)[[1]]

  if (na.rm == FALSE) {
    c2p <- function(x, t) {
      colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
    }
  } else {
    c2p <- function(x, t) {
      if (any(!is.na(x))){
        colMeans(convert2prob(as.vector(x), threshold = as.vector(t)))
      } else {
        # NOTE(review): hard-coded for 3 categories; would be wrong for
        # thresholds of another length — confirm all callers use terciles.
        c(NA, NA, NA)
      }
    }
  }

  probs <- Apply(data = list(x = data, t = quantiles),
                 target_dims = probs_dims,
                 c2p,
                 output_dims = "bin",
                 split_factor = split_factor,
                 ncores = ncores)[[1]]

  return(list(probs=probs, quantiles=quantiles))

}
function(data, type){ - if (type == "hcst"){ - default_dims <- c(sday = 1, sweek = 1, - syear = length(data)) - } else { - default_dims <- c(fcst_syear = length(data)) - } +# if (type == "hcst"){ +# default_dims <- c(sday = 1, sweek = 1, +# syear = length(data)) +# } else { +# default_dims <- c(fcst_syear = length(data)) +# } + default_dims <- c(sday = 1, sweek = 1, syear = length(data)) default_dims[names(dim(data))] <- dim(data) dim(data) <- default_dims @@ -97,8 +98,8 @@ get_timeidx <- function(sdates, ltmin, ltmax, } else if (time_freq == "monthly_mean") { - idx_min <- ltmin - idx_max <- ltmax + idx_min <- ltmin + 1 + idx_max <- ltmax + 1 indxs <- indices(idx_min:idx_max) } diff --git a/modules/data_load/load.R b/modules/data_load/load.R index 0083e9a910d1e1ae4c51894cd61b797833f26bcd..685881562eee6295e2181ecc7156fbc4e24baf47 100755 --- a/modules/data_load/load.R +++ b/modules/data_load/load.R @@ -19,12 +19,12 @@ attr2array <- function(attr){ startR_to_s2dv <- function(startR_array){ dates_dims <- dim(Subset(startR_array, - along=c('dat','var', + along=c('dat', 'var', 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) #sdates_dims <- dim(Subset(startR_array, - # along=c('dat','var','time','sweek','sday', + # along=c('dat', 'var', 'time', 'sweek', 'sday', # 'latitude', 'longitude', 'ensemble'), # list(1,1,1,1,1,1,1,1), drop="selected")) @@ -37,25 +37,25 @@ startR_to_s2dv <- function(startR_array){ #dim(sdates) <- sdates_dims ## TODO: change this line when time attributes work correctly? 
- time_dims <- c("time", "sday", "sweek", "syear", "fcst_syear") + time_dims <- c("time", "sday", "sweek", "syear") s2dv_object <- s2dv_cube(data = attr2array(startR_array), lon = attr2array(attr(startR_array, 'Variables')$dat1$longitude), lat = attr2array(attr(startR_array, 'Variables')$dat1$latitude), - Variable = list(varName=names(attr(startR_array, - 'Variables')$common)[2], + Variable = list(varName=names(attr(startR_array, + 'Variables')$common)[2], level = NULL), Dates = list(start = dates_start, end = dates_end), #sdate = sdates), - time_dims = time_dims, + time_dims = time_dims, when = Sys.time(), source_files = attr2array(attr(startR_array, "Files")) - #Datasets=list(exp1=list(InitializationsDates=list(Member_1="01011990", - # Members="Member_1"))) - ) + # Datasets=list(exp1=list(InitializationsDates=list( Member_1="01011990", + # Members="Member_1"))) + ) return(s2dv_object) } @@ -193,8 +193,8 @@ load_datasets <- function(recipe_file){ # Adjusts dims for daily case, could be removed if startR allows # multidim split names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(hcst))] <- dim(hcst) dim(hcst) <- default_dims @@ -218,23 +218,34 @@ load_datasets <- function(recipe_file){ longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), - longitude = c('lon', 'longitude'), - ensemble = c('member', 'ensemble')), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), ensemble = indices(1:fcst.nmember), return_vars = list(latitude = 'dat', - 
longitude = 'dat', - time = 'file_date'), + longitude = 'dat', + time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - - names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "fcst_year" + + ## TODO: Add 'sday' and 'sweek' dims to fcst + if (recipe$Analysis$Variables$freq == "daily_mean") { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + } + # names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" fcst <- startR_to_s2dv(fcst) } else { - fcst <- NULL + fcst <- NULL } # Load reference @@ -244,33 +255,30 @@ load_datasets <- function(recipe_file){ # Separate Start() call for monthly vs daily data if (store.freq == "monthly_mean") { - dates_file <- sapply(dates, format, '%Y%m%d') - dates_file <- format(as.Date(dates_file, '%Y%m%d'), "%Y%m") + dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m") dim(dates_file) <- dim(Subset(hcst$data, along=c('dat','var', 'latitude', 'longitude', 'ensemble'), list(1,1,1,1,1), drop="selected")) obs <- Start(dat = obs.path, - var = variable, - file_date = dates_file, - merge_across_dims = TRUE, - merge_across_dims_narm = TRUE, - latitude = values(list(lats.min, lats.max)), - latitude_reorder = Sort(), - longitude = values(list(lons.min, lons.max)), - longitude_reorder = circularsort, - transform = regrid_params$obs.transform, - transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), - transform_vars = c('latitude', 'longitude'), - synonims = list(latitude = c('lat','latitude'), - longitude = c('lon','longitude')), - return_vars = list(latitude = 'dat', - longitude = 'dat', - time = 'file_date'), - split_multiselected_dims = TRUE, 
- retrieve = TRUE) + var = variable, + file_date = dates_file, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$obs.transform, + transform_params = list(grid = regrid_params$obs.gridtype, + method = regrid_params$obs.gridmethod), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) } else if (store.freq == "daily_mean") { @@ -310,8 +318,8 @@ load_datasets <- function(recipe_file){ # TODO: Reorder obs dims to match hcst dims? # Adds ensemble dim to obs (for consistency with hcst/fcst) - default_dims <- c(dat = 1, var = 1, sweek = 1, - sday = 1, syear = 1, time = 1, + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, latitude = 1, longitude = 1, ensemble = 1) default_dims[names(dim(obs))] <- dim(obs) dim(obs) <- default_dims diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/data_load/testing_recipes/recipe_3.yml index ed82f43c9b34971c8692833a0d77b49492d32b86..da1bd78a1a3298320439124016f6f773bb2c0637 100644 --- a/modules/data_load/testing_recipes/recipe_3.yml +++ b/modules/data_load/testing_recipes/recipe_3.yml @@ -17,7 +17,7 @@ Analysis: fcst_syear: '2020' fcst_sday: '1101' hcst_start: '1993' - hcst_end: '1995' + hcst_end: '2016' leadtimemin: 0 leadtimemax: 0 Region: @@ -30,7 +30,7 @@ Analysis: type: to_system Workflow: Calibration: - method: SBC + method: mse_min Skill: metric: RPSS Indicators: diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml new file mode 100644 index 0000000000000000000000000000000000000000..33c3a74c716da3beee7e927f5596fbbf0f47c2ab --- /dev/null +++ 
b/modules/data_load/testing_recipes/recipe_4.yml @@ -0,0 +1,43 @@ +Description: + Author: V. Agudetse + +Analysis: + Horizon: Seasonal + Variables: + name: tas + freq: monthly_mean + Datasets: + System: + name: system7c3s + Multimodel: False + Reference: + name: era5 + Time: + sdate: + fcst_syear: '2020' + fcst_sday: '1101' + hcst_start: '1993' + hcst_end: '2016' + leadtimemin: 2 + leadtimemax: 3 + Region: + latmin: -10 + latmax: 10 + lonmin: 0 + lonmax: 20 + Regrid: + method: bilinear + type: to_system + Workflow: + Calibration: + method: mse_min + Skill: + metric: BSS90 + Indicators: + index: no + Output_format: S2S4E +Run: + Loglevel: INFO + Terminal: yes + output_dir: /esarchive/scratch/vagudets/repos/auto-s2s/out-logs/ + code_dir: /esarchive/scratch/vagudets/repos/auto-s2s/ diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/data_load/testing_recipes/recipe_5.yml new file mode 100644 index 0000000000000000000000000000000000000000..ccfebb34b2f49cbafbfc72953b5b04ed9555897e --- /dev/null +++ b/modules/data_load/testing_recipes/recipe_5.yml @@ -0,0 +1,43 @@ +Description: + Author: V. 
# Test driver: runs the loading, calibration and skill modules end-to-end
# for one sample recipe.

recipe_file <- "modules/data_load/testing_recipes/recipe_5.yml"

source("modules/data_load/load.R")
source("modules/Calibration/test_calib.R")
source("modules/Skill/Skill.R")

# Load hcst/fcst/obs datasets as described by the recipe
data <- load_datasets(recipe_file)
## TODO: Instead of reading the recipe at each module, do it at the beginning?
recipe <- read_yaml(recipe_file)
# Calibrate datasets
calibrated_data <- calibrate_datasets(data, recipe)
# Compute skill metrics on the calibrated hindcast vs. the observations.
# Use TRUE rather than T: T is an ordinary (reassignable) binding in R,
# only TRUE is a reserved word.
skill_metrics <- compute_skill_metrics(calibrated_data$hcst_calib, data$obs,
                                       recipe, na.rm = TRUE, ncores = 4)