diff --git a/.gitignore b/.gitignore index 263c4e640a4ffe3bd13bf6a14f80c37553954d4d..9244f19d212d1dd9c4e9d9760b780344da951027 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ out-logs/ *.swp *.swo ecsbatch.log* +*.nc modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +*.err +*.out diff --git a/Crossval_NAO.R b/Crossval_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..8fe9fa014a8a1bb2aad2de8c29ee8948a293bb0c --- /dev/null +++ b/Crossval_NAO.R @@ -0,0 +1,322 @@ +# Full-cross-val workflow ## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop based on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <- c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ?
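# A minimal sketch of the fold structure this internal CSTools helper is
# expected to return under the default 'leave-one-out' method (an editor's
# illustration based on how cross[[t]] is indexed in the loop below, not part
# of the commit; the hypothetical n stands for the number of start dates):
# cross[[t]] should be equivalent to:
# list(eval.dexes = t,                        # start date held out for evaluation
#      train.dexes = setdiff(seq_len(n), t))  # remaining start dates for training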
+ + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + # store results + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim work, the reference forecast needs the same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst
<- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting anomalies + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + if (!is.null(data$fcst)) { + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { +
probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(data$fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval.
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements as entries in $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices + ## in this case cross-validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross-validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case anomalies computed with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validated observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/Rplots.pdf b/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2610e8a00b147a0801d6aad8aec8c2fd21a5a476 Binary files /dev/null and b/Rplots.pdf differ diff --git a/amdsunset.sh b/amdsunset.sh new file mode 100644 index 0000000000000000000000000000000000000000..c7ba06281c90ea3874dd45d6cf63c2482e6a1c19 --- /dev/null +++ b/amdsunset.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH -n 50 +#SBATCH -N 1 +#SBATCH -t 4:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 + +#### --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R + diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index c656d0de422d1808246112885239c9148f8c6dae..f3004af25acd202a1bcde4d2a68c668324221725 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,5 +1,4 @@ -gpfs: - src_sys: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/" + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" System: # ---- EC-Earth3-i4: @@ -34,7 +33,7 @@ gpfs: member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 initial_month: 11 sdate_add: 0 - reference_grid: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" esarchive: src_sys: "/esarchive/" diff --git a/conf/archive_reference.yml b/conf/archive_reference.yml index 063ed47063a7966da7b6c853c5a40ad00d7a290d..a1c7bf5b1d581396d2000837d77883ade19e8970 --- a/conf/archive_reference.yml +++
b/conf/archive_reference.yml @@ -5,6 +5,10 @@ gpfs: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "psl":"weekly_mean/psl_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", "psl":"monthly_mean/psl_f1h-r1440x721cds/", "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", @@ -20,8 +24,12 @@ esarchive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", - "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"} + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "tasmax":"weekly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"weekly_mean/tasmin_f24h-r1440x721cds/", + "rsds":"weekly_mean/rsds_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", "rsds":"daily_mean/rsds_f1h-r1440x721cds/", "prlr":"daily_mean/prlr_f1h-r1440x721cds/", diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 3ce0db5cb886f4443f98345dbe328f8a7e505968..9a8bc54a94151d821d5f18a6940e93c4ad0262b3 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -25,7 +25,7 @@ gpfs: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system51c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: @@ -77,10 +77,24 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_ukmo600.txt" + UK-MetOffice-Glosea603: + name: "UK MetOffice GloSea 6 (v6.03)" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/ukmo/glosea6_system603-c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/", + "sfcWind":"monthly_mean/sfcWind_f6h/", + "psl":"monthly_mean/psl_f6h/"} + nmember: + fcst: 62 + hcst: 28 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_ukmo603.txt" NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
- src: "exp/ncep/cfs-v2/" + src: "exp/ncep/system2c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", @@ -121,6 +135,18 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-GEM5.2-NEMO: + name: "ECCC GEM5.2-NEMO" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc5/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/"} + nmember: + fcst: 20 + hcst: 20 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc5.txt" Reference: ERA5: name: "ERA5" @@ -237,6 +263,19 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" + JMA-MRI-CPS3: + name: "JMA MRI-CPS3" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/jma/system3c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 155 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index 300884dcfc9482fe656ca1e41a3974a1cabf498c..a1540879aeda21c01a5a6e6e0a6e25ba40ea28ee 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -1,3 +1,38 @@ +gpfs: + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" + System: + ECMWF-ENS-EXT: + name: "ECMWF-ENS-EXT" + insitution: "ECMWF" + srchc: "exp/ecmwf/s2s-monthly_ensforhc/" + srcfc: "exp/ecmwf/s2s-monthly_ensfor/" + weekly_mean: {"prlr":"weekly_mean/prlr_s0-6h/"} + nmember: + fcst: 51 + hcst: 11 + calendar: "gregorian" + time_stamp_lag: "0" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ecmwf/s2s-monthly_ensforhc/weekly_mean/prlr_s0-6h/prlr_20221205.nc" + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + srchc: "exp/ncep/cfs-v2/" + srcfc: "exp/ncep/cfs-v2/" + weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", + "prlr":"weekly_mean/s2s/prlr_f24h/", + "tasmax":"weekly_mean/s2s/tasmax_f24h/", + "tasmin":"weekly_mean/s2s/tasmin_f24h/", + "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", + "rsds":"weekly_mean/s2s/rsds_f24h/"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h", + "prlr":"daily_mean/s2s/prlr_f6h/", + "psl":"daily_mean/s2s/psl_f6h/"} + nmember: + fcst: 48 + hcst: 12 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" # Do we need it for subseasonal? + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc" # is it the same as seasonal? 
esarchive: src_sys: "/esarchive/" System: diff --git a/conf/grid_description/griddes_eccc5.txt b/conf/grid_description/griddes_eccc5.txt new file mode 100644 index 0000000000000000000000000000000000000000..782a1b4d356b4f9d4c421a5f84d666ced8013519 --- /dev/null +++ b/conf/grid_description/griddes_eccc5.txt @@ -0,0 +1,22 @@ +# +# gridID 1 +# +gridtype = generic +gridsize = 1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/conf/grid_description/griddes_ncep-cfsv2.txt b/conf/grid_description/griddes_ncep-cfsv2.txt index 6d8abe8607ab6988436e6e25d6565228f2661db5..320b2342bcac48df01b7b694475eaac5646edc72 100644 --- a/conf/grid_description/griddes_ncep-cfsv2.txt +++ b/conf/grid_description/griddes_ncep-cfsv2.txt @@ -1,18 +1,44 @@ # -# Grid description file for NCEP CFSv2 -# -gridtype = lonlat -gridsize = 64800 -xname = lon -xlongname = Longitude -xunits = degrees_east -yname = lat -ylongname = Latitude -yunits = degrees_north -xsize = 360 -ysize = 180 -xfirst = 0.5 -xinc = 1 -yfirst = 89.5 -yinc = -1 +# gridID 1 # +gridtype = gaussian +gridsize = 72960 +datatype = float +xsize = 384 +ysize = 190 +xname = longitude +xlongname = "longitude" +xunits = "degrees_east" +yname = latitude +ylongname = "latitude" +yunits = "degrees_north" +numLPE = 95 +xfirst = 0 +xinc = 0.938 +yvals = 89.27671 88.33975 87.39726 86.45352 85.5093 84.56486 83.62029 82.67563 + 81.73093 80.78619 79.84142 78.89663 77.95183 77.00702 76.06219 75.11737 + 74.17253 73.22769 72.28284 71.338 70.39315 69.4483 68.50343 67.55858 + 66.61372 65.66885 64.72399 63.77913 62.83426 61.88939 60.94453 59.99966 + 59.05479 58.10992 57.16505 56.22018 55.27531 54.33043 53.38556 52.44069 + 51.49582 50.55094 49.60607 48.66119 47.71632 46.77144 45.82657 44.88169 + 43.93682 42.99194 42.04707 41.10219 40.15731 39.21244 38.26756 37.32269 + 36.37781 35.43293 34.48805 33.54317 32.5983 31.65342 30.70854 29.76367 + 28.81879 27.87391 26.92903 25.98416 25.03928 24.0944 23.14952 22.20464 + 21.25977 20.31489 19.37001 18.42513 17.48025 16.53537 15.5905 14.64562 + 13.70074 12.75586 11.81098 10.8661 9.921226 8.976347 8.031468 7.08659 + 6.141711 5.196833 4.251954 3.307075 2.362197 1.417318 0.4724393 -0.4724393 + -1.417318 -2.362197 -3.307075 -4.251954 -5.196833 -6.141711 -7.08659 + -8.031468 -8.976347 -9.921226 -10.8661 -11.81098 -12.75586 -13.70074 + -14.64562 -15.5905 -16.53537 -17.48025 -18.42513 -19.37001 -20.31489 + -21.25977 -22.20464 -23.14952 -24.0944 -25.03928 -25.98416 -26.92903 + -27.87391 -28.81879 -29.76367 -30.70854 -31.65342 -32.5983 -33.54317 + -34.48805 -35.43293 -36.37781 -37.32269 -38.26756 -39.21244 -40.15731 + -41.10219 -42.04707 -42.99194 -43.93682 -44.88169 -45.82657 -46.77144 + -47.71632 -48.66119 -49.60607 -50.55094 -51.49582 -52.44069 -53.38556 + -54.33043 -55.27531 -56.22018 -57.16505 -58.10992 -59.05479 -59.99966 + -60.94453 -61.88939 -62.83426 -63.77913 -64.72399 -65.66885 -66.61372 + -67.55858 -68.50343 -69.4483 -70.39315 -71.338 -72.28284 -73.22769 -74.17253 + -75.11737 -76.06219 -77.00702 -77.95183 -78.89663 -79.84142 -80.78619 + -81.73093 -82.67563 -83.62029 -84.56486 -85.5093 -86.45352 -87.39726 + -88.33975 -89.27671 + diff --git a/conf/grid_description/griddes_ukmo603.txt b/conf/grid_description/griddes_ukmo603.txt new file mode 100644 index 
0000000000000000000000000000000000000000..26b33631e5b74bd5d80680bc921fc5cf2f0a01b6 --- /dev/null +++ b/conf/grid_description/griddes_ukmo603.txt @@ -0,0 +1,22 @@ +# gridID 1 +# +gridtype = generic +gridsize = 1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 + diff --git a/full_NAO.R b/full_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..b0db00df35f5b844b729e14aa82c67369525f617 --- /dev/null +++ b/full_NAO.R @@ -0,0 +1,29 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO_single.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + + +source("modules/Crossval/Crossval_NAO.R") +res <- Crossval_NAO(recipe = recipe, data = data) + +source("plot_NAO.R") +plot_NAO(recipe = recipe, nao = res, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +# No visualisation available for region aggregated + diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..a4c46693b501aa95d1cdae2e0339f3ada08f7eb9 --- /dev/null +++ b/full_ecvs_anomalies.R @@ -0,0 +1,52 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_singl_ano_seas.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +data_summary(data$fcst, recipe) + + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe, data = data) + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + data_summary(data$fcst, recipe) + } +source("modules/Crossval/Crossval_anomalies.R") +res <- Crossval_anomalies(recipe = recipe, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +# Required to plot a forecast: +data$fcst <- res$fcst +tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) +nlats <- as.numeric(dim(data$hcst$data)['latitude']) +nlons <- as.numeric(dim(data$hcst$data)['longitude']) +ntimes <- as.numeric(dim(data$hcst$data)['time']) + +# TODO: consider more than terciles: +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) + + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) + +## Check logo size is appropiated for your maps: +source("tools/add_logo.R") +add_logo(recipe, "tools/BSC_logo_95.jpg") 
#"rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R new file mode 100644 index 0000000000000000000000000000000000000000..3b521f223afb9d9becad2edbfb86a76e550bc658 --- /dev/null +++ b/full_ecvs_calibration.R @@ -0,0 +1,69 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_singl_cal_seas.yml" +#recipe_file <- "recipe_subseasonal_ecvs.yml" +#recipe_file <- "recipe_prlr_cal_subseas.yml" + +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) +} + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe, data = data) + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) + } + } + +source("modules/Crossval/Crossval_Calibration.R") +res <- Crossval_Calibration(recipe = recipe, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +if (!is.null(data$fcst)) { + # Required to plot a forecast: + data$fcst <- res$fcst + tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) + nlats <- as.numeric(dim(data$hcst$data)['latitude']) + nlons <- as.numeric(dim(data$hcst$data)['longitude']) + ntimes <- as.numeric(dim(data$hcst$data)['time']) + + # TODO: consider more than terciles + dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) + + + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = tmp_probs, + logo = NULL) + #logo = "tools/BSC_logo_95.jpg") +} else { + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, log = "tools/BSC_logo_95.jpg") +} + +saveRDS(object = list(product = res, + metrics = skill_metrics, + fcst_probs = tmp_probs), + file = paste0(recipe$Run$output_dir, "/outputs/", + recipe$Analysis$Datasets$System$name, "_", + recipe$Analysis$Variables$name, "_", + recipe$Analysis$Time$fcst_year, ".RDS")) +recipe$Run$output_dir diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..e92a143cd92f2bc4d9693f78eebd58d6d77a077f --- /dev/null +++ b/full_ecvs_multimodel_anomalies.R @@ -0,0 +1,71 @@ +library(pryr) +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe_file <- "recipe_tas.yml" +recipe_file <- "recipe_ecvs_ano_mul_seas.yml" +# recipe_file <- "recipe_tas_decadal.yml" +# Will it work with a single job? 
+#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +models <- unlist(original_recipe$Analysis$Datasets$System) + + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_anomalies.R") +source("modules/Aggregation/Aggregation.R") +# sys <- models[1] +for (sys in models) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe_aux, data = data) + } + # verification individual models + product <- Crossval_anomalies(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + Visualization(recipe = recipe_aux, data = data, + skill_metrics = skill_model, significance = TRUE) + ## end ver + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) +} +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') + +source("modules/Crossval/Crossval_multimodel_anomalies.R") +res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) + +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' +Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) + + +## Check logo size is appropiated for your maps: +source("tools/add_logo.R") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R new file mode 100644 index 0000000000000000000000000000000000000000..2a3c885431a6edf1e9fba5647bcc43961c16d052 --- /dev/null +++ b/full_ecvs_multimodel_calibrated.R @@ -0,0 +1,88 @@ +library(pryr) +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe_file <- "recipe_tas.yml" +# recipe_file <- "recipe_tas_decadal.yml" +recipe_file <- "recipe_ecvs_cal_mul_seas.yml" +# Will it work with a single job? 
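# Sketch of the multimodel structure the loop below assembles (a description
# inferred from the code, with a hypothetical system name for illustration):
#   datos$hcst[['ECMWFSEAS51']] : one hindcast s2dv_cube per system
#   datos$fcst[['ECMWFSEAS51']] : the matching forecast cube
#   datos$obs                   : a single observations cube
# (dots are stripped from system names via gsub('\\.', '', sys)); 'datos' is
# then renamed to 'data' and handed to Crossval_multimodel_Calibration().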
+#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +models <- unlist(original_recipe$Analysis$Datasets$System) + + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_Calibration.R") +source("modules/Aggregation/Aggregation.R") +# sys <- models[1] +for (sys in models) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe_aux, data = data) + } + # verification individual models + product <- Crossval_Calibration(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + + tmp_probs <- list(probs_fcst = product$probs$fcst[[1]]) + lats <- length(data$hcst$coords$latitude) + lons <- length(data$hcst$coords$longitude) + timesteps <- as.numeric(dim(data$hcst$data)['time']) + dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps, + #latitude = 20, longitude = 20) + latitude = lats, longitude = lons) + + Visualization(recipe = recipe_aux, data = data, probabilities = tmp_probs, + skill_metrics = skill_model, significance = TRUE) + ## end ver + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) +} +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') + +source("modules/Crossval/Crossval_multimodel_Calibration.R") +res <- Crossval_multimodel_Calibration(recipe = original_recipe, data = data) + +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' +tmp_probs <- list(probs_fcst = res$probs$probs_fcst[[1]][[1]]) + + +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps, + #latitude = 20, longitude = 20) + latitude = lats, longitude = lons) +info(recipe_aux$Run$logger,dim(tmp_probs$probs_fcst)) + +Visualization(recipe = recipe_aux, data = data_aux, probabilities = tmp_probs, skill_metrics = skill_metrics, significance = TRUE) + + +## Check logo size is appropiated for your maps: +source("tools/add_logo.R") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_multimodel_NAO.R b/full_multimodel_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..e53cd1993e4dc62fd6fa65b7d43eaafe499bc533 --- /dev/null +++ b/full_multimodel_NAO.R @@ -0,0 +1,49 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO.yml" +#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +models <- 
unlist(original_recipe$Analysis$Datasets$System) + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +NAO_ind <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_NAO.R") +for (sys in models[1:2]) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # Load datasets + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + # Verification individual models + product <- Crossval_NAO(recipe = recipe_aux, data = data) + source("plot_NAO.R") + plot_NAO(recipe = recipe_aux, nao = product, data = data) + + skill_model <- Crossval_metrics(recipe = recipe_aux, + data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + NAO_ind <- append(NAO_ind, list(product)) + names(NAO_ind)[length(NAO_ind)] <- gsub('\\.','', sys) +} +# No visualisation available for region aggregated +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') +source("modules/Crossval/Crossval_multimodel_NAO.R") +res <- Crossval_multimodel_NAO(recipe = original_recipe, data = data) + + diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index bb3a410ac2221b38ab5cb50dfd0557cf723da939..120f3b8db29152eea4b92867f5f16e317273d76e 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -14,7 +14,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { plotting_attr <- list(names(indices)) } original_dims <- names(dim(x[[1]]$data)) - if (method == 'average') { + if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { @@ -70,3 +70,4 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { x[[1]]$attrs$Dates <- tmp_dates return(x) } + diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R new file mode 100644 index 0000000000000000000000000000000000000000..76cdb7213dbc1c1020dcdd9069db34f4485bb9ae --- /dev/null +++ b/modules/Crossval/Crossval_Calibration.R @@ -0,0 +1,542 @@ +# take the output of Flor/s2s/subseasonal_loading.R + +source("modules/Crossval/R/tmp/GetProbs.R") +Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { + + cal_method <- recipe$Analysis$Workflow$Calibration$method + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + + + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
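# Worked illustration of the percentile parsing above (hypothetical recipe
# values, not from this commit): through eval(parse(text = y)), a recipe entry
#   percentiles: [['1/3', '2/3'], ['1/10', '9/10']]
# becomes list(c(1/3, 2/3), c(0.1, 0.9)), i.e. one numeric vector of category
# boundaries per requested set (terciles, extremes, ...).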
+ + cal_hcst_ev_res <- NULL + cal_hcst_tr_res <- NULL + #cal_obs_ev_res <- NULL + obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + + hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = na.rm) + + obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = na.rm) + + hcst_ev <- MergeDims(hcst_ev, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = na.rm) + + } + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + cal_hcst_tr <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + cal_hcst_ev <- QuantileMapping(exp = hcst_tr, obs = obs_tr, + exp_cor = hcst_ev, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) +# Try to plot quantile-quantile: +# browser() +# test <- quantile(cal_hcst_tr[,,1,1,5,1,10,26], seq(0,1,0.1), na.rm = T) +# test2 <- quantile(obs_tr[1,1,,5,1,10,26,1], seq(0,1,0.1), na.rm = T) + +# plot(test2, test) +# test1 <- cal_hcst_tr[1,,1,1,5,1,14,1] +# test2 <- hcst_tr[1,1,,5,1,1,14,1] +# pos <- order(test2, na.last=NA) +# test2 <- sort(test2) +# test1 <- test1[pos] +# plot(test1, test2, xlim = c(990,1030),ylim=c(990,1030), +# xlab = "Original hindcast - training indices (hPa)", +# ylab = "Calibrated hindcast - training indices (hPa)", +# main = "One ensemble member") +# lines(x=c(920,1030), y=c(920,1030), type = "l", col = 'blue') + +# test3 <- MeanDims(cal_hcst_tr, 'ensemble') +# test3 <- test3[,1,1,5,1,14,1] +# test4 <- MeanDims(hcst_tr, 'ensemble') +# test4 <- test4[1,1,,5,1,14,1] +# pos <- order(test4, na.last =NA) +# test4 <- sort(test4) +# test3 <- test3[pos] +# plot(test4, test3, xlim = c(1000,1030),ylim=c(1000,1030), +# xlab = "Original hindcast - training indices (hPa)", +# ylab = "Calibrated hindcast - training indices (hPa)", +# main = "Ensemble mean") +# lines(x=c(1000,1030), y=c(1000,1030), type = "l", col = 'blue') + + } else { + cal_hcst_tr <- Calibration(exp = hcst_tr, obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = ncores) + cal_hcst_ev <- Calibration(exp = hcst_tr, obs = obs_tr, exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, ncores = 
ncores) + } + # If precipitation is negative: + if (correct_negative) { + cal_hcst_tr[cal_hcst_tr < 0] <- 0 + cal_hcst_ev[cal_hcst_ev < 0] <- 0 + } + lims_cal_hcst_tr <- Apply(cal_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, + function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims_obs_tr <- Apply(obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, + function(x){'bin'}), + prob_lims = categories, + ncores = ncores) + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + cal_hcst_tr <- SplitDim(cal_hcst_tr, split_dim = 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + cal_hcst_ev <- SplitDim(cal_hcst_ev, split_dim = 'syear', + new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$eval.dexes))) + obs_tr <- SplitDim(obs_tr, split_dim = 'syear', new_dim_name = 'sday', + indices = rep(1:dim(data$hcst$data)['sday'], + length(cross[[t]]$train.dexes))) + for (ps in 1:length(categories)) { + lims_cal_hcst_tr[[ps]] <- InsertDim(lims_cal_hcst_tr[[ps]], + pos = 1, len = 1, name = 'sday') + lims_obs_tr[[ps]] <- InsertDim(lims_obs_tr[[ps]], + pos = 1, len = 1, name = 'sday') + } + } + # check order before storing: + cal_hcst_ev <- Reorder(cal_hcst_ev, + names(dim(data$hcst$data))) + cal_hcst_tr <- Reorder(cal_hcst_tr, + names(dim(data$hcst$data))) + obs_tr <- Reorder(obs_tr, names(dim(data$obs$data))) + for (ps in 1:length(categories)) { + lims_cal_hcst_tr[[ps]] <- Reorder(lims_cal_hcst_tr[[ps]], + c('bin', names(dim(data$obs$data))[-c(5,9)])) + lims_obs_tr[[ps]] <- Reorder(lims_obs_tr[[ps]], + c('bin', names(dim(data$obs$data))[-5])) + } + #store results + cal_hcst_ev_res <- abind(cal_hcst_ev_res, cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + cal_hcst_tr_res <- abind(cal_hcst_tr_res, cal_hcst_tr, + along = length(dim(cal_hcst_tr)) + 1) + + # Obs ev are original obs +# cal_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, +# along = length(dim(ano_obs_ev)) + 1) + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_cal_hcst_tr_res[[ps]] <- abind(lims_cal_hcst_tr_res[[ps]], + lims_cal_hcst_tr[[ps]], + along = length(dim(lims_cal_hcst_tr[[ps]])) + 1) + lims_obs_tr_res[[ps]] <- abind(lims_obs_tr_res[[ps]], + lims_obs_tr[[ps]], + along = length(dim(lims_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Calibration Cross-validation loop ended #####") + gc() + names(dim(cal_hcst_ev_res)) <- c('dat', 'var', 'sday', 'sweek', + 'unneeded', 'time', + 'latitude', 'longitude', + 'ensemble', 'syear') + names(dim(cal_hcst_tr_res)) <- c('dat', 'var', 'sday', 'sweek', + 'loop', 'time', + 'latitude', 'longitude', 'ensemble', 'syear') + names(dim(obs_tr_res)) <- c('dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + 'latitude', 'longitude', 'unneeded', 'syear') + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + cal_hcst_ev_res <- Subset(cal_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', + 'sweek', 'time', +
'latitude', 'longitude', + 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', 'dat', 'var', 'sday', 'sweek', + 'time', 'latitude', 'longitude', + 'unneeded', 'syear') + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, + drop = 'selected') + } + + # Forecast calibration: + if (!is.null(data$fcst)) { + if (tolower(recipe$Analysis$Horizon) %in% c('seasonal', 'decadal')) { + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = data$hcst$data, + obs = data$obs$data, + exp_cor = data$fcst$data, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = data$hcst$data, + obs = data$obs$data, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + wet.day = FALSE, + na.rm = na.rm, qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- Calibration(exp = data$hcst$data, + obs = data$obs$data, + exp_cor = data$fcst$data, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- Calibration(exp = data$hcst$data, + obs = data$obs$data, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + } + # For compatibility with subseasonal: + obs <- data$obs$data + } else { # if subseasonal + # merge sample dimensions and select central week + hcst <- MergeDims(data$hcst$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + hcst <- Subset(hcst, along = 'sweek', + indices = (dim(data$hcst$data)['sweek'] + 1) / 2) + obs <- MergeDims(data$obs$data, merge_dims = c('sday', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + obs <- Subset(obs, along = 'sweek', + indices = (dim(data$obs$data)['sweek'] + 1) / 2) + fcst <- Subset(data$fcst$data, along = 'sday', indices = 1, + drop = 'selected') + if (cal_method %in% c('PTF', 'DIST', 'RQUANT', 'QUANT', 'SSPLIN')) { + data$fcst$data <- QuantileMapping(exp = hcst, + obs = obs, + exp_cor = fcst, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, qstep = 0.1, + wet.day = FALSE, + ncores = ncores) + hcst_cal <- QuantileMapping(exp = hcst, + obs = obs, + method = cal_method, + memb_dim = 'ensemble', + sdate_dim = 'syear', + window_dim = NULL, + na.rm = na.rm, + wet.day = FALSE, + qstep = 0.1, + ncores = ncores) + } else { + data$fcst$data <- Calibration(exp = hcst, + obs = obs, + exp_cor = fcst, + cal.method = cal_method, + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + hcst_cal <- Calibration(exp = hcst, + obs = obs, + cal.method = cal_method, + eval.method = 'in-sample', + multi.model = FALSE, + na.fill = TRUE, na.rm = na.rm, + apply_to = NULL, + alpha = NULL, memb_dim = 'ensemble', + sdate_dim = 'syear', + dat_dim = NULL, ncores = ncores) + } + if (!('sday' %in% names(dim(data$fcst$data)))) { + data$fcst$data <- InsertDim(data$fcst$data, + len = 1, pos = 3, name = 'sday') + } + } # end condition subseasonal + if (correct_negative) { + hcst_cal[hcst_cal < 0] <- 0 + 
data$fcst$data[data$fcst$data < 0] <- 0 + } + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(hcst_cal, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims <- Apply(obs, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = TRUE))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + tmp_lims2 <- list() + + # SAVING 'lims' which are the observed category limits: + # TODO saving: + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Calibration/") + + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + if (recipe$Analysis$Workflow$Probabilities$save == 'all') { + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['bin']) { + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l, + drop = "selected") + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- + paste0("p_", as.character(categories[[ps]][l]*100)) + } + } + info(recipe$Run$logger, + "SAVING OBSERVED CATEGORY LIMITS") + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } + } + # Compute Probabilities + for (ps in 1:length(categories)) { + # Get only the probabilities of the central day in sday + central_day <- (dim(cal_hcst_ev_res)['sday'] + 1)/2 + tmp <- Subset(cal_hcst_ev_res, along = 'sday', indices = central_day) + hcst_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_cal_hcst_tr_res[[ps]], + ncores = ncores) + tmp <- Subset(data$obs$data, along = 'sday', indices = central_day) + obs_probs_ev[[ps]] <- GetProbs(tmp, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes the resulting calibrated + hcst <- data$hcst + hcst$data <- cal_hcst_ev_res + + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + # Keep only de central sday position from the dimension + if (dim(hcst$data)['sday'] > 1) { + hcst <- CST_Subset(hcst, along = 'sday', + indices = (dim(hcst$data)['sday'] + 1) / 2) + } + if (dim(data$obs$data)['sday'] > 1) { + data$obs <- CST_Subset(data$obs, along = 'sday', + indices = (dim(data$obs$data)['sday'] + 1) / 2) + } + if (dim(data$hcst$data)['sday'] > 1) { + data$hcst <- CST_Subset(data$hcst, along = 'sday', + indices = (dim(data$hcst$data)['sday'] + 1) / 2) + } + if (dim(obs_tr_res)['sday'] > 1) { + obs_tr_res <- Subset(obs_tr_res, along = 'sday', + indices = 
(dim(obs_tr_res)['sday'] + 1) / 2) + } + } + info(recipe$Run$logger, + "#### Calibrated and Probabilities Done #####") + if (recipe$Analysis$Workflow$Calibration$save != FALSE) { + info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") + # Save forecast + if ((recipe$Analysis$Workflow$Calibration$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + #if (recipe$Analysis$Workflow$Anomalies$save %in% + # c('all', 'exp_only')) { + # save_forecast(recipe = recipe, data_cube = hcst, type = 'hcst') + #} + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]*100) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]*100) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1]*100, + "_to_", categories[[ps]][perc]*100) + } + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + if (!is.null(data$fcst)) { + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], + along = 'bin', + indices = perc, + drop = 'selected'))) + } + all_names <- c(all_names, name_elem) + } + } + ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move + ## reorganizing and renaming to save_probabilities() + names(probs_hcst) <- all_names + probs_hcst <- lapply(probs_hcst, .drop_dims) + names(probs_obs) <- all_names + probs_obs <- lapply(probs_obs, .drop_dims) + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + probs_fcst <- lapply(probs_fcst, .drop_dims) + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + ## Saving only forecast probabilities for now + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$obs, agg = agg, + # type = "obs") + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + + return(list(hcst = hcst, obs = data$obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(obs_lims = lims, hcst_lims = lims_fcst), + #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, + fcst = fcst_probs), + ref_obs_tr = obs_tr_res)) + +} + diff --git a/modules/Crossval/Crossval_NAO.R b/modules/Crossval/Crossval_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..d4882f3ec7da242450aa95bc97caccb521883f24 --- /dev/null +++ b/modules/Crossval/Crossval_NAO.R @@ -0,0 +1,375 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Crossval/R/tmp/NAO.R") +source("modules/Crossval/R/tmp/ProjectField.R") +source("modules/Crossval/R/tmp/EOF.R") +source("modules/Crossval/R/tmp/Utils.R") + +Crossval_NAO <- function(recipe, data) { + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if 
(is.null(obsproj)) { + obsproj <- TRUE + } + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'region' + + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + ## output objects + nao_hcst_ev_res <- NULL + nao_obs_ev_res <- NULL + nao_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_nao_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_nao_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores)$exp_cor + #Standarisation: + # Need the nao_hcst (for the train.dexes) to standarize the eval.dexes? 
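+    # A minimal runnable sketch of the standardisation below, with hypothetical
+    # toy values (the demo_* objects are illustrative only and removed right
+    # away): the evaluation-year index is centred and scaled using statistics
+    # from the training years alone, so the held-out year never informs its
+    # own scaling.
+    demo_tr <- c(-1.2, 0.4, 0.9, -0.3)                  # training-period NAO values
+    demo_ev_std <- (0.7 - mean(demo_tr)) / sd(demo_tr)  # standardised eval-year value
+    rm(demo_tr, demo_ev_std)                            # tidy up the toy objects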
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + + # compute category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + #store results + nao_hcst_ev_res <- abind(nao_hcst_ev_res, nao_hcst_ev, + along = length(dim(nao_hcst_ev)) + 1) + nao_obs_ev_res <- abind(nao_obs_ev_res, nao_obs_ev, + along = length(dim(nao_obs_ev)) + 1) + nao_obs_tr_res <- abind(nao_obs_tr_res, nao_obs_tr, + along = length(dim(nao_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_nao_hcst_tr_res[[ps]] <- abind(lims_nao_hcst_tr_res[[ps]], + lims_nao_hcst_tr[[ps]], + along = length(dim(lims_nao_hcst_tr[[ps]])) + 1) + lims_nao_obs_tr_res[[ps]] <- abind(lims_nao_obs_tr_res[[ps]], + lims_nao_obs_tr[[ps]], + along = length(dim(lims_nao_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(nao_hcst_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_tr_res)) <- c('ensemble', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + # To make crps_clim to work the reference forecast need to have same dims as obs: + nao_hcst_ev_res <- Subset(nao_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_ev_res <- Subset(nao_obs_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_tr_res <- Subset(nao_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_nao_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', + 'sweek', 'time', 'syear') + names(dim(lims_nao_obs_tr_res[[ps]])) <- c('cat', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + lims_nao_obs_tr_res[[ps]] <- Subset(lims_nao_obs_tr_res[[ps]], + along 
= 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + nao_fcst <- NULL + if (!is.null(data$fcst)) { + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + ano_hcst <- Ano(data = data$hcst$data, clim = clim_hcst) + ano_obs <- Ano(data= data$obs$data, clim = clim_obs) + nao_fcst <- NAO(exp = ano_hcst, obs = ano_obs, exp_cor = ano_fcst, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(nao_fcst$exp, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(nao_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(nao_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(nao_fcst$exp_cor, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes + nao_hcst <- data$hcst + nao_hcst$data <- nao_hcst_ev_res + nao_obs <- data$obs + nao_obs$data <- nao_obs_ev_res + + info(recipe$Run$logger, + "#### NAO and Probabilities Done #####") + if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') { + info(recipe$Run$logger, "##### START SAVING NAO #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Indices$NAO$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Indices$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + save_observations(recipe = recipe, data_cube = nao_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- 
paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + return(list(hcst = nao_hcst, obs = nao_obs, fcst = nao_fcst, + # Mean Bias for the NAO doesn't make sense, so + # not returning fullvals + #hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(hcst_tr = lims_nao_hcst_tr_res, + obs_tr = lims_nao_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = nao_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..66eb3b2ea75e15b1ac6001c939e4a7968c3cc1e3 --- /dev/null +++ b/modules/Crossval/Crossval_anomalies.R @@ -0,0 +1,374 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? 
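+  # A minimal sketch of the train/eval pairs .make.eval.train.dexes() is
+  # expected to return for 'leave-one-out' (demo_cross is hypothetical and
+  # removed immediately): one pair per start date over a 4-year hindcast.
+  demo_cross <- lapply(1:4, function(i) {
+    list(eval.dexes = i, train.dexes = setdiff(1:4, i))
+  })
+  # e.g. demo_cross[[2]]$eval.dexes is 2; demo_cross[[2]]$train.dexes is c(1, 3, 4)
+  rm(demo_cross)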
+
+  ## output objects
+  ano_hcst_ev_res <- NULL
+  ano_obs_ev_res <- NULL
+  ano_obs_tr_res <- NULL
+  # one element per set of probabilities requested in the recipe:
+  lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x){NULL})
+  hcst_probs_ev <- lapply(categories, function(x){NULL})
+  obs_probs_ev <- lapply(categories, function(x){NULL})
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+
+    # subset years: Subset works at BSC but not at Athos
+    ## training indices
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    hcst_tr <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$train.dexes)
+    ## evaluation indices
+    hcst_ev <- Subset(data$hcst$data, along = 'syear',
+                      indices = cross[[t]]$eval.dexes, drop = 'selected')
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes, drop = 'selected')
+    if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
+      central_day <- (dim(data$hcst$data)['sday'] + 1) / 2
+      hcst_tr <- MergeDims(hcst_tr, merge_dims = c('sday', 'syear'),
+                           rename_dim = 'syear', na.rm = FALSE)
+      obs_tr <- MergeDims(obs_tr, merge_dims = c('sday', 'syear'),
+                          rename_dim = 'syear', na.rm = FALSE)
+      # select the central day from the 'sday' dimension
+      hcst_ev <- Subset(hcst_ev, along = 'sday', indices = central_day)
+    }
+    # compute climatology:
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+    # compute anomalies:
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr,
+                             ncores = ncores)
+    ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr,
+                             ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # compute category limits
+    lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'),
+                              fun = function(x, prob_lims) {
+                                lapply(prob_lims, function(ps) {
+                                  as.array(quantile(as.vector(x), ps,
+                                                    na.rm = na.rm))})},
+                              prob_lims = categories,
+                              ncores = ncores)
+    lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'), #, 'ensemble'),
+                             fun = function(x, prob_lims) {
+                               lapply(prob_lims, function(ps) {
+                                 as.array(quantile(as.vector(x), ps,
+                                                   na.rm = na.rm))})},
+                             prob_lims = categories,
+                             ncores = ncores)
+    if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
+      ano_hcst_tr <- SplitDim(ano_hcst_tr, split_dim = 'syear',
+                              new_dim_name = 'sday',
+                              indices = rep(1:dim(data$hcst$data)['sday'],
+                                            length(cross[[t]]$train.dexes)))
+      ano_obs_tr <- SplitDim(ano_obs_tr, split_dim = 'syear',
+                             new_dim_name = 'sday',
+                             indices = rep(1:dim(data$hcst$data)['sday'],
+                                           length(cross[[t]]$train.dexes)))
+    }
+
+    # store results
+    ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev,
+                             along = length(dim(ano_hcst_ev)) + 1)
+    ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev,
+                            along = length(dim(ano_obs_ev)) + 1)
+    ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr,
+                            along = length(dim(ano_obs_tr)) + 1)
+    for (ps in 1:length(categories)) {
+      lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]],
+                                          lims_ano_hcst_tr[[ps]],
+                                          along = length(dim(lims_ano_hcst_tr[[ps]])) + 1)
+      lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]],
+                                         lims_ano_obs_tr[[ps]],
+                                         along = length(dim(lims_ano_obs_tr[[ps]])) + 1)
+    }
+  }
+  info(recipe$Run$logger,
+       "#### Anomalies Cross-validation loop ended #####")
+  gc()
+  # Add dim names:
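+  # A short sketch of why the renaming below is needed (demo_x is hypothetical
+  # and removed right away): abind() typically returns an array whose dim
+  # vector carries no names, so the stacked outputs are relabelled before
+  # dimensions are addressed by name in Subset()/Apply().
+  demo_x <- abind(array(1:2, c(syear = 2)), array(3:4, c(syear = 2)), along = 2)
+  names(dim(demo_x)) <- c('syear', 'sample')
+  rm(demo_x)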
names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('bin', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('bin', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + hcst_ano <- Ano(data = data$hcst$data, clim = clim_hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(hcst_ano, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + obs_ano <- Ano(data = data$obs$data, clim = clim_obs) + lims <- Apply(obs_ano, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), ps, + na.rm = na.rm))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + tmp_lims2 <- list() + + for (ps in 1:length(categories)) { + ## TODO: use .drop_dims() + tmp_lims3 <- .drop_dims(lims[[ps]]) + for (l in 1:dim(lims[[ps]])['bin']) { + ## TODO: Use Subset + tmp_lims <- ClimProjDiags::Subset(x = tmp_lims3, + along = "bin", + indices = l) + tmp_lims2 <- append(tmp_lims2, list(tmp_lims)) + names(tmp_lims2)[length(tmp_lims2)] <- as.character(categories[[ps]][l]) + } + if (recipe$Analysis$Workflow$Probabilities$save == 'yes') { + save_percentiles(recipe = recipe, percentiles = tmp_lims2, + data_cube = data$obs, + agg = "global", outdir = NULL) + } + } + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) 
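+      # A toy illustration of what GetProbs() returns given absolute
+      # thresholds (demo_* values are hypothetical; the exact boundary
+      # convention is GetProbs's choice): with tercile limits c(-0.5, 0.5),
+      # each category's probability is the fraction of members falling in it.
+      demo_memb <- c(-0.8, -0.2, 0.1, 0.6, 1.1)
+      demo_probs <- c(mean(demo_memb < -0.5),
+                      mean(demo_memb >= -0.5 & demo_memb <= 0.5),
+                      mean(demo_memb > 0.5))   # 0.2, 0.4, 0.4
+      rm(demo_memb, demo_probs)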
+    }
+  }
+  # Convert to s2dv_cubes the resulting anomalies
+  ano_hcst <- data$hcst
+  ano_hcst$data <- ano_hcst_ev_res
+  ano_obs <- data$obs
+  ano_obs$data <- ano_obs_ev_res
+
+  info(recipe$Run$logger,
+       "#### Anomalies and Probabilities Done #####")
+  if (recipe$Analysis$Workflow$Anomalies$save != 'none') {
+    info(recipe$Run$logger, "##### START SAVING ANOMALIES #####")
+#    recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+#                                    "/outputs/Anomalies/")
+    # Save forecast
+    if ((recipe$Analysis$Workflow$Anomalies$save %in%
+         c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+      save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst')
+    }
+    # Save hindcast
+    if (recipe$Analysis$Workflow$Anomalies$save %in%
+        c('all', 'exp_only')) {
+      save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst')
+    }
+    # Save observation
+    if (recipe$Analysis$Workflow$Anomalies$save == 'all') {
+      save_observations(recipe = recipe, data_cube = ano_obs)
+    }
+  }
+  # Save probability bins
+  probs_hcst <- list()
+  probs_fcst <- list()
+  probs_obs <- list()
+  all_names <- NULL
+
+  ## TODO: Add .drop_dims() here
+  for (ps in 1:length(categories)) {
+    for (perc in 1:(length(categories[[ps]]) + 1)) {
+      if (perc == 1) {
+        name_elem <- paste0("below_", categories[[ps]][perc])
+      } else if (perc == length(categories[[ps]]) + 1) {
+        name_elem <- paste0("above_", categories[[ps]][perc - 1])
+      } else {
+        name_elem <- paste0("from_", categories[[ps]][perc - 1],
+                            "_to_", categories[[ps]][perc])
+      }
+      probs_hcst <- append(probs_hcst,
+                           list(Subset(hcst_probs_ev[[ps]],
+                                       along = 'bin',
+                                       indices = perc,
+                                       drop = 'selected')))
+      probs_obs <- append(probs_obs,
+                          list(Subset(obs_probs_ev[[ps]],
+                                      along = 'bin',
+                                      indices = perc,
+                                      drop = 'selected')))
+      if (!is.null(data$fcst)) {
+        probs_fcst <- append(probs_fcst,
+                             list(Subset(fcst_probs[[ps]],
+                                         along = 'bin',
+                                         indices = perc,
+                                         drop = 'selected')))
+      }
+      all_names <- c(all_names, name_elem)
+    }
+  }
+  ## TODO: Apply .drop_dims() directly to hcst_probs_ev etc; and move
+  ## reorganizing and renaming to save_probabilities()
+  names(probs_hcst) <- all_names
+  probs_hcst <- lapply(probs_hcst, .drop_dims)
+  names(probs_obs) <- all_names
+  probs_obs <- lapply(probs_obs, .drop_dims)
+  if (!is.null(data$fcst)) {
+    names(probs_fcst) <- all_names
+    probs_fcst <- lapply(probs_fcst, .drop_dims)
+  }
+
+  if (recipe$Analysis$Workflow$Probabilities$save %in%
+      c('all', 'bins_only')) {
+    save_probabilities(recipe = recipe, probs = probs_hcst,
+                       data_cube = data$hcst, agg = agg,
+                       type = "hcst")
+    save_probabilities(recipe = recipe, probs = probs_obs,
+                       data_cube = data$hcst, agg = agg,
+                       type = "obs")
+    if (!is.null(data$fcst)) {
+      save_probabilities(recipe = recipe, probs = probs_fcst,
+                         data_cube = data$fcst, agg = agg,
+                         type = "fcst")
+    }
+  }
+  # Save ensemble mean for multimodel option:
+  hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T)
+#  save_metrics(recipe = recipe,
+#               metrics = list(hcst_EM =
+#                 Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')),
+#               data_cube = data$hcst, agg = agg,
+#               module = "statistics")
+  fcst_EM <- NULL
+  if (!is.null(data$fcst)) {
+    fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T)
+#    save_metrics(recipe = recipe,
+#                 metrics = list(fcst_EM =
+#                   Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')),
+#                 data_cube = data$fcst, agg = agg,
+#                 module = "statistics")
+  }
+  return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst,
+              hcst.full_val = data$hcst, obs.full_val = data$obs,
+              hcst_EM = hcst_EM,
fcst_EM = fcst_EM, + #cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, + fcst = fcst_probs), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_metrics.R b/modules/Crossval/Crossval_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..6475f4781a2bdf47d978a26560f46557c52ca389 --- /dev/null +++ b/modules/Crossval/Crossval_metrics.R @@ -0,0 +1,292 @@ + +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") + +## data_crossval is the result from function full_crossval_anomalies or similar. 
+## this is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as in $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is a s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is a s2dv_cube for the post-processed fcst
+  ## in this case cross-validated anomalies with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+## the recipe could be used to read the Percentiles
+## if fair is TRUE, the nmemb used to compute the probabilities is needed
+  ## nmemb_ref is the number of years - 1 in case the climatological forecast is the reference
+Crossval_metrics <- function(recipe, data_crossval,
+                             fair = FALSE, nmemb = NULL, nmemb_ref = NULL) {
+  ncores <- recipe$Analysis$ncores
+  alpha <- recipe$Analysis$Skill$alpha
+  na.rm <- recipe$Analysis$remove_NAs
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      eval(parse(text = y))})})
+
+  # TODO: distinguish between rpss and bss
+  # if 1 percentile -> bss
+  # if more than 1 -> rpss
+  # TODO: for subseasonal check if the dimension sday is 1
+  exe_rps <- unlist(lapply(categories, function(x) {
+    if (length(x) > 1) {
+      x <- x[1] * 100
+    }
+    return(x)}))
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+  ## START SKILL ASSESSMENT:
+  skill_metrics <- list()
+  requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                        ", | |,")[[1]])
+  # The recipe allows requesting more than only terciles:
+  for (ps in 1:length(exe_rps)) {
+    if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
+      data_crossval$probs$hcst_ev[[ps]] <- MergeDims(data_crossval$probs$hcst_ev[[ps]],
+                                                     merge_dims = c('sweek', 'syear'),
+                                                     rename_dim = 'syear', na.rm = FALSE)
+      data_crossval$probs$obs_ev[[ps]] <- MergeDims(data_crossval$probs$obs_ev[[ps]],
+                                                    merge_dims = c('sweek', 'syear'),
+                                                    rename_dim = 'syear', na.rm = FALSE)
+    }
+
+    if ('rps' %in% requested_metrics) {
+      rps <- RPS(exp = data_crossval$probs$hcst_ev[[ps]],
+                 obs = data_crossval$probs$obs_ev[[ps]], memb_dim = NULL,
+                 cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear',
+                 Fair = fair, nmemb = nmemb,
+                 ncores = ncores)
+      rps_clim <- Apply(list(data_crossval$probs$obs_ev[[ps]]),
+                        target_dims = c('bin', 'syear'),
+                        RPS_clim, bin_dim_abs = 'bin', Fair = fair,
+                        cross.val = FALSE, ncores = ncores)$output1
+      skill_metrics$rps <- rps
+      skill_metrics$rps_clim <- rps_clim
+      # names based on the categories:
+      # To use it when visualization works for more rps
+      #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps
+      #skill_metrics[[paste0('rps_clim',
+      #                      exe_rps[ps])]] <- rps_clim
+    }
+    if ('rpss' %in% requested_metrics) {
+      rpss <- RPSS(exp = data_crossval$probs$hcst_ev[[ps]],
+                   obs = data_crossval$probs$obs_ev[[ps]],
+                   ref = NULL, # ref is 1/3 by default if terciles
+                   time_dim = 'syear', memb_dim = NULL,
+                   cat_dim = 'bin', nmemb = nmemb,
+ dat_dim = NULL, + prob_thresholds = categories[[ps]], + indices_for_clim = NULL, + Fair = fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + # TO USE IT when visualization works for more rpsss + #skill_metrics[[paste0('rpss', exe_rps[ps])]] <- rpss$rpss + #skill_metrics[[paste0('rpss', + # exe_rps[ps], + # "_significance")]] <- rpss$sign + } + } + if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { + # The evaluation of all metrics are done with extra sample + data_crossval$hcst <- CST_MergeDims(data_crossval$hcst, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs <- CST_MergeDims(data_crossval$obs, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$ref_obs_tr <- MergeDims(data_crossval$ref_obs_tr, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$hcst.full_val <- CST_MergeDims(data_crossval$hcst.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + data_crossval$obs.full_val <- CST_MergeDims(data_crossval$obs.full_val, + merge_dims = c('sweek', 'syear'), + rename_dim = 'syear', na.rm = FALSE) + } + + if ('crps' %in% requested_metrics) { + crps <- CRPS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, + ncores = ncores) + skill_metrics$crps <- crps + crps_clim <- CRPS(exp = data_crossval$ref_obs_tr, + obs = data_crossval$obs$data, + time_dim = 'syear', memb_dim = 'ensemble', + Fair = fair, ncores = ncores) + skill_metrics$crps_clim <- crps_clim + } + if ('crpss' %in% requested_metrics) { + crpss <- CRPSS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + ref = data_crossval$ref_obs_tr, + memb_dim = 'ensemble', Fair = fair, + time_dim = 'syear', clim.cross.val = FALSE, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + } + + if ('enscorr' %in% requested_metrics) { + enscorr <- Corr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha, + ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + if (!is.null(data_crossval$hcst.full_val$data)) { + mean_bias <- Bias(exp = data_crossval$hcst.full_val$data, + obs = data_crossval$obs.full_val$data, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha, + na.rm = na.rm, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } else { + info(recipe$Run$logger, + "Full values not available") + } + } + if ('enssprerr' %in% requested_metrics) { + enssprerr <- SprErr(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', pval = TRUE, + na.rm = na.rm, + ncores = ncores) + skill_metrics$SprErr <- enssprerr$ratio + skill_metrics$SprErr_significance <- enssprerr$p.val <= alpha + } + if ('rms' %in% requested_metrics) { + rms <- RMS(exp = data_crossval$hcst$data, + obs = data_crossval$obs$data, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, 
+               ncores = ncores)
+    skill_metrics$rms <- rms$rms
+  }
+  if ('rmss' %in% requested_metrics) {
+    rmss <- RMSSS(exp = data_crossval$hcst$data,
+                  obs = data_crossval$obs$data,
+                  ref = data_crossval$ref_obs_tr,
+                  memb_dim = 'ensemble', dat_dim = NULL,
+                  time_dim = 'syear', alpha = alpha, sign = TRUE,
+                  ncores = ncores)
+    skill_metrics$rmss <- rmss$rmss
+    skill_metrics$rmss_significance <- rmss$sign
+  }
+  if (is.null(data_crossval$hcst_EM)) {
+    data_crossval$hcst_EM <- MeanDims(data_crossval$hcst$data,
+                                      dims = 'ensemble',
+                                      drop = TRUE)
+  }
+  if (any(c('std', 'standard_deviation') %in% requested_metrics)) {
+    std_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1
+
+    std_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1
+
+    skill_metrics[['std_hcst']] <- std_hcst
+    skill_metrics[['std_obs']] <- std_obs
+  }
+  if (any(c('var', 'variance') %in% requested_metrics)) {
+    ## Calculate variance
+    var_hcst <- Apply(data = data_crossval$hcst_EM,
+                      target_dims = 'syear',
+                      fun = 'sd')$output1 ^ 2
+
+    var_obs <- Apply(data = data_crossval$obs$data,
+                     target_dims = 'syear',
+                     fun = 'sd')$output1 ^ 2
+
+    skill_metrics[['var_hcst']] <- var_hcst
+    skill_metrics[['var_obs']] <- var_obs
+  } ## close if on variance
+  if ('n_eff' %in% requested_metrics) {
+    ## Calculate effective degrees of freedom
+    n_eff <- s2dv::Eno(data = data_crossval$obs$data,
+                       time_dim = 'syear',
+                       na.action = na.pass,
+                       ncores = ncores)
+    skill_metrics[['n_eff']] <- n_eff
+  } ## close on n_eff
+
+  if (any(c('cov', 'covariance') %in% requested_metrics)) {
+    covariance <- Apply(data = list(x = data_crossval$obs$data,
+                                    y = data_crossval$hcst_EM),
+                        target_dims = 'syear',
+                        fun = function(x, y) {
+                          cov(as.vector(x), as.vector(y),
+                              use = "everything",
+                              method = "pearson")})$output1
+    skill_metrics$covariance <- covariance
+  }
+  original <- recipe$Run$output_dir
+  recipe$Run$output_dir <- paste0(original, "/outputs/Skill/")
+
+  skill_metrics <- lapply(skill_metrics, function(x) {
+    if (is.logical(x)) {
+      dims <- dim(x)
+      res <- as.numeric(x)
+      dim(res) <- dims
+    } else {
+      res <- x
+    }
+    return(res)
+  })
+  # Save metrics
+  # reduce dimension to work with Visualization module:
+  skill_metrics <- lapply(skill_metrics, function(x) {.drop_dims(x)})
+
+  if (tolower(recipe$Analysis$Workflow$Skill$save) == 'all') {
+    save_metrics(recipe = recipe,
+                 metrics = skill_metrics,
+                 data_cube = data_crossval$hcst, agg = 'global',
+                 outdir = recipe$Run$output_dir)
+  }
+  recipe$Run$output_dir <- original
+  return(skill_metrics)
+}
+
diff --git a/modules/Crossval/Crossval_multimodel_Calibration.R b/modules/Crossval/Crossval_multimodel_Calibration.R
new file mode 100644
index 0000000000000000000000000000000000000000..3f7788e03a975e9847d1c0a42b5a1ce98706109a
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_Calibration.R
@@ -0,0 +1,391 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+
+Crossval_multimodel_Calibration <- function(recipe, data) {
+  cal_method <- recipe$Analysis$Workflow$Calibration$method
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data
dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst[[1]]$data)['latitude'] + nlons <- dim(data$hcst[[1]]$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst[[1]]$data))) { + agg = 'region' + nregions <- dim(data$hcst[[1]]$data)['region'] + } + # output_dims from loop base on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <-c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + ## output objects + obs_ev_res <- NULL + cal_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + cal_hcst_tr_res <- lapply(data$hcst, function(x) {NULL}) + obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # Store cross validation loops: + obs_ev_res <- abind(obs_ev_res, obs_ev, + along = length(dim(obs_ev)) + 1) + obs_tr_res <- abind(obs_tr_res, obs_tr, + along = length(dim(obs_tr)) + 1) + + # Calibrate individual models + cal_hcst_tr <- list() + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # calibrate: + cal_hcst_tr <- append(cal_hcst_tr, + list( + Calibration(exp = hcst_tr, + obs = obs_tr, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores))) + cal_hcst_ev <- #append(cal_hcst_ev, + #list( + Calibration(exp = hcst_tr, + obs = obs_tr, + exp_cor = hcst_ev, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + eval.method = 'in-sample', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores)#)#) + + cal_hcst_ev_res[[sys]] <- abind(cal_hcst_ev_res[[sys]], + cal_hcst_ev, + along = length(dim(cal_hcst_ev)) + 1) + # cal_hcst_tr_res[[sys]] <- abind(cal_hcst_tr_res[[sys]], + # cal_hcst_tr, +# along = length(dim(cal_hcst_tr)) + 1) + } + # compute category limits + lims_cal_hcst_tr <- Apply(cal_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(res), + ps, na.rm = na.rm))})}, + output_dims = lapply(categories, 
function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims_obs_tr <- Apply(obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(x), + ps, na.rm = na.rm))})}, + output_dims = lapply(categories, function(x){'bin'}), + prob_lims = categories, + ncores = ncores) + + #store results + for (ps in 1:length(categories)) { + lims_cal_hcst_tr_res[[ps]] <- abind(lims_cal_hcst_tr_res[[ps]], + lims_cal_hcst_tr[[ps]], + along = length(dim(lims_cal_hcst_tr[[ps]])) + 1) + lims_obs_tr_res[[ps]] <- abind(lims_obs_tr_res[[ps]], + lims_obs_tr[[ps]], + along = length(dim(lims_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Calibration Cross-validation loop ended #####") + gc() + # Add dim names: + cal_hcst_ev_res <- lapply(cal_hcst_ev_res, function(x) { + dim(x) <- dim(x)[-1] + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(obs_ev_res)) <- ev_dim_names + names(dim(obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast calibration: + cal_fcst <- lims_fcst <- NULL + if (!is.null(data$fcst)) { + for (sys in 1:length(data$hcst)) { + cal_fcst <- append(cal_fcst, + list(Calibration(exp = data$hcst[[sys]]$data, + obs = data$obs$data, + exp_cor = data$fcst[[sys]]$data, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores))) + } + names(cal_fcst) <- names(data$hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(cal_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + as.array(quantile(as.vector(res), ps, + na.rm = na.rm))})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'bin'), + lapply(cal_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_cal_hcst_tr[[ps]]), + cal_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 + obs_probs_ev[[ps]] <- GetProbs(obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) +fcst_probs <- NULL + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), + cal_fcst), + target_dims = target_dims_list, + fun = function(lims, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores) + } + } + return(list(hcst = cal_hcst_ev_res, obs = obs_ev_res, fcst = cal_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_cal_hcst_tr_res, + obs_tr = lims_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_probs = hcst_probs_ev, + obs_probs = obs_probs_ev, probs_fcst = fcst_probs), + ref_obs_tr = obs_tr_res)) +} +##### TO SAVE edit the lines below: + # Convert to s2dv_cubes the resulting anomalies +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res + +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } + # Save hindcast +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } + # Save probability bins +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL + # Make categories rounded number to use as names: +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) + + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% 
names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") + # TODO Forecast + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } + # Save ensemble mean for multimodel option: + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_multimodel_NAO.R b/modules/Crossval/Crossval_multimodel_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..b82aea16183852d0ec8138e72d00ed1d52d731cb --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_NAO.R @@ -0,0 +1,139 @@ +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_NAO <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if (is.null(obsproj)) { + obsproj <- TRUE + } + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- 
recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + # Define output dims and names: + nao_hcst_tr_aux <- list() + nao_hcst_ev_aux <- list() + + nao_obs_tr_res <- list() + nao_hcst_ev_res <- list() + # Cross-val loop starts + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + nao_hcst_tr_res <- list() + nao_hcst_ev_res <- list() + + # Observations + obs_tr <- Subset(data $obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # NAO for individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, + exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores)$exp_cor + #Standarisation: + # Need the nao_hcst (for the train.dexes) to standarize the eval.dexes? 
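+        # As in Crossval_NAO.R: the held-out year's NAO is centred and scaled
+        # with the mean and spread of the training years only, computed per
+        # system, so no evaluation-year information enters the standardisation.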
+ nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + # store results: + nao_hcst_tr_res <- append(nao_hcst_tr_res, list(nao_hcst_tr)) + names(nao_hcst_tr_res)[length(nao_hcst_tr_res)] <- names(data$hcst)[[sys]] + nao_hcst_ev_res <- append(nao_hcst_ev_res, list(nao_hcst_ev)) + names(nao_hcst_ev_res)[length(nao_hcst_ev_res)] <- names(data$hcst)[[sys]] + } + if (t == 1) { + nao_hcst_ev_aux <- nao_hcst_ev_res + } else { + for (sys in 1:length(data$hcst)) { + nao_hcst_ev_aux[[sys]] <- abind(nao_hcst_ev_aux[[sys]], + nao_hcst_ev_res[[sys]], + along = length(dim(nao_hcst_ev_res[[sys]])) + 1) + nao_hcst_tr_aux[[sys]] <- abind(nao_hcst_tr_aux[[sys]], + nao_hcst_tr_res[[sys]], + along = length(dim(nao_hcst_tr_res[[sys]])) + 1) + } + } + } + nao_hcst_ev_aux <- lapply(1:length(nao_hcst_ev_aux), function(x) { + names(dim(nao_hcst_ev_aux[[x]])) <- c(names(dim(nao_hcst_ev_res[[x]])), 'sample') + return(nao_hcst_ev_aux[[x]])}) + nao_hcst_tr_aux <- lapply(1:length(nao_hcst_tr_aux), function(x) { + names(dim(nao_hcst_tr_aux[[x]])) <- c(names(dim(nao_hcst_tr_res[[x]])), 'sample') + return(nao_hcst_tr_aux[[x]])}) + + # Observed NAO should be the same for all models. 
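+  # (With obsproj = TRUE the index comes from projecting anomalies onto the
+  # observed EOF, which is built from the observed training data only, so the
+  # observed NAO series does not depend on the forecast system; a single copy
+  # is kept, labelled with the first system's name.)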
+  nao_obs_ev_res <- append(nao_obs_ev_res, list(nao_obs_ev))
+  names(nao_obs_ev_res)[length(nao_obs_ev_res)] <- names(data$hcst)[[1]]
+
+
diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R
new file mode 100644
index 0000000000000000000000000000000000000000..3b92f17d7a7fb9156ecac209823bcaa12cac3662
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_anomalies.R
@@ -0,0 +1,372 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+
+Crossval_multimodel_anomalies <- function(recipe, data) {
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+    sapply(x, function(y) {
+      eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst[[1]]$data)['syear']
+  orig_dims <- names(dim(data$hcst[[1]]$data))
+  # spatial dims
+  if ('latitude' %in% names(dim(data$hcst[[1]]$data))) {
+    nlats <- dim(data$hcst[[1]]$data)['latitude']
+    nlons <- dim(data$hcst[[1]]$data)['longitude']
+    agg = 'global'
+  } else if ('region' %in% names(dim(data$hcst[[1]]$data))) {
+    agg = 'region'
+    nregions <- dim(data$hcst[[1]]$data)['region']
+  }
+  # output_dims from loop based on original dimensions
+  ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
+  ##     'latitude', 'longitude', 'unneeded', 'syear'
+  ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')],
+                    names(sdate_dim))
+  orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded'
+  orig_dims[orig_dims %in% 'syear'] <- 'ensemble'
+  tr_dim_names <- c(orig_dims,
+                    names(sdate_dim))
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
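+  # For reference, with cross.method = 'leave-one-out' and, e.g., 5 start
+  # dates, 'cross' holds one fold per start date, each carrying the index
+  # sets used in the loop below (illustrative values; the helper is internal
+  # to CSTools):
+  #   cross[[1]]$eval.dexes   # e.g. 1
+  #   cross[[1]]$train.dexes  # e.g. 2 3 4 5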
+  ## output objects
+  ano_obs_ev_res <- NULL
+  ano_hcst_ev_res <- lapply(data$hcst, function(x) {NULL})
+  ano_obs_tr_res <- NULL
+  # one element per set of probability categories requested in the recipe:
+  lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x) {NULL})
+  hcst_probs_ev <- lapply(categories, function(x) {NULL})
+  obs_probs_ev <- lapply(categories, function(x) {NULL})
+  hcst_res <- list()
+  ano_hcst_tr <- ano_hcst_ev <- ano_fcst <- list()
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+
+    # Observations
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes, drop = 'selected')
+    clim_obs_tr <- MeanDims(obs_tr, 'syear')
+    ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr,
+                            ncores = ncores)
+    ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr,
+                            ncores = ncores)
+    # Store cross validation loops:
+    ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev,
+                            along = length(dim(ano_obs_ev)) + 1)
+    ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr,
+                            along = length(dim(ano_obs_tr)) + 1)
+
+    # Anomalies of individual models
+    ano_hcst_ev <- ano_hcst_tr <- NULL
+    for (sys in 1:length(data$hcst)) {
+      hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$train.dexes)
+      ## evaluation indices
+      hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$eval.dexes, drop = 'selected')
+      # compute climatology:
+      clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble'))
+      # compute anomalies:
+      ano_hcst_tr <- append(ano_hcst_tr,
+                            list(s2dv::Ano(hcst_tr, clim_hcst_tr,
+                                           ncores = ncores)))
+      ano_hcst_ev <- append(ano_hcst_ev,
+                            list(s2dv::Ano(hcst_ev, clim_hcst_tr,
+                                           ncores = ncores)))
+      ano_hcst_ev_res[[sys]] <- abind(ano_hcst_ev_res[[sys]],
+                                      ano_hcst_ev[[sys]],
+                                      along = length(dim(ano_hcst_ev[[sys]])) + 1)
+    }
+    # compute category limits
+    lims_ano_hcst_tr <- Apply(ano_hcst_tr,
+                              target_dims = c('syear', 'ensemble'),
+                              fun = function(..., prob_lims) {
+                                res <- abind(..., along = 2)
+                                lapply(prob_lims, function(ps) {
+                                  as.array(quantile(as.vector(res),
+                                                    ps, na.rm = na.rm))})},
+                              output_dims = lapply(categories, function(x) {'bin'}),
+                              prob_lims = categories,
+                              ncores = ncores)
+    lims_ano_obs_tr <- Apply(ano_obs_tr,
+                             target_dims = c('syear'),
+                             fun = function(x, prob_lims) {
+                               lapply(prob_lims, function(ps) {
+                                 as.array(quantile(as.vector(x), ps,
+                                                   na.rm = na.rm))})},
+                             output_dims = lapply(categories, function(x) {'bin'}),
+                             prob_lims = categories,
+                             ncores = ncores)
+
+    # store results
+    for (ps in 1:length(categories)) {
+      lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]],
+                                          along = length(dim(lims_ano_hcst_tr[[ps]])) + 1)
+      lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]],
+                                         along = length(dim(lims_ano_obs_tr[[ps]])) + 1)
+    }
+  }
+  info(recipe$Run$logger,
+       "#### Anomalies Cross-validation loop ended #####")
+  gc()
+  # Add dim names:
+  ano_hcst_ev_res <- lapply(ano_hcst_ev_res, function(x) {
+    names(dim(x)) <- ev_dim_names
+    return(x)})
+  names(dim(ano_obs_ev_res)) <- ev_dim_names
+  names(dim(ano_obs_tr_res)) <- tr_dim_names
+  # For crps_clim to work, the reference forecast needs to have the same dims as obs:
+  ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded',
+                           indices = 1, drop = 'selected')
+  for (ps in 1:length(categories)) {
+    names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('bin',
+      orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear')
+    names(dim(lims_ano_obs_tr_res[[ps]])) <- c('bin',
+      tr_dim_names[-which(tr_dim_names %in% c('ensemble'))])
+    lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]],
+                                        along = 'unneeded', indices = 1,
+                                        drop = 'selected')
+  }
+  # Forecast anomalies:
+  ano_fcst <- lims_fcst <- NULL
+  if (!is.null(data$fcst)) {
+    ano_fcst <- Apply(ano_hcst_ev_res,
+                      target_dims = c('syear', 'ensemble'),
+                      function(...) {
+                        res <- abind(..., along = 2)
+                        clim <- mean(as.vector(res), na.rm = na.rm)
+                        res <- res - clim},
+                      ncores = ncores,
+                      output_dims = c('syear', 'ensemble'))$output1
+    # Terciles limits using the whole hindcast period:
+    lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'),
+                       fun = function(..., prob_lims) {
+                         res <- abind(..., along = 2)
+                         lapply(prob_lims, function(ps) {
+                           as.array(quantile(as.vector(res), ps,
+                                             na.rm = na.rm))})},
+                       output_dims = lapply(categories, function(x) {'bin'}),
+                       prob_lims = categories,
+                       ncores = ncores)
+  }
+
+  # Compute Probabilities
+  for (ps in 1:length(categories)) {
+    # create a list of unknown length of systems and limits:
+    target_dims_list <- append(list(lims = 'bin'),
+                               lapply(ano_hcst_ev_res, function(x) {
+                                 c('syear', 'ensemble')}))
+    # use the cross-validated limits (one set per left-out start date):
+    hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr_res[[ps]]),
+                                        ano_hcst_ev_res),
+                                 target_dims = target_dims_list,
+                                 function(lims, ...) {
+                                   res <- abind(..., along = 2)
+                                   names(dim(res)) <- c('syear', 'ensemble')
+                                   GetProbs(res, time_dim = 'syear',
+                                            prob_thresholds = NULL,
+                                            bin_dim_abs = 'bin',
+                                            indices_for_quantiles = NULL,
+                                            memb_dim = 'ensemble',
+                                            abs_thresholds = lims)},
+                                 ncores = ncores)$output1
+    obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear',
+                                   prob_thresholds = NULL,
+                                   bin_dim_abs = 'bin',
+                                   indices_for_quantiles = NULL,
+                                   memb_dim = 'ensemble',
+                                   abs_thresholds = lims_ano_obs_tr_res[[ps]],
+                                   ncores = ncores)
+    if (!is.null(data$fcst)) {
+      # fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]),
+      #                                  ano_fcst),
+      #                           target_dims = list('cat',
+      #                                              c('syear', 'ensemble')),
+      #                           function(lims, fcst) {
+      #                           },
+      fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear',
+                                   prob_thresholds = NULL,
+                                   bin_dim_abs = 'bin',
+                                   indices_for_quantiles = NULL,
+                                   memb_dim = 'ensemble',
+                                   abs_thresholds = lims_fcst[[ps]],
+                                   ncores = ncores)
+    }
+  }
+  return(list(hcst = ano_hcst_ev_res, obs = ano_obs_ev_res, fcst = ano_fcst,
+              hcst.full_val = data$hcst, obs.full_val = data$obs,
+              #hcst_EM = hcst_EM, fcst_EM = fcst_EM,
+              cat_lims = list(hcst_tr = lims_ano_hcst_tr_res,
+                              obs_tr = lims_ano_obs_tr_res,
+                              fcst = lims_fcst),
+              probs = list(hcst_ev = hcst_probs_ev,
+                           obs_ev = obs_probs_ev,
+                           fcst = fcst_probs),
+              ref_obs_tr = ano_obs_tr_res))
+}
+##### TO SAVE edit the lines below:
+  # Convert to s2dv_cubes the resulting anomalies
+# ano_hcst <- data$hcst
+# ano_hcst$data <- ano_hcst_ev_res
+# ano_obs <- data$obs
+# ano_obs$data <- ano_obs_ev_res
+
+# info(recipe$Run$logger,
+#      "#### Anomalies and Probabilities Done #####")
+# if (recipe$Analysis$Workflow$Anomalies$save != 'none') {
+#   info(recipe$Run$logger, "##### START SAVING ANOMALIES #####")
+#   recipe$Run$output_dir <- paste0(recipe$Run$output_dir,
+#                                   "/outputs/Anomalies/")
+  # Save forecast
+#   if ((recipe$Analysis$Workflow$Anomalies$save %in%
+#        c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) {
+#     save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst')
+#   }
+  # Save hindcast
+#   if (recipe$Analysis$Workflow$Anomalies$save %in%
+#       c('all', 'exp_only')) {
+#     save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst')
+#   }
+# # Save observation
+#   if (recipe$Analysis$Workflow$Anomalies$save == 'all') {
+#     save_observations(recipe = recipe, data_cube = ano_obs)
+#   }
+# }
+  # Save probability bins
+# probs_hcst <- list()
+# probs_fcst <- list()
+# probs_obs <- list()
+# all_names <- NULL
+  # Make categories rounded number to use as names:
+# categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+# categories <- lapply(categories, function (x) {
+#   sapply(x, function(y) {
+#     round(eval(parse(text = y)), 2)})})
+
+  # for (ps in 1:length(categories)) {
+  #   for (perc in 1:(length(categories[[ps]]) + 1)) {
+  #     if (perc == 1) {
+  #       name_elem <- paste0("below_", categories[[ps]][perc])
+  #     } else if (perc == length(categories[[ps]]) + 1) {
+  #       name_elem <- paste0("above_", categories[[ps]][perc-1])
+  #     } else {
+  #       name_elem <- paste0("from_", categories[[ps]][perc-1],
+  #                           "_to_", categories[[ps]][perc])
+  #     }
+  #     probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]],
+  #                                      along = 'cat', indices = perc, drop = 'all')),
+  #                          probs_hcst)
+  #     probs_obs <- append(list(Subset(obs_probs_ev[[ps]],
+  #                                     along = 'cat', indices = perc, drop = 'all')),
+  #                         probs_obs)
+  #     if (!is.null(data$fcst)) {
+  #       probs_fcst <- append(list(Subset(fcst_probs[[ps]],
+  #                                        along = 'cat', indices = perc, drop = 'all')),
+  #                            probs_fcst)
+  #     }
+  #     all_names <- c(all_names, name_elem)
+  #   }
+  # }
+  # names(probs_hcst) <- all_names
+  # if (!('var' %in% names(dim(probs_hcst[[1]])))) {
+  #   probs_hcst <- lapply(probs_hcst, function(x) {
+  #     dim(x) <- c(var = 1, dim(x))
+  #     return(x)})
+  # }
+  # names(probs_obs) <- all_names
+  # if (!('var' %in% names(dim(probs_obs[[1]])))) {
+  #   probs_obs <- lapply(probs_obs, function(x) {
+  #     dim(x) <- c(var = 1, dim(x))
+  #     return(x)})
+  # }
+
+  # if (!is.null(data$fcst)) {
+  #   names(probs_fcst) <- all_names
+  #   if (!('var' %in% names(dim(probs_fcst[[1]])))) {
+  #     probs_fcst <- lapply(probs_fcst, function(x) {
+  #       dim(x) <- c(var = 1, dim(x))
+  #       return(x)})
+  #   }
+  #   if (!('syear' %in% names(dim(probs_fcst[[1]])))) {
+  #     probs_fcst <- lapply(probs_fcst, function(x) {
+  #       dim(x) <- c(syear = 1, dim(x))
+  #       return(x)})
+  #   }
+  # }
+  # if (recipe$Analysis$Workflow$Probabilities$save %in%
+  #     c('all', 'bins_only')) {
+  #   save_probabilities(recipe = recipe, probs = probs_hcst,
+  #                      data_cube = data$hcst, agg = agg,
+  #                      type = "hcst")
+  #   save_probabilities(recipe = recipe, probs = probs_obs,
+  #                      data_cube = data$hcst, agg = agg,
+  #                      type = "obs")
+  # TODO Forecast
+  #   if (!is.null(probs_fcst)) {
+  #     save_probabilities(recipe = recipe, probs = probs_fcst,
+  #                        data_cube = data$fcst, agg = agg,
+  #                        type = "fcst")
+  #   }
+  # }
+  # Save ensemble mean for multimodel option:
+  # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T)
+  # save_metrics(recipe = recipe,
+  #              metrics = list(hcst_EM =
+  #                Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')),
+  #              data_cube = data$hcst, agg = agg,
+  #              module = "statistics")
+  #fcst_EM <- NULL
+  #if (!is.null(data$fcst)) {
+  #  fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T)
+  #  save_metrics(recipe = recipe,
+  #               metrics = list(fcst_EM =
+  #                 Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')),
+  #               data_cube = data$fcst, agg = agg,
+  #               module = "statistics")
+  #}
+  #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst,
+  #            hcst.full_val = data$hcst, obs.full_val = data$obs,
+  #            hcst_EM = hcst_EM, fcst_EM = fcst_EM,
+  #            cat_lims = list(hcst_tr = lims_ano_hcst_tr_res,
+  #                            obs_tr = lims_ano_obs_tr_res),
+  #            probs = list(hcst_ev = hcst_probs_ev,
+  #                         obs_ev = obs_probs_ev),
+  #            ref_obs_tr = ano_obs_tr_res))
+#}
+
+
+## The result contains the inputs for Skill_full_crossval.
+## This is a list with the required elements:
+  ## probs is a list with
+  ## probs$hcst_ev and probs$obs_ev
+  ## probs$hcst_ev will have as many elements as in $Probabilities$percentiles
+  ## each element will be an array with 'cat' dimension
+  ## the same for probs$obs_ev
+  ## hcst is an s2dv_cube for the post-processed hindcast for the evaluation indices
+  ## in this case cross-validated anomalies
+  ## obs is an s2dv_cube for the post-processed obs
+  ## in this case cross-validated anomalies
+  ## fcst is an s2dv_cube for the post-processed fcst
+  ## in this case anomalies computed with the full hindcast period
+  ## this object is not required for skill assessment
+  ## hcst.full_val and obs.full_val are the original data to compute mean bias
+  ## cat_lims used to compute the probabilities
+  ## this object is not required for skill assessment
+  ## ref_obs_tr is an array with the cross-validated observed anomalies
+  ## to be used as reference forecast in the CRPSS and CRPS_clim
+  ## it is computed from the training indices
+
diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R
new file mode 100644
index 0000000000000000000000000000000000000000..2adfb3610adb5cf5ece055ba06c50ff29573649d
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_metrics.R
@@ -0,0 +1,273 @@
+# One function to compute all the possible metrics or statistics
+# datos is a list returned by load_multimodel or load_multimodel_mean
+  ## this reduces memory use and it can be used to compute several metrics
+  ## in the mean case the ensemble dim has length 1 for each model
+# probs is a list returned by load_multimodel_probs
+  ## this is needed to compute rps/rpss
+source("modules/Saving/Saving.R")
+source("modules/Crossval/R/tmp/RPS.R")
+source("modules/Crossval/R/RPS_clim.R")
+source("modules/Crossval/R/CRPS_clim.R")
+source("modules/Crossval/R/tmp/RPSS.R")
+source("modules/Crossval/R/tmp/RandomWalkTest.R")
+source("modules/Crossval/R/tmp/Corr.R")
+source("modules/Crossval/R/tmp/Bias.R")
+source("modules/Crossval/R/tmp/SprErr.R")
+source("modules/Crossval/R/tmp/Eno.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Visualization/Visualization.R") # to load .warning from utils
+Crossval_multimodel_metrics <- function(recipe,
+                                        data = NULL,
+                                        Fair = FALSE) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  # Prepare indices for crossval
+  sdate_dim <- dim(data$hcst[[1]])['syear']
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL)
+  tmp <- array(1:length(cross), c(syear = length(cross)))
+  alpha <- recipe$Analysis$alpha
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+
+  requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                        ", | |,")[[1]])
+  skill_metrics <- list()
+  if (!is.null(data)) {
+    # convert $data elements to list to use multiApply:
+    datos <- append(list(obs = data$obs), data$hcst)
+    ## CRPS metrics only make sense in pool method:
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(datos, target_dims = c('syear',
'ensemble'), + fun = function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(data$probs$obs[[ps]]), + target_dims = c('bin', 'syear'), + RPS_clim, bin_dim_abs = 'bin', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'bin', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) + return(skill_metrics) +} diff --git a/modules/Crossval/R/CRPS_clim.R b/modules/Crossval/R/CRPS_clim.R new file mode 100644 index 0000000000000000000000000000000000000000..0e6bef6572673741bb6df3fbe77f92ace2e515af --- /dev/null +++ b/modules/Crossval/R/CRPS_clim.R @@ -0,0 +1,35 @@ +# CRPS version for climatology +CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross.val= TRUE){ + time_dim <- names(dim(obs)) + obs_time_len <- dim(obs)[time_dim] + + if (isFALSE(clim.cross.val)) { + # Without cross-validation + ref <- array(data = rep(obs, each = obs_time_len), + dim = c(obs_time_len, obs_time_len)) + } else if (isTRUE(clim.cross.val)) { + # With cross-validation (excluding the value of that year to create ref for that year) + ref <- array(data = NA, + dim = c(obs_time_len, obs_time_len - 1)) + for (i in 1:obs_time_len) { + ref[i, ] <- obs[-i] + } + } + + names(dim(ref)) <- c(time_dim, memb_dim) + # ref: [sdate, memb] + # obs: [sdate] + crps_ref <- s2dv:::.CRPS(exp = ref, obs = obs, + time_dim = time_dim, + memb_dim = memb_dim, + dat_dim = NULL, + Fair = FALSE) + + # crps_ref should be [sdate] + if (return_mean == TRUE) { + 
return(mean(crps_ref)) + } else { + return(crps_ref) + } +} + diff --git a/modules/Crossval/R/RPS_clim.R b/modules/Crossval/R/RPS_clim.R new file mode 100644 index 0000000000000000000000000000000000000000..6deab3ec9cb7ba05c811f2957630a3a2f49ab7fc --- /dev/null +++ b/modules/Crossval/R/RPS_clim.R @@ -0,0 +1,39 @@ +# RPS version for climatology +RPS_clim <- function(obs, indices_for_clim = NULL, + prob_thresholds = c(1/3, 2/3), cross.val = TRUE, + Fair = FALSE, bin_dim_abs = NULL, return_mean = TRUE) { + if (is.null(indices_for_clim)){ + indices_for_clim <- 1:length(obs) + } + if (is.null(bin_dim_abs)) { + obs_probs <- .GetProbs(data = obs, indices_for_quantiles = indices_for_clim, ## temporarily removed s2dv::: + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + } else { + obs_probs <- obs + } + # clim_probs: [bin, sdate] + clim_probs <- c(prob_thresholds[1], + diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) + if (Fair) { # FairRPS + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + ## See explanation in https://freva.met.fu-berlin.de/about/problems/ + R <- dim(obs)[2] #years + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref <- rps_ref + adjustment + } + + if (return_mean == TRUE) { + return(mean(rps_ref)) + } else { + return(rps_ref) + } +} diff --git a/modules/Crossval/R/tmp/Bias.R b/modules/Crossval/R/tmp/Bias.R new file mode 100644 index 0000000000000000000000000000000000000000..b9292cae24e5c425f2b5654e49b8d9b2358d916b --- /dev/null +++ b/modules/Crossval/R/tmp/Bias.R @@ -0,0 +1,216 @@ +#'Compute the Mean Bias +#' +#'The Mean Bias or Mean Error (Wilks, 2011) is defined as the mean difference +#'between the ensemble mean forecast and the observations. It is a deterministic +#'metric. Positive values indicate that the forecasts are on average too high +#'and negative values indicate that the forecasts are on average too low. +#'It also allows to compute the Absolute Mean Bias or bias without temporal +#'mean. If there is more than one dataset, the result will be computed for each +#'pair of exp and obs data. +#' +#'@param exp A named numerical array of the forecast with at least time +#' dimension. +#'@param obs A named numerical array of the observation with at least time +#' dimension. The dimensions must be the same as 'exp' except 'memb_dim' and +#' 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the ensemble mean; it should be set to NULL if the parameter +#' 'exp' is already the ensemble mean. The default value is NULL. +#'@param na.rm A logical value indicating if NAs should be removed (TRUE) or +#' kept (FALSE) for computation. The default value is FALSE. +#'@param absolute A logical value indicating whether to compute the absolute +#' bias. 
The default value is FALSE. +#'@param time_mean A logical value indicating whether to compute the temporal +#' mean of the bias. The default value is TRUE. +#'@param alpha A numeric or NULL (default) to indicate the significance level using Weltch test. Only available when absolute is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of bias with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' (if time_mean = T) and 'memb_dim'). nexp is the number +#'of experiment (i.e., 'dat_dim' in exp), and nobs is the number of observation +#'(i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and nobs are omitted. If alpha is specified, and absolute is FALSE, the result is a list with two elements, the bias as describe above and the significance as logical array with the same dimensions. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'exp <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(dat = 1, lat = 3, lon = 5, sdate = 50)) +#'bias <- Bias(exp = exp, obs = obs, memb_dim = 'member') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, na.rm = FALSE, + absolute = FALSE, time_mean = TRUE, alpha = NULL, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop("Parameter 'exp' must be a numeric array.") + if (!is.array(obs) | !is.numeric(obs)) + stop("Parameter 'obs' must be a numeric array.") + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop("Parameter 'time_dim' must be a character string.") + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (memb_dim %in% names(dim(obs))) { + if (identical(as.numeric(dim(obs)[memb_dim]), 1)) { + obs <- ClimProjDiags::Subset(x = obs, along = memb_dim, indices = 1, drop = 'selected') + } else { + stop("Not implemented for observations with members ('obs' can have 'memb_dim', ", + "but it should be of length = 1).") + } + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + 
!identical(dim(exp)[name_exp], dim(obs)[name_obs])) {
+    stop("Parameter 'exp' and 'obs' must have same length of ",
+         "all dimensions except 'memb_dim' and 'dat_dim'.")
+  }
+  ## na.rm
+  if (!is.logical(na.rm) | length(na.rm) > 1) {
+    stop("Parameter 'na.rm' must be one logical value.")
+  }
+  ## absolute
+  if (!is.logical(absolute) | length(absolute) > 1) {
+    stop("Parameter 'absolute' must be one logical value.")
+  }
+  ## time_mean
+  if (!is.logical(time_mean) | length(time_mean) > 1) {
+    stop("Parameter 'time_mean' must be one logical value.")
+  }
+  ## alpha
+  if (!is.null(alpha)) {
+    if (!is.numeric(alpha) | length(alpha) > 1) {
+      stop("Parameter 'alpha' must be null or a numeric value.")
+    }
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be either NULL or a positive integer.")
+    }
+  }
+
+  ###############################
+
+  ## Ensemble mean
+  if (!is.null(memb_dim)) {
+    exp <- MeanDims(exp, memb_dim, na.rm = na.rm)
+  }
+
+  ## (Mean) Bias
+  bias <- Apply(data = list(exp, obs),
+                target_dims = c(time_dim, dat_dim),
+                fun = .Bias,
+                time_dim = time_dim,
+                dat_dim = dat_dim,
+                na.rm = na.rm,
+                absolute = absolute,
+                time_mean = time_mean,
+                alpha = alpha,
+                ncores = ncores)
+
+  if (is.null(alpha)) {
+    bias <- bias$output1
+  }
+  return(bias)
+}
+
+
+.Bias <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, na.rm = FALSE,
+                  absolute = FALSE, time_mean = TRUE, alpha = NULL) {
+  # exp and obs: [sdate, (dat)]
+  if (is.null(dat_dim)) {
+    bias <- exp - obs
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- mean(bias, na.rm = na.rm)
+    }
+
+    if (!is.null(alpha)) {
+      if (!absolute) {
+        pval <- t.test(x = obs, y = exp, alternative = "two.sided")$p.value
+        sig <- pval <= alpha
+      }
+    }
+  } else {
+    nexp <- as.numeric(dim(exp)[dat_dim])
+    nobs <- as.numeric(dim(obs)[dat_dim])
+    bias <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs))
+    pval <- array(dim = c(nexp = nexp, nobs = nobs))
+    sig <- array(dim = c(nexp = nexp, nobs = nobs))
+    for (i in 1:nexp) {
+      for (j in 1:nobs) {
+        bias[, i, j] <- exp[, i] - obs[, j]
+        if (!is.null(alpha)) {
+          if (!absolute) {
+            pval[i, j] <- t.test(x = obs[, j], y = exp[, i],
+                                 alternative = "two.sided")$p.value
+            sig[i, j] <- pval[i, j] <= alpha
+          }
+        }
+      }
+    }
+
+    if (isTRUE(absolute)) {
+      bias <- abs(bias)
+    }
+
+    if (isTRUE(time_mean)) {
+      bias <- MeanDims(bias, time_dim, na.rm = na.rm)
+    }
+  }
+  if (!is.null(alpha) && !absolute) {
+    return(list(bias = bias, sig = sig))
+  } else {
+    return(bias)
+  }
+}
diff --git a/modules/Crossval/R/tmp/CST_MergeDims.R b/modules/Crossval/R/tmp/CST_MergeDims.R
new file mode 100644
index 0000000000000000000000000000000000000000..bb06bf608051d06ecabce9a9cd3163c03128c6ad
--- /dev/null
+++ b/modules/Crossval/R/tmp/CST_MergeDims.R
@@ -0,0 +1,162 @@
+#'Function to Merge Dimensions
+#'
+#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es}
+#'
+#'@description This function merges two dimensions of the array \code{data} in a
+#''s2dv_cube' object into one. The user can select the dimensions to merge and
+#'provide the final name of the dimension. The user can select to remove NA
+#'values or keep them.
+#'
+#'@param data An 's2dv_cube' object
+#'@param merge_dims A character vector indicating the names of the dimensions to
+#'  merge.
+#'@param rename_dim A character string indicating the name of the output
+#'  dimension. If left at NULL, the first dimension name provided in parameter
+#'  \code{merge_dims} will be used.
+#'@param na.rm A logical indicating if the NA values should be removed or not.
+#'
+#'@examples
+#'data <- 1 : c(2 * 3 * 4 * 5 * 6 * 7)
+#'dim(data) <- c(time = 7, lat = 2, lon = 3, monthly = 4, member = 6,
+#'               dataset = 5, var = 1)
+#'data[2,,,,,,] <- NA
+#'data[c(3,27)] <- NA
+#'data <- list(data = data)
+#'class(data) <- 's2dv_cube'
+#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly'))
+#'new_data <- CST_MergeDims(data, merge_dims = c('lon', 'lat'), rename_dim = 'grid')
+#'new_data <- CST_MergeDims(data, merge_dims = c('time', 'monthly'), na.rm = TRUE)
+#'@export
+CST_MergeDims <- function(data, merge_dims = c('ftime', 'monthly'),
+                          rename_dim = NULL, na.rm = FALSE) {
+  # Check 's2dv_cube'
+  if (!inherits(data, 's2dv_cube')) {
+    stop("Parameter 'data' must be of the class 's2dv_cube'.")
+  }
+  if (is.null(rename_dim)) {
+    rename_dim <- merge_dims[1]
+  }
+  # data
+  data$data <- MergeDims(data$data, merge_dims = merge_dims,
+                         rename_dim = rename_dim, na.rm = na.rm)
+  # dims
+  data$dims <- dim(data$data)
+
+  # rename_dim
+  if (length(rename_dim) > 1) {
+    rename_dim <- as.character(rename_dim[1])
+  }
+  # coords
+  data$coords[merge_dims] <- NULL
+  data$coords[[rename_dim]] <- 1:dim(data$data)[rename_dim]
+  attr(data$coords[[rename_dim]], 'indices') <- TRUE
+
+  # attrs
+  if (all(merge_dims %in% names(dim(data$attrs$Dates)))) {
+    original_timezone <- attr(data$attrs$Dates[1], "tzone")
+    data$attrs$Dates <- MergeDims(data$attrs$Dates, merge_dims = merge_dims,
+                                  rename_dim = rename_dim, na.rm = na.rm)
+    # Transform dates back to POSIXct
+    data$attrs$Dates <- as.POSIXct(data$attrs$Dates,
+                                   origin = "1970-01-01",
+                                   tz = original_timezone)
+
+  } else if (any(merge_dims %in% names(dim(data$attrs$Dates)))) {
+    warning("The dimensions of 'Dates' array will be different from ",
+            "the temporal dimensions in 'data'. Parameter 'merge_dims' ",
+            "only includes one temporal dimension of 'Dates'.")
+  }
+  return(data)
+}
+#'Function to Merge Dimensions of an Array
+#'
+#'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es}
+#'
+#'@description This function merges two dimensions of an array into one. The
+#'user can select the dimensions to merge and provide the final name of the
+#'dimension. The user can select to remove NA values or keep them.
+#'
+#'@param data An n-dimensional array with named dimensions
+#'@param merge_dims A character vector indicating the names of the dimensions to
+#'  merge.
+#'@param rename_dim A character string indicating the name of the output
+#'  dimension. If left at NULL, the first dimension name provided in parameter
+#'  \code{merge_dims} will be used.
+#'@param na.rm A logical indicating if the NA values should be removed or not.
+#' +#'@examples +#'data <- 1 : 20 +#'dim(data) <- c(time = 10, lat = 2) +#'new_data <- MergeDims(data, merge_dims = c('time', 'lat')) +#'@import abind +#'@importFrom ClimProjDiags Subset +#'@export +MergeDims <- function(data, merge_dims = c('time', 'monthly'), + rename_dim = NULL, na.rm = FALSE) { + # check data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (is.null(dim(data))) { + stop("Parameter 'data' must have dimensions.") + } + if (is.null(names(dim(data)))) { + stop("Parameter 'data' must have dimension names.") + } + dims <- dim(data) + # check merge_dims + if (is.null(merge_dims)) { + stop("Parameter 'merge_dims' cannot be NULL.") + } + if (!is.character(merge_dims)) { + stop("Parameter 'merge_dims' must be a character vector ", + "indicating the names of the dimensions to be merged.") + } + if (length(merge_dims) > 2) { + warning("Only two dimensions can be merge, only the first two ", + "dimension will be used. To merge further dimensions ", + "consider to use this function multiple times.") + merge_dims <- merge_dims[1 : 2] + } else if (length(merge_dims) < 2) { + stop("Parameter 'merge_dims' must be of length two.") + } + if (is.null(rename_dim)) { + rename_dim <- merge_dims[1] + } + if (length(rename_dim) > 1) { + warning("Parameter 'rename_dim' has length greater than 1 ", + "and only the first element will be used.") + rename_dim <- as.character(rename_dim[1]) + } + if (!any(names(dims) %in% merge_dims)) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + pos1 <- which(names(dims) == merge_dims[1]) + pos2 <- which(names(dims) == merge_dims[2]) + if (length(pos1) == 0 | length(pos2) == 0) { + stop("Parameter 'merge_dims' must match with dimension ", + "names in parameter 'data'.") + } + if (pos1 > pos2) { + pos1 <- pos1 - 1 + } + data <- lapply(1:dims[pos2], function(x) {Subset(data, along = pos2, + indices = x, drop = 'selected')}) + data <- abind(data, along = pos1) + names(dim(data)) <- names(dims)[-pos2] + if (!is.null(rename_dim)) { + names(dim(data))[pos1] <- rename_dim + } + if (na.rm) { + nas <- which(is.na(Subset(data, along = -pos1, indices = 1))) + if (length(nas) != 0) { + nas <- unlist(lapply(nas, function(x) { + if(all(is.na(Subset(data, along = pos1, + indices = x)))) { + return(x)}})) + data <- Subset(data, along = pos1, indices = -nas) + } + } + return(data) +} diff --git a/modules/Crossval/R/tmp/Corr.R b/modules/Crossval/R/tmp/Corr.R new file mode 100644 index 0000000000000000000000000000000000000000..744ff10996d9261e8e8ef8eded34c5b442537ae2 --- /dev/null +++ b/modules/Crossval/R/tmp/Corr.R @@ -0,0 +1,484 @@ +#'Compute the correlation coefficient between an array of forecast and their corresponding observation +#' +#'Calculate the correlation coefficient (Pearson, Kendall or Spearman) for +#'an array of forecast and an array of observation. The correlations are +#'computed along 'time_dim' that usually refers to the start date dimension. If +#''comp_dim' is given, the correlations are computed only if obs along comp_dim +#'dimension are complete between limits[1] and limits[2], i.e., there is no NA +#'between limits[1] and limits[2]. 
This option can be activated if the user +#'wants to account only for the forecasts which the corresponding observations +#'are available at all leadtimes.\cr +#'The confidence interval is computed by the Fisher transformation and the +#'significance level relies on an one-sided student-T distribution.\cr +#'The function can calculate ensemble mean before correlation by 'memb_dim' +#'specified and 'memb = F'. If ensemble mean is not calculated, correlation will +#'be calculated for each member. +#'If there is only one dataset for exp and obs, you can simply use cor() to +#'compute the correlation. +#' +#'@param exp A named numeric array of experimental data, with at least dimension +#' 'time_dim'. +#'@param obs A named numeric array of observational data, same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param time_dim A character string indicating the name of dimension along +#' which the correlations are computed. The default value is 'sdate'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param comp_dim A character string indicating the name of dimension along which +#' obs is taken into account only if it is complete. The default value +#' is NULL. +#'@param limits A vector of two integers indicating the range along comp_dim to +#' be completed. The default is c(1, length(comp_dim dimension)). +#'@param method A character string indicating the type of correlation: +#' 'pearson', 'spearman', or 'kendall'. The default value is 'pearson'. +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. If there is no +#' member dimension, set NULL. The default value is NULL. +#'@param memb A logical value indicating whether to remain 'memb_dim' dimension +#' (TRUE) or do ensemble mean over 'memb_dim' (FALSE). Only functional when +#' 'memb_dim' is not NULL. The default value is TRUE. +#'@param pval A logical value indicating whether to return or not the p-value +#' of the test Ho: Corr = 0. The default value is TRUE. +#'@param conf A logical value indicating whether to return or not the confidence +#' intervals. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: Corr = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing the numeric arrays with dimension:\cr +#' c(nexp, nobs, exp_memb, obs_memb, all other dimensions of exp except +#' time_dim and memb_dim).\cr +#'nexp is the number of experiment (i.e., 'dat_dim' in exp), and nobs is the +#'number of observation (i.e., 'dat_dim' in obs). If dat_dim is NULL, nexp and +#'nobs are omitted. exp_memb is the number of member in experiment (i.e., +#''memb_dim' in exp) and obs_memb is the number of member in observation (i.e., +#''memb_dim' in obs). If memb = F, exp_memb and obs_memb are omitted.\cr\cr +#'\item{$corr}{ +#' The correlation coefficient. +#'} +#'\item{$p.val}{ +#' The p-value. Only present if \code{pval = TRUE}. +#'} +#'\item{$conf.lower}{ +#' The lower confidence interval. Only present if \code{conf = TRUE}. +#'} +#'\item{$conf.upper}{ +#' The upper confidence interval. 
Only present if \code{conf = TRUE}. +#'} +#'\item{$sign}{ +#' The statistical significance. Only present if \code{sign = TRUE}. +#'} +#' +#'@examples +#'# Case 1: Load sample data as in Load() example: +#'example(Load) +#'clim <- Clim(sampleData$mod, sampleData$obs) +#'ano_exp <- Ano(sampleData$mod, clim$clim_exp) +#'ano_obs <- Ano(sampleData$obs, clim$clim_obs) +#'runmean_months <- 12 +#' +#'# Smooth along lead-times +#'smooth_ano_exp <- Smoothing(ano_exp, runmeanlen = runmean_months) +#'smooth_ano_obs <- Smoothing(ano_obs, runmeanlen = runmean_months) +#'required_complete_row <- 3 # Discard start dates which contain any NA lead-times +#'leadtimes_per_startdate <- 60 +#'corr <- Corr(MeanDims(smooth_ano_exp, 'member'), +#' MeanDims(smooth_ano_obs, 'member'), +#' comp_dim = 'ftime', dat_dim = 'dataset', +#' limits = c(ceiling((runmean_months + 1) / 2), +#' leadtimes_per_startdate - floor(runmean_months / 2))) +#' +#'# Case 2: Keep member dimension +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', dat_dim = 'dataset') +#'# ensemble mean +#'corr <- Corr(smooth_ano_exp, smooth_ano_obs, memb_dim = 'member', memb = FALSE, +#' dat_dim = 'dataset') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@importFrom stats cor pt qnorm +#'@export +Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, + comp_dim = NULL, limits = NULL, method = 'pearson', + memb_dim = NULL, memb = TRUE, + pval = TRUE, conf = TRUE, sign = FALSE, + alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be at least two dimensions ", + "containing time_dim and dat_dim.")) + } + if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string or NULL.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## comp_dim + if (!is.null(comp_dim)) { + if (!is.character(comp_dim) | length(comp_dim) > 1) { + stop("Parameter 'comp_dim' must be a character string.") + } + if (!comp_dim %in% names(dim(exp)) | !comp_dim %in% names(dim(obs))) { + stop("Parameter 'comp_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## limits + if (!is.null(limits)) { + if (is.null(comp_dim)) { + stop("Paramter 'comp_dim' cannot be NULL if 'limits' is assigned.") + } + if (!is.numeric(limits) | any(limits %% 1 != 0) | any(limits < 0) | + length(limits) != 2 | any(limits > dim(exp)[comp_dim])) { + stop(paste0("Parameter 'limits' must be a vector of two positive ", + "integers smaller than the length of paramter 'comp_dim'.")) + } + } + ## method + if (!(method %in% c("kendall", 
"spearman", "pearson"))) { + stop("Parameter 'method' must be one of 'kendall', 'spearman' or 'pearson'.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + } + ## memb + if (!is.logical(memb) | length(memb) > 1) { + stop("Parameter 'memb' must be one logical value.") + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## conf + if (!is.logical(conf) | length(conf) > 1) { + stop("Parameter 'conf' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + ## alpha + if (!is.numeric(alpha) | alpha < 0 | alpha > 1 | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all dimension except 'dat_dim' and 'memb_dim'.")) + } + if (dim(exp)[time_dim] < 3) { + stop("The length of time_dim must be at least 3 to compute correlation.") + } + + + ############################### + # Sort dimension + name_exp <- names(dim(exp)) + name_obs <- names(dim(obs)) + order_obs <- match(name_exp, name_obs) + obs <- Reorder(obs, order_obs) + + + ############################### + # Calculate Corr + + # Remove data along comp_dim dim if there is at least one NA between limits + if (!is.null(comp_dim)) { + pos <- which(names(dim(obs)) == comp_dim) + if (is.null(limits)) { + obs_sub <- obs + } else { + obs_sub <- ClimProjDiags::Subset(obs, pos, list(limits[1]:limits[2])) + } + outrows <- is.na(MeanDims(obs_sub, pos, na.rm = FALSE)) + outrows <- InsertDim(outrows, pos, dim(obs)[comp_dim]) + obs[which(outrows)] <- NA + rm(obs_sub, outrows) + } + if (!is.null(memb_dim)) { + if (!memb) { #ensemble mean + exp <- MeanDims(exp, memb_dim, na.rm = TRUE) + obs <- MeanDims(obs, memb_dim, na.rm = TRUE) +# name_exp <- names(dim(exp)) +# margin_dims_ind <- c(1:length(name_exp))[-which(name_exp == memb_dim)] +# exp <- apply(exp, margin_dims_ind, mean, na.rm = TRUE) #NOTE: remove NAs here +# obs <- apply(obs, margin_dims_ind, mean, na.rm = TRUE) + memb_dim <- NULL + } + } + + res <- Apply(list(exp, obs), + target_dims = list(c(time_dim, dat_dim, memb_dim), + c(time_dim, dat_dim, memb_dim)), + fun = .Corr, + dat_dim = dat_dim, memb_dim 
= memb_dim, + time_dim = time_dim, method = method, + pval = pval, conf = conf, sign = sign, alpha = alpha, + ncores = ncores) + + return(res) +} + +.Corr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', method = 'pearson', + conf = TRUE, pval = TRUE, sign = FALSE, alpha = 0.05) { + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + if (is.null(memb_dim)) { + CORR <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + # exp: [sdate] + # obs: [sdate] + if (any(!is.na(exp)) && sum(!is.na(obs)) > 2) { + CORR[, ] <- cor(exp, obs, use = "pairwise.complete.obs", method = method) + } + } else { + # exp: [sdate, dat_exp] + # obs: [sdate, dat_obs] + for (j in 1:nobs) { + for (y in 1:nexp) { + if (any(!is.na(exp[, y])) && sum(!is.na(obs[, j])) > 2) { + CORR[y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + } + } +#---------------------------------------- +# Same as above calculation. +#TODO: Compare which is faster. +# CORR <- sapply(1:nobs, function(i) { +# sapply(1:nexp, function (x) { +# if (any(!is.na(exp[, x])) && sum(!is.na(obs[, i])) > 2) { +# cor(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method) +# } else { +# NA +# } +# }) +# }) +#----------------------------------------- + } + + } else { # memb_dim != NULL + exp_memb <- as.numeric(dim(exp)[memb_dim]) # memb_dim + obs_memb <- as.numeric(dim(obs)[memb_dim]) + + CORR <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + + if (is.null(dat_dim)) { + # exp: [sdate, memb_exp] + # obs: [sdate, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + + if (any(!is.na(exp[,y])) && sum(!is.na(obs[, j])) > 2) { + CORR[, , y, j] <- cor(exp[, y], obs[, j], + use = "pairwise.complete.obs", + method = method) + } + + } + } + } else { + # exp: [sdate, dat_exp, memb_exp] + # obs: [sdate, dat_obs, memb_obs] + for (j in 1:obs_memb) { + for (y in 1:exp_memb) { + CORR[, , y, j] <- sapply(1:nobs, function(i) { + sapply(1:nexp, function (x) { + if (any(!is.na(exp[, x, y])) && sum(!is.na(obs[, i, j])) > 2) { + cor(exp[, x, y], obs[, i, j], + use = "pairwise.complete.obs", + method = method) + } else { + NA + } + }) + }) + + } + } + } + + } + + +# if (pval) { +# for (i in 1:nobs) { +# p.val[, i] <- try(sapply(1:nexp, +# function(x) {(cor.test(exp[, x], obs[, i], +# use = "pairwise.complete.obs", +# method = method)$p.value)/2}), silent = TRUE) +# if (class(p.val[, i]) == 'character') { +# p.val[, i] <- NA +# } +# } +# } + + if (pval || conf || sign) { + if (method == "kendall" | method == "spearman") { + if (!is.null(dat_dim) | !is.null(memb_dim)) { + tmp <- apply(obs, c(1:length(dim(obs)))[-1], rank) # for memb_dim = NULL, 2; for memb_dim, c(2, 3) + names(dim(tmp))[1] <- time_dim + eno <- Eno(tmp, time_dim) + } else { + tmp <- rank(obs) + tmp <- array(tmp) + names(dim(tmp)) <- time_dim + eno <- Eno(tmp, time_dim) + } + } else if (method == "pearson") { + eno <- Eno(obs, time_dim) + } + + if (is.null(memb_dim)) { + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs)) + for (i in 1:nexp) { + eno_expand[i, ] <- eno + } + } else { #member + eno_expand <- array(dim = c(nexp = nexp, nobs = nobs, exp_memb = exp_memb, obs_memb = obs_memb)) + for (i in 1:nexp) { + for (j in 1:exp_memb) { + eno_expand[i, , j, ] <- eno + } + } + } + + } + +#############old################# +#This doesn't return error but it's diff from 
cor.test() when method is spearman and kendall + if (pval || sign) { + t <- sqrt(CORR * CORR * (eno_expand - 2) / (1 - (CORR ^ 2))) + p.val <- pt(t, eno_expand - 2, lower.tail = FALSE) + if (sign) signif <- !is.na(p.val) & p.val <= alpha + } +################################### + if (conf) { + conf.lower <- alpha / 2 + conf.upper <- 1 - conf.lower + suppressWarnings({ + conflow <- tanh(atanh(CORR) + qnorm(conf.lower) / sqrt(eno_expand - 3)) + confhigh <- tanh(atanh(CORR) + qnorm(conf.upper) / sqrt(eno_expand - 3)) + }) + } + +################################### + # Remove nexp and nobs if dat_dim = NULL + if (is.null(dat_dim)) { +# if (is.null(dat_dim) & !is.null(memb_dim)) { + + if (length(dim(CORR)) == 2) { + dim(CORR) <- NULL + if (pval) { + dim(p.val) <- NULL + } + if (conf) { + dim(conflow) <- NULL + dim(confhigh) <- NULL + } + if (sign) { + dim(signif) <- NULL + } + } else { + dim(CORR) <- dim(CORR)[3:length(dim(CORR))] + if (pval) { + dim(p.val) <- dim(p.val)[3:length(dim(p.val))] + } + if (conf) { + dim(conflow) <- dim(conflow)[3:length(dim(conflow))] + dim(confhigh) <- dim(confhigh)[3:length(dim(confhigh))] + } + if (sign) { + dim(signif) <- dim(signif)[3:length(dim(signif))] + } + } + } + +################################### + + res <- list(corr = CORR) + if (pval) { + res <- c(res, list(p.val = p.val)) + } + if (conf) { + res <- c(res, list(conf.lower = conflow, conf.upper = confhigh)) + } + if (sign) { + res <- c(res, list(sign = signif)) + } + + return(res) + +} diff --git a/modules/Crossval/R/tmp/EOF.R b/modules/Crossval/R/tmp/EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..87795b66ec37c62de76c283f88cdebb535292e12 --- /dev/null +++ b/modules/Crossval/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). +#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed +#' field. 
+#'} +#'\item{PCs}{ +#' An array of principal components with the units of the original field to +#' the power of 2, with dimensions (time_dim, number of modes, rest of the +#' dimensions of 'ano' except 'space_dim'). +#' 'PCs' contains already the percentage of explained variance so, +#' to reconstruct the original field it's only needed to multiply 'EOFs' +#' by 'PCs'. +#'} +#'\item{var}{ +#' An array of the percentage (%) of variance fraction of total variance +#' explained by each mode (number of modes). The dimensions are (number of +#' modes, rest of the dimensions of 'ano' except 'time_dim' and 'space_dim'). +#'} +#'\item{mask}{ +#' An array of the mask with dimensions (space_dim, rest of the dimensions of +#' 'ano' except 'time_dim'). It is made from 'ano', 1 for the positions that +#' 'ano' has value and NA for the positions that 'ano' has NA. It is used to +#' replace NAs with 0s for EOF calculation and mask the result with NAs again +#' after the calculation. +#'} +#'\item{wght}{ +#' An array of the area weighting with dimensions 'space_dim'. It is calculated +#' by cosine of 'lat' and used to compute the fraction of variance explained by +#' each EOFs. +#'} +#'\item{tot_var}{ +#' A number or a numeric array of the total variance explained by all the modes. +#' The dimensions are same as 'ano' except 'time_dim' and 'space_dim'. +#'} +#' +#'@seealso ProjectField, NAO, PlotBoxWhisker +#'@examples +#'# This example computes the EOFs along forecast horizons and plots the one +#'# that explains the greatest amount of variability. The example data has low +#'# resolution so the result may not be explanatory, but it displays how to +#'# use this function. +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'tmp <- MeanDims(ano$exp, c('dataset', 'member')) +#'ano <- tmp[1, , ,] +#'names(dim(ano)) <- names(dim(tmp))[-2] +#'eof <- EOF(ano, sampleData$lat, sampleData$lon) +#'\dontrun{ +#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat) +#'} +#' +#'@import multiApply +#'@importFrom stats sd +#'@export +EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'), + neofs = 15, corr = FALSE, ncores = NULL) { + + # Check inputs + ## ano + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## lat + if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'ano'.") + } + if (any(lat > 90 | lat < -90)) { + 
stop("Parameter 'lat' must contain values within the range [-90, 90].") + } + ## lon + if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'ano'.") + } + if (any(lon > 360 | lon < -360)) { + .warning("Some 'lon' is out of the range [-360, 360].") + } + ## neofs + if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) { + stop("Parameter 'neofs' must be a positive integer.") + } + ## corr + if (!is.logical(corr) | length(corr) > 1) { + stop("Parameter 'corr' must be one logical value.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate EOF + +# # Replace mask of NAs with 0s for EOF analysis. +# ano[!is.finite(ano)] <- 0 + + # Area weighting. Weights for EOF; needed to compute the + # fraction of variance explained by each EOFs + space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a)) + wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind]) + + # We want the covariance matrix to be weigthed by the grid + # cell area so the anomaly field is weighted by its square + # root since the covariance matrix equals transpose(ano) + # times ano. + wght <- sqrt(wght) + + # neofs is bounded + if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) { + neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs) + .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ", + "the length of time_dim, the production of the length of space_dim, ", + "and neofs.")) + } + + res <- Apply(ano, + target_dims = c(time_dim, space_dim), + output_dims = list(EOFs = c('mode', space_dim), + PCs = c(time_dim, 'mode'), + var = 'mode', + tot_var = NULL, + mask = space_dim), + fun = .EOF, + corr = corr, neofs = neofs, + wght = wght, + ncores = ncores) + + return(c(res, wght = list(wght))) + +} + +.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) { + # ano: [time, lat, lon] + + # Dimensions + nt <- dim(ano)[1] + ny <- dim(ano)[2] + nx <- dim(ano)[3] + + # Check if all the time steps at one grid point are NA-consistent. + # The grid point should have all NAs or no NA along time dim. + if (anyNA(ano)) { + ano_latlon <- array(ano, dim = c(nt, ny * nx)) # [time, lat*lon] + na_ind <- which(is.na(ano_latlon), arr.ind = T) + if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) { + stop("Detect certain grid points have NAs but not consistent across time ", + "dimension. If the grid point is NA, it should have NA at all time step.") + } + } + + # Build the mask + mask <- ano[1, , ] + mask[!is.finite(mask)] <- NA + mask[is.finite(mask)] <- 1 + dim(mask) <- c(ny, nx) + + # Replace mask of NAs with 0s for EOF analysis. + ano[!is.finite(ano)] <- 0 + + ano <- ano * InsertDim(wght, 1, nt) + + # The use of the correlation matrix is done under the option corr. + if (corr) { + stdv <- apply(ano, c(2, 3), sd, na.rm = T) + ano <- ano / InsertDim(stdv, 1, nt) + } + + # Time/space matrix for SVD + dim(ano) <- c(nt, ny * nx) + dim.dat <- dim(ano) + + # 'transpose' means the array needs to be transposed before + # calling La.svd for computational efficiency because the + # spatial dimension is larger than the time dimension. This + # goes with transposing the outputs of LA.svd also. 
+ if (dim.dat[2] > dim.dat[1]) {
+ transpose <- TRUE
+ } else {
+ transpose <- FALSE
+ }
+ if (transpose) {
+ pca <- La.svd(t(ano))
+ } else {
+ pca <- La.svd(ano)
+ }
+
+ # La.svd conventions for the decomposition X = U D t(V): La.svd$u
+ # returns U, La.svd$d returns the diagonal values of D, and La.svd$v
+ # returns t(V). The usual convention is PC = U and EOF = V.
+ # If La.svd is called on ano (transpose = FALSE case): EOFs = $v, PCs = $u.
+ # If La.svd is called on t(ano) (transpose = TRUE case): EOFs = t($u),
+ # PCs = t($v).
+
+ if (transpose) {
+ pca.EOFs <- t(pca$u)
+ pca.PCs <- t(pca$v)
+ } else {
+ pca.EOFs <- pca$v
+ pca.PCs <- pca$u
+ }
+
+ # Keep only the first neofs modes
+ PC <- pca.PCs[, 1:neofs]
+ EOF <- pca.EOFs[1:neofs, ]
+ dim(EOF) <- c(neofs, ny, nx)
+
+ # Restore the mode dimension that is dropped when neofs = 1.
+ if (neofs == 1) {
+ PC <- InsertDim(PC, 2, 1, name = 'new')
+ }
+
+ # Computation of the % of variance associated with each mode
+ W <- pca$d[1:neofs]
+ tot.var <- sum(pca$d^2)
+ var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+ for (e in 1:neofs) {
+ # Set all masked grid points to NA in the EOFs
+ # Divide patterns by area weights so that EOF * PC gives unweighted (original) data
+ EOF[e, , ] <- EOF[e, , ] * mask / wght
+ # PC is multiplied by the explained variance,
+ # so that the reconstruction is only EOF * PC
+ PC[, e] <- PC[, e] * W[e]
+ }
+
+ if (neofs == 1) {
+ var.eof <- as.array(var.eof)
+ }
+
+ return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
+ diff --git a/modules/Crossval/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R new file mode 100644 index 0000000000000000000000000000000000000000..cb927602221e10f6d3c0853bf0935aad93834099 --- /dev/null +++ b/modules/Crossval/R/tmp/Eno.R @@ -0,0 +1,103 @@
+#'Compute effective sample size with the classical method
+#'
+#'Compute the number of effective samples along one dimension of an array. This
+#'effective number of independent observations can be used in
+#'statistical/inference tests.\cr
+#'The calculation is based on the eno function by Caio Coelho from rclim.txt.
+#'
+#'@param data A numeric array with named dimensions.
+#'@param time_dim A character string indicating the name of the dimension along
+#' which to compute the effective sample size. The default value is 'sdate'.
+#'@param na.action A function. It can be na.pass (missing values are allowed)
+#' or na.fail (no missing values are allowed). See details in stats::acf().
+#' The default value is na.pass.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return An array with the same dimensions as parameter 'data' except the
+#' time_dim dimension, which is removed after the computation. The array
+#' indicates the effective number of samples along time_dim.
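+#'
+#'@details
+#' In sketch form, the classical estimate implemented here is
+#' n_eff = min(n / (1 + 2 * sum(((n - k) / n) * rho_k, k = 1..n-1)), n),
+#' where rho_k is the lag-k autocorrelation of the series and n is the number
+#' of non-NA values along time_dim.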
+#' +#'@examples +#'set.seed(1) +#'data <- array(rnorm(800), dim = c(dataset = 1, member = 2, sdate = 4, +#' ftime = 4, lat = 10, lon = 10)) +#'na <- floor(runif(40, min = 1, max = 800)) +#'data[na] <- NA +#'res <- Eno(data) +#' +#'@importFrom stats acf na.pass na.fail +#'@import multiApply +#'@export +Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (is.null(dim(data))) { #is vector + dim(data) <- c(length(data)) + names(dim(data)) <- time_dim + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimension.") + } + ## na.action + if (as.character(substitute(na.action)) != "na.pass" & + as.character(substitute(na.action)) != "na.fail") { + stop("Parameter 'na.action' must be a function either na.pass or na.fail.") + } + if (as.character(substitute(na.action)) == "na.fail" && anyNA(data)) { + stop("Calculation fails because NA is found in paratemter 'data', ", + "which is not accepted when ", + "parameter 'na.action' = na.fail.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ############################### + # Calculate Eno + + eno <- Apply(data = list(data), + target_dims = time_dim, + output_dims = NULL, + fun = .Eno, + na.action = na.action, + ncores = ncores)$output1 + + return(eno) +} + +.Eno <- function(x, na.action) { + n <- length(sort(x)) + if (n > 1) { + a <- acf(x, lag.max = n - 1, plot = FALSE, + na.action = na.action)$acf[2:n, 1, 1] + s <- 0 + for (k in 1:(n - 1)) { + s <- s + (((n - k) / n) * a[k]) + } + eno <- min(n / (1 + (2 * s)), n) + } else { + eno <- NA + } + + return(eno) +} + diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R new file mode 100644 index 0000000000000000000000000000000000000000..fb2cda0c9cc839c7f88e44ec6f4d0cb4a6944019 --- /dev/null +++ b/modules/Crossval/R/tmp/GetProbs.R @@ -0,0 +1,351 @@ +#'Compute probabilistic forecasts or the corresponding observations +#' +#'Compute probabilistic forecasts from an ensemble based on the relative +#'thresholds, or the probabilistic observations (i.e., which probabilistic +#'category was observed). A reference period can be specified to calculate the +#'absolute thresholds between each probabilistic category. The absolute +#'thresholds can be computed in cross-validation mode. If data is an ensemble, +#'the probabilities are calculated as the percentage of members that fall into +#'each category. For observations (or forecast without member dimension), 1 +#'means that the event happened, while 0 indicates that the event did not +#'happen. Weighted probabilities can be computed if the weights are provided for +#'each ensemble member and time step. The absolute thresholds can also be +#'provided directly for probabilities calculation. +#' +#'@param data A named numerical array of the forecasts or observations with, at +#' least, time dimension. +#'@param time_dim A character string indicating the name of the time dimension. 
+#' The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member dimension
+#' to compute the probabilities of the forecast, or NULL if there is no member
+#' dimension (e.g., for observations, or for a forecast with only one ensemble
+#' member). The default value is 'member'.
+#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to
+#' 1) between the categories. The default value is c(1/3, 2/3), which
+#' corresponds to tercile equiprobable categories.
+#'@param abs_thresholds A numeric array or vector of the absolute thresholds in
+#' the same units as \code{data}. If an array is provided, it should have at
+#' least the 'bin_dim_abs' dimension. If it has more dimensions (e.g., different
+#' thresholds for different locations, i.e., lon and lat dimensions), they
+#' should match the dimensions of \code{data}, except the member dimension
+#' which should not be included. The default value is NULL and, in this case,
+#' 'prob_thresholds' is used for calculating the probabilities.
+#'@param bin_dim_abs A character string of the dimension name of the
+#' 'abs_thresholds' array in which category limits are stored. It will also be
+#' the probabilistic category dimension name in the output. The default value
+#' is 'bin'.
+#'@param indices_for_quantiles A vector of the indices to be taken along
+#' 'time_dim' for computing the absolute thresholds between the probabilistic
+#' categories. If NULL (default), the whole period is used. It is only used
+#' when 'prob_thresholds' is provided.
+#'@param weights A named numerical array of the weights for 'data' with
+#' dimensions 'time_dim' and 'memb_dim' (if 'data' has them). The default value
+#' is NULL. The ensemble should have at least 70 members or span at least 10
+#' time steps and have more than 45 members if consistency between the weighted
+#' and unweighted methodologies is desired.
+#'@param cross.val A logical indicating whether to compute the thresholds
+#' between probabilistic categories in cross-validation mode. The default value
+#' is FALSE.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A numerical array of probabilities with dimensions c(bin_dim_abs, the rest
+#'of the dimensions of 'data' except 'memb_dim'). The 'bin' dimension has the
+#'length of the probabilistic categories, i.e., \code{length(prob_thresholds) + 1}.
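+#'
+#'@details
+#' A worked sketch with the default terciles: if, at one time step, 2 of 10
+#' ensemble members fall below the lower tercile, 5 between the terciles, and
+#' 3 above the upper one, the returned probabilities are c(0.2, 0.5, 0.3). An
+#' observation (no member dimension) falling in the middle category yields
+#' c(0, 1, 0).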
+#' +#'@examples +#'data <- array(rnorm(2000), dim = c(ensemble = 25, sdate = 20, time = 4)) +#'res <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' indices_for_quantiles = 4:17) +#' +#'# abs_thresholds is provided +#'abs_thr1 <- c(-0.2, 0.3) +#'abs_thr2 <- array(c(-0.2, 0.3) + rnorm(40) * 0.1, dim = c(cat = 2, sdate = 20)) +#'res1 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr1) +#'res2 <- GetProbs(data = data, time_dim = 'sdate', memb_dim = 'ensemble', +#' prob_thresholds = NULL, abs_thresholds = abs_thr2, bin_dim_abs = 'cat') +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member', + indices_for_quantiles = NULL, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + bin_dim_abs = 'bin', weights = NULL, cross.val = FALSE, ncores = NULL) { + + # Check inputs + ## data + if (is.null(data)) { + stop("Parameter 'data' cannot be NULL.") + } + if (!is.numeric(data)) { + stop("Parameter 'data' must be a numeric array.") + } + if (any(is.null(names(dim(data)))) | any(nchar(names(dim(data))) == 0)) { + stop("Parameter 'data' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(data))) { + stop("Parameter 'time_dim' is not found in 'data' dimensions.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(data))) { + stop("Parameter 'memb_dim' is not found in 'data' dimensions. If no member ", + "dimension exists, set it as NULL.") + } + } + ## bin_dim_abs + if (!is.character(bin_dim_abs) | length(bin_dim_abs) != 1) { + stop('Parameter "bin_dim_abs" must be a character string.') + } + ## prob_thresholds, abs_thresholds + if (!is.null(abs_thresholds) & !is.null(prob_thresholds)) { + .warning(paste0("Parameters 'prob_thresholds' and 'abs_thresholds' are both provided. 
", + "Only the first one is used.")) + abs_thresholds <- NULL + } else if (is.null(abs_thresholds) & is.null(prob_thresholds)) { + stop("One of the parameters 'prob_thresholds' and 'abs_thresholds' must be provided.") + } + if (!is.null(prob_thresholds)) { + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_quantiles + if (is.null(indices_for_quantiles)) { + indices_for_quantiles <- seq_len(dim(data)[time_dim]) + } else { + if (!is.numeric(indices_for_quantiles) | !is.vector(indices_for_quantiles)) { + stop("Parameter 'indices_for_quantiles' must be NULL or a numeric vector.") + } else if (length(indices_for_quantiles) > dim(data)[time_dim] | + max(indices_for_quantiles) > dim(data)[time_dim] | + any(indices_for_quantiles < 1)) { + stop("Parameter 'indices_for_quantiles' should be the indices of 'time_dim'.") + } + } + + } else { # abs_thresholds + + if (is.null(dim(abs_thresholds))) { # a vector + dim(abs_thresholds) <- length(abs_thresholds) + names(dim(abs_thresholds)) <- bin_dim_abs + } + # bin_dim_abs + if (!(bin_dim_abs %in% names(dim(abs_thresholds)))) { + stop("Parameter abs_thresholds' can be a vector or array with 'bin_dim_abs' dimension.") + } + if (!is.null(memb_dim) && memb_dim %in% names(dim(abs_thresholds))) { + stop("Parameter abs_thresholds' cannot have member dimension.") + } + dim_name_abs <- names(dim(abs_thresholds))[which(names(dim(abs_thresholds)) != bin_dim_abs)] + if (!all(dim_name_abs %in% names(dim(data)))) { + stop("Parameter 'abs_thresholds' dimensions except 'bin_dim_abs' must be in 'data' as well.") + } else { + if (any(dim(abs_thresholds)[dim_name_abs] != dim(data)[dim_name_abs])) { + stop("Parameter 'abs_thresholds' dimensions must have the same length as 'data'.") + } + } + if (!is.null(indices_for_quantiles)) { + warning("Parameter 'indices_for_quantiles' is not used when 'abs_thresholds' are provided.") + } + abs_target_dims <- bin_dim_abs + if (time_dim %in% names(dim(abs_thresholds))) { + abs_target_dims <- c(bin_dim_abs, time_dim) + } + + } + + ## weights + if (!is.null(weights)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + +# if (is.null(dat_dim)) { + if (!is.null(memb_dim)) { + lendim_weights <- 2 + namesdim_weights <- c(time_dim, memb_dim) + } else { + lendim_weights <- 1 + namesdim_weights <- c(time_dim) + } + if (length(dim(weights)) != lendim_weights | + !all(names(dim(weights)) %in% namesdim_weights)) { + stop("Parameter 'weights' must have dimension ", + paste0(namesdim_weights, collapse = ' and '), ".") + } + if (any(dim(weights)[namesdim_weights] != dim(data)[namesdim_weights])) { + stop("Parameter 'weights' must have the same dimension length as ", + paste0(namesdim_weights, collapse = ' and '), " dimension in 'data'.") + } + weights <- Reorder(weights, namesdim_weights) + +# } else { +# if (length(dim(weights)) != 3 | +# any(!names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) +# stop("Parameter 'weights' must have three dimensions with the names of ", +# "'memb_dim', 'time_dim' and 'dat_dim'.") +# if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | +# dim(weights)[time_dim] != dim(exp)[time_dim] | +# dim(weights)[dat_dim] != dim(exp)[dat_dim]) { +# stop(paste0("Parameter 'weights' must have the same dimension lengths ", +# "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.")) +# } +# weights <- 
Reorder(weights, c(time_dim, memb_dim, dat_dim)) +# } + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + ############################### + if (is.null(abs_thresholds)) { + res <- Apply(data = list(data = data), + target_dims = c(time_dim, memb_dim), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = prob_thresholds, + indices_for_quantiles = indices_for_quantiles, + weights = weights, cross.val = cross.val, ncores = ncores)$output1 + } else { + res <- Apply(data = list(data = data, abs_thresholds = abs_thresholds), + target_dims = list(c(time_dim, memb_dim), abs_target_dims), + output_dims = c(bin_dim_abs, time_dim), + fun = .GetProbs, + prob_thresholds = NULL, + indices_for_quantiles = NULL, + weights = NULL, cross.val = FALSE, ncores = ncores)$output1 + } + + return(res) +} + +.GetProbs <- function(data, indices_for_quantiles, + prob_thresholds = c(1/3, 2/3), abs_thresholds = NULL, + weights = NULL, cross.val = FALSE) { + # .GetProbs() is used in RPS, RPSS, ROCSS + # data + ## if data is exp: [sdate, memb] + ## if data is obs: [sdate, (memb)] + # weights: [sdate, (memb)], same as data + # if abs_thresholds is not NULL: [bin, (sdate)] + + # Add dim [memb = 1] to data if it doesn't have memb_dim + if (length(dim(data)) == 1) { + dim(data) <- c(dim(data), 1) + if (!is.null(weights)) dim(weights) <- c(dim(weights), 1) + } + + # Calculate absolute thresholds + if (is.null(abs_thresholds)) { + if (cross.val) { + quantiles <- array(NA, dim = c(bin = length(prob_thresholds), sdate = dim(data)[1])) + for (i_time in seq_len(dim(data)[1])) { + if (is.null(weights)) { + tmp <- which(indices_for_quantiles != i_time) + quantiles[, i_time] <- + quantile(x = as.vector(data[indices_for_quantiles[tmp], ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + tmp <- which(indices_for_quantiles != i_time) + sorted_arrays <- + .sorted_distributions(data[indices_for_quantiles[tmp], ], + weights[indices_for_quantiles[tmp], ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles[, i_time] <- approx(cumulative_weights, sorted_data, + prob_thresholds, "linear")$y + } + } + + } else { + if (is.null(weights)) { + quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), + probs = prob_thresholds, type = 8, na.rm = TRUE) + } else { + # weights: [sdate, memb] + sorted_arrays <- .sorted_distributions(data[indices_for_quantiles, ], + weights[indices_for_quantiles, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + quantiles <- approx(cumulative_weights, sorted_data, prob_thresholds, "linear")$y + } + quantiles <- array(rep(quantiles, dim(data)[1]), + dim = c(bin = length(quantiles), dim(data)[1])) + } + + } else { # abs_thresholds provided + quantiles <- abs_thresholds + if (length(dim(quantiles)) == 1) { + quantiles <- InsertDim(quantiles, lendim = dim(data)[1], + posdim = 2, name = names(dim(data))[1]) + } + } + # quantiles: [bin-1, sdate] + # Probabilities + probs <- array(dim = c(dim(quantiles)[1] + 1, dim(data)[1])) # [bin, sdate] + for (i_time in seq_len(dim(data)[1])) { + if (anyNA(data[i_time, ])) { + probs[, i_time] <- rep(NA, dim = 
dim(quantiles)[1] + 1) + } else { + if (is.null(weights)) { + probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], + threshold = quantiles[, i_time])) + } else { + sorted_arrays <- .sorted_distributions(data[i_time, ], weights[i_time, ]) + sorted_data <- sorted_arrays$data + cumulative_weights <- sorted_arrays$cumulative_weights + # find any quantiles that are outside the data range + integrated_probs <- array(dim = dim(quantiles)) + for (i_quant in seq_len(dim(quantiles)[1])) { + # for thresholds falling under the distribution + if (quantiles[i_quant, i_time] < min(sorted_data)) { + integrated_probs[i_quant, i_time] <- 0 + # for thresholds falling over the distribution + } else if (max(sorted_data) < quantiles[i_quant, i_time]) { + integrated_probs[i_quant, i_time] <- 1 + } else { + integrated_probs[i_quant, i_time] <- approx(sorted_data, cumulative_weights, + quantiles[i_quant, i_time], "linear")$y + } + } + probs[, i_time] <- append(integrated_probs[, i_time], 1) - + append(0, integrated_probs[, i_time]) + if (min(probs[, i_time]) < 0 | max(probs[, i_time]) > 1) { + stop("Probability in i_time = ", i_time, " is out of [0, 1].") + } + } + } + } + + return(probs) +} + +.sorted_distributions <- function(data_vector, weights_vector) { + weights_vector <- as.vector(weights_vector) + data_vector <- as.vector(data_vector) + weights_vector <- weights_vector / sum(weights_vector) # normalize to 1 + sorter <- order(data_vector) + sorted_weights <- weights_vector[sorter] + cumulative_weights <- cumsum(sorted_weights) - 0.5 * sorted_weights + cumulative_weights <- cumulative_weights - cumulative_weights[1] # fix the 0 + cumulative_weights <- cumulative_weights / + cumulative_weights[length(cumulative_weights)] # fix the 1 + return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights)) +} + diff --git a/modules/Crossval/R/tmp/NAO.R b/modules/Crossval/R/tmp/NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..255e2a9fd587b1d1bad90374b2aae7d803299ba6 --- /dev/null +++ b/modules/Crossval/R/tmp/NAO.R @@ -0,0 +1,574 @@ +#'Compute the North Atlantic Oscillation (NAO) Index +#' +#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF +#'of the sea level pressure (SLP) anomalies over the north Atlantic region +#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and +#'observed anomalies onto the observed EOF pattern or the forecast +#'anomalies onto the EOF pattern of the other years of the forecast. +#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month +#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns +#'cross-validated PCs of the NAO index for hindcast (exp) and observations +#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided, +#'the NAO index for forecast and the corresponding data (exp and obs). +#' +#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least. +#' If only NAO of observational data needs to be computed, this parameter can +#' be left to NULL. The default value is NULL. +#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E) +#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with +#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least. 
+#' If only NAO of experimental data needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param exp_cor A named numeric array of the North Atlantic SLP (20N-80N, 80W-40E)
+#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimension 'time_dim' of length 1 (as in the case of an operational
+#' forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only NAO of the reference period needs to be computed, this parameter can
+#' be left as NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#' of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, the length
+#' must be 1. The default value is 'member'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#' name of latitude of 'exp' and 'obs' and the second is the dimension name of
+#' longitude of 'exp' and 'obs'. The default value is c('lat', 'lon').
+#'@param ftime_dim A character string indicating the name of the forecast time
+#' dimension of 'exp' and 'obs'. The default value is 'ftime'.
+#'@param ftime_avg A numeric vector of the forecast time steps to average
+#' across the target period. If no average is needed, set it to NULL. The default
+#' value is 2:4, i.e., from 2nd to 4th forecast time steps.
+#'@param obsproj A logical value indicating whether to compute the NAO index by
+#' projecting the forecast anomalies onto the leading EOF of the observational
+#' reference (TRUE, default) or compute the NAO by first computing the leading
+#' EOF of the forecast anomalies (in cross-validation mode, i.e., leaving the
+#' evaluated year out), then projecting the forecast anomalies onto this EOF
+#' (FALSE). If 'exp_cor' is provided, 'obs' will be used when obsproj is TRUE
+#' and 'exp' will be used when obsproj is FALSE, and no cross-validation is
+#' applied.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list which contains some of the following items depending on the data inputs:
+#'\item{exp}{
+#' A numeric array of hindcast NAO index in verification format with the same
+#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#' }
+#'\item{obs}{
+#' A numeric array of observation NAO index in verification format with the same
+#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#'}
+#'\item{exp_cor}{
+#' A numeric array of forecast NAO index in verification format with the same
+#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#' }
+#'
+#'@references
+#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of
+#' multi-model seasonal forecasts of the wintertime North Atlantic
+#' Oscillation. Climate Dynamics, 21, 501-514.
+#' DOI: 10.1007/s00382-003-0350-4 +#' +#'@examples +#'# Make up synthetic data +#'set.seed(1) +#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'set.seed(2) +#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9)) +#'lat <- seq(20, 80, length.out = 6) +#'lon <- seq(-80, 40, length.out = 9) +#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon) +#' +#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9)) +#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE) +#'# plot the NAO index +#' \dontrun{ +#'nao$exp <- Reorder(nao$exp, c(2, 1)) +#'nao$obs <- Reorder(nao$obs, c(2, 1)) +#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS", +#' monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X") +#' } +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate', + memb_dim = 'member', space_dim = c('lat', 'lon'), + ftime_dim = 'ftime', ftime_avg = 2:4, + obsproj = TRUE, ncores = NULL) { + # Check inputs + ## exp, obs, and exp_cor (1) + if (is.null(obs) & is.null(exp)) { + stop("Parameter 'exp' and 'obs' cannot both be NULL.") + } + if (!is.null(exp)) { + if (!is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (is.null(dim(exp))) { + stop("Parameter 'exp' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) { + stop("Parameter 'exp' must have dimension names.") + } + } + if (!is.null(obs)) { + if (!is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (is.null(dim(obs))) { + stop("Parameter 'obs' must have at least dimensions ", + "time_dim, space_dim, and ftime_dim.") + } + if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'obs' must have dimension names.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(exp_cor)) { + stop("Parameter 'exp_cor' must be a numeric array.") + } + if (is.null(dim(exp_cor))) { + stop(paste0("Parameter 'exp_cor' must have at least dimensions ", + "time_dim, memb_dim, space_dim, and ftime_dim.")) + } + if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) { + stop("Parameter 'exp_cor' must have dimension names.") + } + if (is.null(exp) || is.null(obs)) { + stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is not provided.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!time_dim %in% names(dim(exp))) { + stop("Parameter 'time_dim' is not found in 'exp' dimension.") + } + } + if (!is.null(obs)) { + if (!time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!time_dim %in% names(dim(exp_cor))) { + stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.") + } + if (dim(exp_cor)[time_dim] > 1) { + stop("Parameter 'exp_cor' is expected to have length 1 in ", + time_dim, "dimension.") + } + } + + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + 
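+ # If 'obs' carries a member dimension, it must be of length 1; drop it for
+ # the computation and flag it so the member dimension is added back to the
+ # output at the end.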
add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as 
the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. 
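+ ## Only the PC of the year excluded from the EOF fit is stored, so
+ ## each iteration fills one cross-validated value.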
+ nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..efa35dc35313e6934f354e41e39cd979a4293b33 --- /dev/null +++ b/modules/Crossval/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. 
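+#'In sketch form, each PC value is the inner product of the anomaly map with
+#'the EOF pattern, with both fields first multiplied by the square root of the
+#'cosine-of-latitude weights before summing over grid points.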
It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. 
It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. ", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. 
Check if 'ano' and 'eof' are compatible.") + } + + common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)] + if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] != + dim(eof_mode)[dimnames_without_mode])) { + stop("Found paramter 'ano' and 'EOF' in parameter 'eof' have common dimensions ", + "with different length. Check if 'ano' and 'eof' are compatible.") + } + + # Enlarge eof/ano is needed. The margin_dims of Apply() must be consistent + # between ano and eof. + additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))] + additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)] + if (length(additional_dims) != 0) { + for (i in seq_along(additional_dims)) { + eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1), + lendim = additional_dims[i], name = names(additional_dims)[i]) + } + } + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + } + + return(res) +} + + +.ProjectField <- function(ano, eof_mode, wght) { + # ano: [sdate, lat, lon] + # eof_mode: [lat, lon] or [mode, lat, lon] + # wght: [lat, lon] + + ntime <- dim(ano)[1] + if (length(dim(eof_mode)) == 2) { # mode != NULL + # Initialization of pc.ver. + pc.ver <- array(NA, dim = ntime) #[sdate] + + # Weight + e.1 <- eof_mode * wght + ano <- ano * InsertDim(wght, 1, ntime) + #ano <- aaply(ano, 1, '*', wght) # much heavier + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + #na <- apply(ano, 1, mean, na.rm = TRUE) # much heavier + tmp <- ano * InsertDim(e.1, 1, ntime) # [sdate, lat, lon] + rm(ano) + #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE) # much heavier + pc.ver <- rowSums(tmp, na.rm = TRUE) + pc.ver[which(is.na(na))] <- NA + + } else { # mode = NULL + # Weight + e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1]) + dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3])) # [mode, lat*lon] + ano <- ano * InsertDim(wght, 1, ntime) + dim(ano) <- c(ntime, prod(dim(ano)[2:3])) # [sdate, lat*lon] + + na <- rowMeans(ano, na.rm = TRUE) # if [lat, lon] all NA, it's NA + na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1)) + + # Matrix multiplication e.1 [mode, lat*lon] by ano [lat*lon, sdate] + # Result: [mode, sdate] + pc.ver <- e.1 %*% t(ano) + pc.ver[which(is.na(na))] <- NA + +# # Change back dimensions to feet original input +# dim(projection) <- c(moredims, mode = unname(neofs)) +# return(projection) + } + + return(pc.ver) +} + + diff --git a/modules/Crossval/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R new file mode 100644 index 0000000000000000000000000000000000000000..0ed599ac4c145e605bbe39b76feb2ebfdf204d29 --- /dev/null +++ b/modules/Crossval/R/tmp/RPS.R @@ -0,0 +1,408 @@ +#'Compute the Ranked Probability Score +#' +#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the +#'squared differences between the cumulative forecast probabilities (computed +#'from the ensemble members) and the observations (defined as 0% if the category +#'did not happen and 100% if it happened). It can be used to evaluate the skill +#'of multi-categorical probabilistic forecasts. 
The RPS ranges between 0 +#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of +#'categories. In the case of a forecast divided into two categories (the lowest +#'number of categories that a probabilistic forecast can have), the RPS +#'corresponds to the Brier Score (BS; Wilks, 2011), therefore ranging between 0 +#'and 1.\cr +#'The function first calculates the probabilities for forecasts and observations, +#'then use them to calculate RPS. Or, the probabilities of exp and obs can be +#'provided directly to compute the score. If there is more than one dataset, RPS +#'will be computed for each pair of exp and obs data. The fraction of acceptable +#'NAs can be adjusted. +#' +#'@param exp A named numerical array of either the forecasts with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast. The default value is 'member'. +#' If the data are probabilities, set memb_dim as NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when the exp and obs are probabilities. The default +#' value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPS (the +#' potential RPS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param nmemb A numeric value indicating the number of members used to compute the probabilities. This parameter is necessary when calculating FairRPS from probabilities. Default is NULL. +#'@param weights A named numerical array of the weights for 'exp' probability +#' calculation. If 'dat_dim' is NULL, the dimensions should include 'memb_dim' +#' and 'time_dim'. Else, the dimension should also include 'dat_dim'. The +#' default value is NULL. The ensemble should have at least 70 members or span +#' at least 10 time steps and have more than 45 members if consistency between +#' the weighted and unweighted methodologies is desired. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistic categories in cross-validation. The default value is +#' FALSE. +#'@param return_mean A logical indicating whether to return the temporal mean +#' of the RPS or not. 
If TRUE, the temporal mean is calculated along time_dim, +#' if FALSE the time dimension is not aggregated. The default is TRUE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A numerical array of RPS with dimensions c(nexp, nobs, the rest dimensions of +#''exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#'experiment (i.e., dat_dim in exp), and nobs is the number of observation +#'(i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#' +#'@examples +#'# Use synthetic data +#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) +#'res <- RPS(exp = exp, obs = obs) +#'# Use probabilities as inputs +#'exp_probs <- GetProbs(exp, time_dim = 'sdate', memb_dim = 'member') +#'obs_probs <- GetProbs(obs, time_dim = 'sdate', memb_dim = NULL) +#'res2 <- RPS(exp = exp_probs, obs = obs_probs, memb_dim = NULL, cat_dim = 'bin') +#' +#' +#'@import multiApply +#'@importFrom easyVerification convert2prob +#'@export +RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, return_mean = TRUE, + na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (!is.array(exp) | !is.numeric(exp)) + stop('Parameter "exp" must be a numeric array.') + if (!is.array(obs) | !is.numeric(obs)) + stop('Parameter "obs" must be a numeric array.') + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) + stop('Parameter "time_dim" must be a character string.') + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs))) { + stop("Parameter 'cat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | 
!dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- seq_len(dim(obs)[time_dim]) + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim < 1)) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + if (Fair) { + if (!is.null(cat_dim)) { + if (cat_dim %in% names(dim(exp))) { + if (is.null(nmemb)) { + stop("Parameter 'nmemb' is necessary to compute the Fair ", + "score from probabilities.") + } + } + } + } + ## return_mean + if (!is.logical(return_mean) | length(return_mean) > 1) { + stop("Parameter 'return_mean' must be either TRUE or FALSE.") + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## weights + if (!is.null(weights) & is.null(cat_dim)) { + if (!is.array(weights) | !is.numeric(weights)) + stop("Parameter 'weights' must be a named numeric array.") + if (is.null(dat_dim)) { + if (length(dim(weights)) != 2 | !all(names(dim(weights)) %in% c(memb_dim, time_dim))) + stop("Parameter 'weights' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim' and 'time_dim' in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights)) != 3 | !all(names(dim(weights)) %in% c(memb_dim, time_dim, dat_dim))) + stop("Parameter 'weights' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + if (dim(weights)[memb_dim] != dim(exp)[memb_dim] | + dim(weights)[time_dim] != dim(exp)[time_dim] | + dim(weights)[dat_dim] != dim(exp)[dat_dim]) { + stop("Parameter 'weights' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") + } + weights <- Reorder(weights, c(time_dim, memb_dim, dat_dim)) + + } + } else if (!is.null(weights) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' and 'obs' are probabilities already, so parameter ", + "'weights' is not used. 
Change 'weights' to NULL.")) + weights <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + rps <- Apply(data = list(exp = exp, obs = obs), + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs), + fun = .RPS, + dat_dim = dat_dim, time_dim = time_dim, + memb_dim = memb_dim, cat_dim = cat_dim, + prob_thresholds = prob_thresholds, nmemb = nmemb, + indices_for_clim = indices_for_clim, Fair = Fair, + weights = weights, cross.val = cross.val, + na.rm = na.rm, ncores = ncores)$output1 + + if (return_mean) { + rps <- MeanDims(rps, time_dim, na.rm = TRUE) + } else { + rps <- rps + } + + return(rps) +} + + +.RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, weights = NULL, + cross.val = FALSE, na.rm = FALSE) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # weights: NULL or same as exp + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + + # Adjust dimensions to be [sdate, memb, dat] for both exp and obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + dim(exp) <- c(dim(exp), nexp = nexp) + dim(obs) <- c(dim(obs), nobs = nobs) + if (!is.null(weights)) dim(weights) <- c(dim(weights), nexp = nexp) + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + rps <- array(dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + + for (i in 1:nexp) { + for (j in 1:nobs) { + exp_data <- exp[, , i] + obs_data <- obs[, , j] + + if (is.null(dim(exp_data))) dim(exp_data) <- c(dim(exp)[1:2]) + if (is.null(dim(obs_data))) dim(obs_data) <- c(dim(obs)[1:2]) + + # Find the fraction of NAs + ## If any member/bin is NA at this time step, it is not good value. 
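+      ## The fraction of complete time steps is compared against 'na.rm' below: +      ## isFALSE(na.rm) maps to f_NAs = 1 (no NA tolerated), isTRUE(na.rm) maps +      ## to f_NAs = 0 (any number of NAs accepted), and a numeric value, e.g. +      ## na.rm = 0.8 (illustrative value), requires at least 80% complete steps.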
+ exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (f_NAs <= sum(good_values) / length(obs_mean)) { + + exp_data <- exp_data[good_values, , drop = F] + obs_data <- obs_data[good_values, , drop = F] + + # If the data inputs are forecast/observation, calculate probabilities + if (is.null(cat_dim)) { + if (!is.null(weights)) { + weights_data <- weights[which(good_values), , i] + if (is.null(dim(weights_data))) dim(weights_data) <- c(dim(weights)[1:2]) + } else { + weights_data <- weights #NULL + } + + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + exp_probs <- .GetProbs(data = exp_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = weights_data, + cross.val = cross.val) + # exp_probs: [bin, sdate] + obs_probs <- .GetProbs(data = obs_data, indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, weights = NULL, + cross.val = cross.val) + # obs_probs: [bin, sdate] + + } else { # inputs are probabilities already + if (all(names(dim(exp_data)) == c(time_dim, memb_dim)) || + all(names(dim(exp_data)) == c(time_dim, cat_dim))) { + exp_probs <- t(exp_data) + obs_probs <- t(obs_data) + } + } + + probs_exp_cumsum <- apply(exp_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + + # rps: [sdate, nexp, nobs] + rps [good_values, i, j] <- colSums((probs_exp_cumsum - probs_obs_cumsum)^2) + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(exp)[2] #memb + } + } else { + R <- nmemb + } + warning("Applying fair correction.") + adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) + adjustment <- colSums(adjustment) + rps[, i, j] <- rps[, i, j] + adjustment + } + + } else { ## not enough values different from NA + + rps[, i, j] <- NA_real_ + + } + + } + } + + if (is.null(dat_dim)) { + dim(rps) <- dim(exp)[time_dim] + } + + return(rps) +} + diff --git a/modules/Crossval/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R new file mode 100644 index 0000000000000000000000000000000000000000..fc9931ad8610f400f3cc59e902c9ee5bb3256508 --- /dev/null +++ b/modules/Crossval/R/tmp/RPSS.R @@ -0,0 +1,638 @@ +#'Compute the Ranked Probability Skill Score +#' +#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score +#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to +#'assess whether a forecast presents an improvement or worsening with respect to +#'a reference forecast. The RPSS ranges between minus infinite and 1. If the +#'RPSS is positive, it indicates that the forecast has higher skill than the +#'reference forecast, while a negative value means that it has a lower skill.\cr +#'Examples of reference forecasts are the climatological forecast (same +#'probabilities for all categories for all time steps), persistence, a previous +#'model version, and another model. It is computed as +#'\code{RPSS = 1 - RPS_exp / RPS_ref}. 
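+#'For instance (illustrative values), RPSS = 0.5 means that the forecast +#'halves the RPS of the reference, whereas RPSS = 0 means no improvement. 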
The statistical significance is obtained +#'based on a Random Walk test at the specified confidence level (DelSole and +#'Tippett, 2016).\cr +#'The function accepts either the ensemble members or the probabilities of +#'each data as inputs. If there is more than one dataset, RPSS will be +#'computed for each pair of exp and obs data. The NA ratio of data will be +#'examined before the calculation. If the ratio is higher than the threshold +#'(assigned by parameter \code{na.rm}), NA will be returned directly. NAs are +#'counted by per-pair method, which means that only the time steps that all the +#'datasets have values count as non-NA values. +#' +#'@param exp A named numerical array of either the forecast with at least time +#' and member dimensions, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. +#'@param obs A named numerical array of either the observation with at least +#' time dimension, or the probabilities with at least time and category +#' dimensions. The probabilities can be generated by \code{s2dv::GetProbs}. The +#' dimensions must be the same as 'exp' except 'memb_dim' and 'dat_dim'. +#'@param ref A named numerical array of either the reference forecast with at +#' least time and member dimensions, or the probabilities with at least time and +#' category dimensions. The probabilities can be generated by +#' \code{s2dv::GetProbs}. The dimensions must be the same as 'exp' except +#' 'memb_dim' and 'dat_dim'. If there is only one reference dataset, it should +#' not have dataset dimension. If there is corresponding reference for each +#' experiment, the dataset dimension must have the same length as in 'exp'. If +#' 'ref' is NULL, the climatological forecast is used as reference forecast. +#' The default value is NULL. +#'@param time_dim A character string indicating the name of the time dimension. +#' The default value is 'sdate'. +#'@param memb_dim A character string indicating the name of the member dimension +#' to compute the probabilities of the forecast and the reference forecast. The +#' default value is 'member'. If the data are probabilities, set memb_dim as +#' NULL. +#'@param cat_dim A character string indicating the name of the category +#' dimension that is needed when exp, obs, and ref are probabilities. The +#' default value is NULL, which means that the data are not probabilities. +#'@param dat_dim A character string indicating the name of dataset dimension. +#' The length of this dimension can be different between 'exp' and 'obs'. +#' The default value is NULL. +#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to +#' 1) between the categories. The default value is c(1/3, 2/3), which +#' corresponds to tercile equiprobable categories. +#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' +#' for computing the thresholds between the probabilistic categories. If NULL, +#' the whole period is used. The default value is NULL. +#'@param Fair A logical indicating whether to compute the FairRPSS (the +#' potential RPSS that the forecast would have with an infinite ensemble size). +#' The default value is FALSE. +#'@param weights_exp A named numerical array of the forecast ensemble weights +#' for probability calculation. The dimension should include 'memb_dim', +#' 'time_dim' and 'dat_dim' if there are multiple datasets. All dimension +#' lengths must be equal to 'exp' dimension lengths. 
The default value is NULL, +#' which means no weighting is applied. The ensemble should have at least 70 +#' members or span at least 10 time steps and have more than 45 members if +#' consistency between the weighted and unweighted methodologies is desired. +#'@param weights_ref Same as 'weights_exp' but for the reference forecast. +#'@param cross.val A logical indicating whether to compute the thresholds +#' between probabilistics categories in cross-validation. The default value is +#' FALSE. +#'@param na.rm A logical or numeric value between 0 and 1. If it is numeric, it +#' means the lower limit for the fraction of the non-NA values. 1 is equal to +#' FALSE (no NA is acceptable), 0 is equal to TRUE (all NAs are acceptable). +# The function returns NA if the fraction of non-NA values in the data is less +#' than na.rm. Otherwise, RPS will be calculated. The default value is FALSE. +#'@param sig_method.type A character string indicating the test type of the +#' significance method. Check \code{RandomWalkTest()} parameter +#' \code{test.type} for details. The default is 'two.sided.approx', which is +#' the default of \code{RandomWalkTest()}. +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test. The default value is 0.05. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'\item{$rpss}{ +#' A numerical array of RPSS with dimensions c(nexp, nobs, the rest dimensions +#' of 'exp' except 'time_dim' and 'memb_dim' dimensions). nexp is the number of +#' experiment (i.e., dat_dim in exp), and nobs is the number of observation +#' i.e., dat_dim in obs). If dat_dim is NULL, nexp and nobs are omitted. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance of the RPSS with the same +#' dimensions as $rpss. 
+#'} +#' +#'@references +#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 +#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#'set.seed(1) +#'exp <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'set.seed(2) +#'obs <- array(rnorm(300), dim = c(lat = 3, lon = 2, sdate = 50)) +#'set.seed(3) +#'ref <- array(rnorm(3000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) +#'weights <- sapply(1:dim(exp)['sdate'], function(i) { +#' n <- abs(rnorm(10)) +#' n/sum(n) +#' }) +#'dim(weights) <- c(member = 10, sdate = 50) +#'# Use data as input +#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast +#'res <- RPSS(exp = exp, obs = obs, ref = ref, weights_exp = weights, weights_ref = weights) +#'res <- RPSS(exp = exp, obs = obs, alpha = 0.01, sig_method.type = 'two.sided') +#' +#'# Use probs as input +#'exp_probs <- GetProbs(exp, memb_dim = 'member') +#'obs_probs <- GetProbs(obs, memb_dim = NULL) +#'ref_probs <- GetProbs(ref, memb_dim = 'member') +#'res <- RPSS(exp = exp_probs, obs = obs_probs, ref = ref_probs, memb_dim = NULL, +#' cat_dim = 'bin') +#' +#'@import multiApply +#'@export +RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = FALSE, + sig_method.type = 'two.sided.approx', alpha = 0.05, ncores = NULL) { + + # Check inputs + ## exp, obs, and ref (1) + if (!is.array(exp) | !is.numeric(exp)) { + stop("Parameter 'exp' must be a numeric array.") + } + if (!is.array(obs) | !is.numeric(obs)) { + stop("Parameter 'obs' must be a numeric array.") + } + if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + if (!is.null(ref)) { + if (!is.array(ref) | !is.numeric(ref)) + stop("Parameter 'ref' must be a numeric array.") + if (any(is.null(names(dim(ref)))) | any(nchar(names(dim(ref))) == 0)) { + stop("Parameter 'ref' must have dimension names.") + } + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + if (!is.null(ref) & !time_dim %in% names(dim(ref))) { + stop("Parameter 'time_dim' is not found in 'ref' dimension.") + } + ## memb_dim & cat_dim + if (is.null(memb_dim) + is.null(cat_dim) != 1) { + stop("Only one of the two parameters 'memb_dim' and 'cat_dim' can have value.") + } + ## memb_dim + if (!is.null(memb_dim)) { + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { + stop("Parameter 'memb_dim' is not found in 'ref' dimension.") + } + } + ## cat_dim + if (!is.null(cat_dim)) { + if (!is.character(cat_dim) | length(cat_dim) > 1) { + stop("Parameter 'cat_dim' must be a character string.") + } + if (!cat_dim %in% names(dim(exp)) | !cat_dim %in% names(dim(obs)) | + (!is.null(ref) & 
!cat_dim %in% names(dim(ref)))) { + stop("Parameter 'cat_dim' is not found in 'exp', 'obs', or 'ref' dimension.") + } + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.", + " Set it as NULL if there is no dataset dimension.") + } + } + ## exp, obs, and ref (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(memb_dim)) { + name_exp <- name_exp[-which(name_exp == memb_dim)] + if (memb_dim %in% name_obs) { + name_obs <- name_obs[-which(name_obs == memb_dim)] + } + } + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + if (!identical(length(name_exp), length(name_obs)) | + !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop("Parameter 'exp' and 'obs' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim'.") + } + if (!is.null(ref)) { + name_ref <- sort(names(dim(ref))) + if (!is.null(memb_dim)) { + name_ref <- name_ref[-which(name_ref == memb_dim)] + } + if (!is.null(dat_dim)) { + if (dat_dim %in% name_ref) { + if (!identical(dim(exp)[dat_dim], dim(ref)[dat_dim])) { + stop("If parameter 'ref' has dataset dimension, it must be", + " equal to dataset dimension of 'exp'.") + } + name_ref <- name_ref[-which(name_ref == dat_dim)] + } + } + if (!identical(length(name_exp), length(name_ref)) | + !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { + stop("Parameter 'exp' and 'ref' must have the same length of ", + "all dimensions except 'memb_dim' and 'dat_dim' if there is ", + "only one reference dataset.") + } + } + ## prob_thresholds + if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | + any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { + stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") + } + ## indices_for_clim + if (is.null(indices_for_clim)) { + indices_for_clim <- seq_len(dim(obs)[time_dim]) + } else { + if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { + stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") + } else if (length(indices_for_clim) > dim(obs)[time_dim] | + max(indices_for_clim) > dim(obs)[time_dim] | + any(indices_for_clim < 1)) { + stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") + } + } + ## Fair + if (!is.logical(Fair) | length(Fair) > 1) { + stop("Parameter 'Fair' must be either TRUE or FALSE.") + } + ## cross.val + if (!is.logical(cross.val) | length(cross.val) > 1) { + stop("Parameter 'cross.val' must be either TRUE or FALSE.") + } + ## weights_exp + if (!is.null(weights_exp) & is.null(cat_dim)) { + if (!is.array(weights_exp) | !is.numeric(weights_exp)) + stop("Parameter 'weights_exp' must be a named numeric array.") + + if (is.null(dat_dim)) { + if (length(dim(weights_exp)) != 2 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_exp' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } + if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_exp)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights_exp' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'exp'.") + } + weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights_exp)) != 3 | + !all(names(dim(weights_exp)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_exp' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } + if (dim(weights_exp)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_exp)[time_dim] != dim(exp)[time_dim] | + dim(weights_exp)[dat_dim] != dim(exp)[dat_dim]) { + stop("Parameter 'weights_exp' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'exp'.") + } + weights_exp <- Reorder(weights_exp, c(time_dim, memb_dim, dat_dim)) + } + } else if (!is.null(weights_exp) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'exp' is probability already, so parameter ", + "'weights_exp' is not used. Change 'weights_exp' to NULL.")) + weights_exp <- NULL + } + ## weights_ref + if (!is.null(weights_ref) & is.null(cat_dim)) { + if (!is.array(weights_ref) | !is.numeric(weights_ref)) + stop("Parameter 'weights_ref' must be a named numeric array.") + + if (is.null(dat_dim) | ((!is.null(dat_dim)) && (!dat_dim %in% names(dim(ref))))) { + if (length(dim(weights_ref)) != 2 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim))) { + stop("Parameter 'weights_ref' must have two dimensions with the names of ", + "'memb_dim' and 'time_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(exp)[memb_dim] | + dim(weights_ref)[time_dim] != dim(exp)[time_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths as ", + "'memb_dim' and 'time_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim)) + + } else { + if (length(dim(weights_ref)) != 3 | + !all(names(dim(weights_ref)) %in% c(memb_dim, time_dim, dat_dim))) { + stop("Parameter 'weights_ref' must have three dimensions with the names of ", + "'memb_dim', 'time_dim' and 'dat_dim'.") + } + if (dim(weights_ref)[memb_dim] != dim(ref)[memb_dim] | + dim(weights_ref)[time_dim] != dim(ref)[time_dim] | + dim(weights_ref)[dat_dim] != dim(ref)[dat_dim]) { + stop("Parameter 'weights_ref' must have the same dimension lengths ", + "as 'memb_dim', 'time_dim' and 'dat_dim' in 'ref'.") + } + weights_ref <- Reorder(weights_ref, c(time_dim, memb_dim, dat_dim)) + } + } else if (!is.null(weights_ref) & !is.null(cat_dim)) { + .warning(paste0("Parameter 'ref' is probability already, so parameter ", + "'weights_ref' is not used. Change 'weights_ref' to NULL.")) + weights_ref <- NULL + } + ## na.rm + if (!isTRUE(na.rm) & !isFALSE(na.rm) & !(is.numeric(na.rm) & na.rm >= 0 & na.rm <= 1)) { + stop('"na.rm" should be TRUE, FALSE or a numeric between 0 and 1') + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## sig_method.type + #NOTE: These are the types of RandomWalkTest() + if (!sig_method.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { + stop("Parameter 'sig_method.type' must be 'two.sided.approx', 'two.sided', ", + "'greater', or 'less'.") + } + if (sig_method.type == 'two.sided.approx' && alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. 
Returning the significance at the 0.05 significance level.") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be either NULL or a positive integer.") + } + } + + ############################### + + # Compute RPSS + + ## Decide target_dims + if (!is.null(memb_dim)) { + target_dims_exp <- c(time_dim, memb_dim, dat_dim) + if (!memb_dim %in% names(dim(obs))) { + target_dims_obs <- c(time_dim, dat_dim) + } else { + target_dims_obs <- c(time_dim, memb_dim, dat_dim) + } + } else { # cat_dim + target_dims_exp <- target_dims_obs <- c(time_dim, cat_dim, dat_dim) + } + + if (!is.null(ref)) { # use "ref" as reference forecast + if (!is.null(memb_dim)) { + if (!is.null(dat_dim) && (dat_dim %in% names(dim(ref)))) { + target_dims_ref <- c(time_dim, memb_dim, dat_dim) + } else { + target_dims_ref <- c(time_dim, memb_dim) + } + } else { + target_dims_ref <- c(time_dim, cat_dim, dat_dim) + } + data <- list(exp = exp, obs = obs, ref = ref) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs, + ref = target_dims_ref) + } else { + data <- list(exp = exp, obs = obs) + target_dims = list(exp = target_dims_exp, + obs = target_dims_obs) + } + + output <- Apply(data, + target_dims = target_dims, + fun = .RPSS, + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, + prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, nmemb_ref = nmemb_ref, + weights_exp = weights_exp, + weights_ref = weights_ref, + cross.val = cross.val, + na.rm = na.rm, sig_method.type = sig_method.type, alpha = alpha, + ncores = ncores) + + return(output) + +} + +.RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', cat_dim = NULL, + dat_dim = NULL, prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, + Fair = FALSE, nmemb = NULL, nmemb_ref = NULL, + weights_exp = NULL, weights_ref = NULL, cross.val = FALSE, + na.rm = FALSE, sig_method.type = 'two.sided.approx', alpha = 0.05) { + #--- if memb_dim: + # exp: [sdate, memb, (dat)] + # obs: [sdate, (memb), (dat)] + # ref: [sdate, memb, (dat)] or NULL + #--- if cat_dim: + # exp: [sdate, bin, (dat)] + # obs: [sdate, bin, (dat)] + # ref: [sdate, bin, (dat)] or NULL + + if (isTRUE(na.rm)) { + f_NAs <- 0 + } else if (isFALSE(na.rm)) { + f_NAs <- 1 + } else { + f_NAs <- na.rm + } + + if (is.null(dat_dim)) { + nexp <- 1 + nobs <- 1 + } else { + nexp <- as.numeric(dim(exp)[dat_dim]) + nobs <- as.numeric(dim(obs)[dat_dim]) + } + + # Calculate RPS + + if (!is.null(ref)) { + + # Adjust dimensions to be [sdate, memb, dat] for both exp, obs, and ref + ## Insert memb_dim in obs + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + ## Insert dat_dim + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), dat = nobs) + dim(exp) <- c(dim(exp), dat = nexp) + if (!is.null(weights_exp)) dim(weights_exp) <- c(dim(weights_exp), dat = nexp) + } + if (is.null(dat_dim) || (!is.null(dat_dim) && !dat_dim %in% names(dim(ref)))) { + nref <- 1 + dim(ref) <- c(dim(ref), dat = nref) + if (!is.null(weights_ref)) dim(weights_ref) <- c(dim(weights_ref), dat = nref) + } else { + nref <- as.numeric(dim(ref)[dat_dim]) # should be the same as nexp + } + + # Find good values then calculate RPS + rps_exp <- array(NA, dim = c(dim(exp)[time_dim], nexp = nexp, nobs = nobs)) + rps_ref <- array(NA, dim = c(dim(exp)[time_dim], nexp 
= nexp, nobs = nobs)) + for (i in 1:nexp) { + for (j in 1:nobs) { + for (k in 1:nref) { + if (nref != 1 & k != i) { # if nref is 1 or equal to nexp, calculate rps + next + } + exp_data <- exp[, , i, drop = F] + obs_data <- obs[, , j, drop = F] + ref_data <- ref[, , k, drop = F] + exp_mean <- rowMeans(exp_data) + obs_mean <- rowMeans(obs_data) + ref_mean <- rowMeans(ref_data) + good_values <- !is.na(exp_mean) & !is.na(obs_mean) & !is.na(ref_mean) + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + + if (f_NAs <= sum(good_values) / length(good_values)) { + rps_exp[good_values, i, j] <- .RPS(exp = exp[good_values, , i], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb, + weights = weights_exp[good_values, , i], + cross.val = cross.val, na.rm = na.rm) + rps_ref[good_values, i, j] <- .RPS(exp = ref[good_values, , k], + obs = obs[good_values, , j], + time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = NULL, + prob_thresholds = prob_thresholds, + indices_for_clim = good_indices_for_clim, + Fair = Fair, nmemb = nmemb_ref, + weights = weights_ref[good_values, , k], + na.rm = na.rm, cross.val = cross.val) + } + } + } + } + + } else { # ref is NULL + rps_exp <- .RPS(exp = exp, obs = obs, time_dim = time_dim, memb_dim = memb_dim, + cat_dim = cat_dim, dat_dim = dat_dim, prob_thresholds = prob_thresholds, + indices_for_clim = indices_for_clim, Fair = Fair, + nmemb = nmemb, weights = weights_exp, + cross.val = cross.val, na.rm = na.rm) + + # RPS of the reference forecast + if (!is.null(memb_dim)) { + if (!memb_dim %in% names(dim(obs))) { + obs <- InsertDim(obs, posdim = 2, lendim = 1, name = memb_dim) + } + } + + rps_ref <- array(NA, dim = c(dim(obs)[time_dim], nexp = nexp, nobs = nobs)) + + if (is.null(dat_dim)) { + dim(obs) <- c(dim(obs), nobs = nobs) + dim(exp) <- c(dim(exp), nexp = nexp) + dim(rps_exp) <- dim(rps_ref) + } + + for (i in 1:nexp) { + for (j in 1:nobs) { + # Use good values only + good_values <- !is.na(rps_exp[, i, j]) + if (f_NAs <= sum(good_values) / length(good_values)) { + obs_data <- obs[good_values, , j] + if (is.null(dim(obs_data))) dim(obs_data) <- c(length(obs_data), 1) + + if (is.null(cat_dim)) { # calculate probs + # Subset indices_for_clim + dum <- match(indices_for_clim, which(good_values)) + good_indices_for_clim <- dum[!is.na(dum)] + obs_probs <- .GetProbs(data = obs_data, + indices_for_quantiles = good_indices_for_clim, + prob_thresholds = prob_thresholds, + weights = NULL, cross.val = cross.val) + } else { + obs_probs <- t(obs_data) + } + # obs_probs: [bin, sdate] + + clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), + 1 - prob_thresholds[length(prob_thresholds)]) + clim_probs <- array(clim_probs, dim = dim(obs_probs)) + # clim_probs: [bin, sdate] + + # Calculate RPS for each time step + probs_clim_cumsum <- apply(clim_probs, 2, cumsum) + probs_obs_cumsum <- apply(obs_probs, 2, cumsum) + rps_ref[good_values, i, j] <- colSums((probs_clim_cumsum - probs_obs_cumsum)^2) + } + if (Fair) { # FairRPS + if (!is.null(memb_dim)) { + if (memb_dim %in% names(dim(exp))) { + ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) + ## [formula taken from SpecsVerification::EnsRps] + R <- dim(obs)[1] #number of years + } + } else { + R <- nmemb_ref + } + adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - 
probs_clim_cumsum) + adjustment <- colSums(adjustment) + rps_ref[, i, j] <- rps_ref[, i, j] + adjustment + } + } + } + } + + if (is.null(dat_dim)) { + dim(rps_ref) <- dim(rps_exp) <- dim(exp)[time_dim] + } + +#---------------------------------------------- + # Calculate RPSS + + if (!is.null(dat_dim)) { + # rps_exp and rps_ref: [sdate, nexp, nobs] + rps_exp_mean <- colMeans(rps_exp, na.rm = TRUE) + rps_ref_mean <- colMeans(rps_ref, na.rm = TRUE) + rpss <- array(dim = c(nexp = nexp, nobs = nobs)) + sign <- array(dim = c(nexp = nexp, nobs = nobs)) + + if (!all(is.na(rps_exp_mean))) { + for (i in 1:nexp) { + for (j in 1:nobs) { + rpss[i, j] <- 1 - rps_exp_mean[i, j] / rps_ref_mean[i, j] + ind_nonNA <- !is.na(rps_exp[, i, j]) + if (!any(ind_nonNA)) { + sign[i, j] <- NA + } else { + sign[i, j] <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA, i, j], + skill_B = rps_ref[ind_nonNA, i, j], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + } + } + + # Turn NaN into NA + if (any(is.nan(rpss))) rpss[which(is.nan(rpss))] <- NA + + } else { # dat_dim is NULL + + ind_nonNA <- !is.na(rps_exp) + if (!any(ind_nonNA)) { + rpss <- NA + sign <- NA + } else { + # rps_exp and rps_ref: [sdate] + rpss <- 1 - mean(rps_exp, na.rm = TRUE) / mean(rps_ref, na.rm = TRUE) + sign <- .RandomWalkTest(skill_A = rps_exp[ind_nonNA], + skill_B = rps_ref[ind_nonNA], + test.type = sig_method.type, alpha = alpha, + sign = T, pval = F)$sign + } + } + + return(list(rpss = rpss, sign = sign)) +} diff --git a/modules/Crossval/R/tmp/RandomWalkTest.R b/modules/Crossval/R/tmp/RandomWalkTest.R new file mode 100644 index 0000000000000000000000000000000000000000..16d89f6d8b34bf824188677dd4f1728823725eca --- /dev/null +++ b/modules/Crossval/R/tmp/RandomWalkTest.R @@ -0,0 +1,184 @@ +#'Random Walk test for skill differences +#' +#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a +#'common observational reference) based on Random Walks (DelSole and Tippett, +#'2016). +#' +#'@param skill_A A numerical array of the time series of the scores obtained +#' with the forecaster A. +#'@param skill_B A numerical array of the time series of the scores obtained +#' with the forecaster B. The dimensions should be identical as parameter +#' 'skill_A'. +#'@param time_dim A character string indicating the name of the dimension along +#' which the tests are computed. The default value is 'sdate'. +#'@param test.type A character string indicating the type of significance test. +#' It can be "two.sided.approx" (to assess whether forecaster A and forecaster +#' B are significantly different in terms of skill with a two-sided test using +#' the approximation of DelSole and Tippett, 2016), "two.sided" (to assess +#' whether forecaster A and forecaster B are significantly different in terms +#' of skill with an exact two-sided test), "greater" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for negatively oriented scores), or "less" (to assess whether +#' forecaster A shows significantly better skill than forecaster B with a +#' one-sided test for positively oriented scores). The default value is +#' "two.sided.approx". +#'@param alpha A numeric of the significance level to be used in the statistical +#' significance test (output "sign"). The default value is 0.05. +#'@param pval A logical value indicating whether to return the p-value of the +#' significance test. The default value is TRUE. 
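+#' Note that with test.type = 'two.sided.approx' no p-value can be computed; +#' the function then warns and returns the significance at the 0.05 level +#' instead (see the check on 'test.type' below).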
+#'@param sign A logical value indicating whether to return the statistical +#' significance of the test based on 'alpha'. The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A list with: +#'\item{$score}{ +#' A numerical array with the same dimensions as the input arrays except +#' 'time_dim'. The number of times that forecaster A has been better than +#' forecaster B minus the number of times that forecaster B has been better +#' than forecaster A (for skill negatively oriented, i.e., the lower the +#' better). If $score is positive, forecaster A has been better more times +#' than forecaster B. If $score is negative, forecaster B has been better more +#' times than forecaster A. +#'} +#'\item{$sign}{ +#' A logical array of the statistical significance with the same dimensions +#' as the input arrays except "time_dim". Returned only if "sign" is TRUE. +#'} +#'\item{$p.val}{ +#' A numeric array of the p-values with the same dimensions as the input arrays +#' except "time_dim". Returned only if "pval" is TRUE. +#'} +#' +#'@details +#' Null and alternative hypothesis for "two-sided" test (regardless of the +#' orientation of the scores):\cr +#' H0: forecaster A and forecaster B are not different in terms of skill\cr +#' H1: forecaster A and forecaster B are different in terms of skill +#' +#' Null and alternative hypothesis for one-sided "greater" (for negatively +#' oriented scores, i.e., the lower the better) and "less" (for positively +#' oriented scores, i.e., the higher the better) tests:\cr +#' H0: forecaster A is not better than forecaster B\cr +#' H1: forecaster A is better than forecaster B +#' +#' Examples of negatively oriented scores are the RPS, RMSE and the Error, while +#' the ROC score is a positively oriented score. +#' +#' DelSole and Tippett (2016) approximation for two-sided test at 95% confidence +#' level: significant if the difference between the number of times that +#' forecaster A has been better than forecaster B and forecaster B has been +#' better than forecaster A is above 2sqrt(N) or below -2sqrt(N). 
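+#' For example (illustrative numbers): with N = 100 time steps, the score must +#' exceed 2*sqrt(100) = 20 in absolute value to be declared significant. 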
+#' +#'@references +#'DelSole and Tippett (2016): https://doi.org/10.1175/MWR-D-15-0218.1 +#' +#'@examples +#' fcst_A <- array(data = 11:50, dim = c(sdate = 10, lat = 2, lon = 2)) +#' fcst_B <- array(data = 21:60, dim = c(sdate = 10, lat = 2, lon = 2)) +#' reference <- array(data = 1:40, dim = c(sdate = 10, lat = 2, lon = 2)) +#' scores_A <- abs(fcst_A - reference) +#' scores_B <- abs(fcst_B - reference) +#' res1 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, pval = FALSE, sign = TRUE) +#' res2 <- RandomWalkTest(skill_A = scores_A, skill_B = scores_B, test.type = 'greater') +#' +#'@import multiApply +#'@importFrom ClimProjDiags Subset +#'@export +RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', + test.type = 'two.sided.approx', alpha = 0.05, pval = TRUE, + sign = FALSE, ncores = NULL) { + + # Check inputs + ## skill_A and skill_B + if (is.null(skill_A) | is.null(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") + } + if (!is.numeric(skill_A) | !is.numeric(skill_B)) { + stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") + } + if (!identical(dim(skill_A), dim(skill_B))) { + stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) != 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))) { + stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") + } + ## alpha + if (any(!is.numeric(alpha) | alpha <= 0 | alpha >= 1 | length(alpha) > 1)) { + stop("Parameter 'alpha' must be a number between 0 and 1.") + } + ## test.type + if (!test.type %in% c('two.sided.approx', 'two.sided', 'greater', 'less')) { + stop("Parameter 'test.type' must be 'two.sided.approx', 'two.sided', 'greater', or 'less'.") + } + if (test.type == 'two.sided.approx') { + if (alpha != 0.05) { + .warning("DelSole and Tippett (2016) approximation is valid for alpha ", + "= 0.05 only. Returning the significance at the 0.05 significance level.") + } + if (pval) { + .warning("p-value cannot be returned with the DelSole and Tippett (2016) ", + "approximation. 
Returning the significance at the 0.05 significance level.") + } + sign <- TRUE + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + ## Compute the Random Walk Test + res <- Apply(data = list(skill_A = skill_A, + skill_B = skill_B), + target_dims = list(skill_A = time_dim, + skill_B = time_dim), + fun = .RandomWalkTest, + test.type = test.type, + alpha = alpha, pval = pval, sign = sign, + ncores = ncores) + + return(res) +} + +.RandomWalkTest <- function(skill_A, skill_B, test.type = 'two.sided.approx', + alpha = 0.05, pval = TRUE, sign = FALSE) { + #skill_A and skill_B: [sdate] + + N.eff <- length(skill_A) + + A_better <- sum(skill_B > skill_A) + B_better <- sum(skill_B < skill_A) + + output <- NULL + output$score <- A_better - B_better + + if (test.type == 'two.sided.approx') { + output$sign <- abs(output$score) > (2 * sqrt(N.eff)) + + } else { + + if (!is.na(output$score)) { + p.val <- binom.test(x = A_better, n = floor(N.eff), p = 0.5, conf.level = 1 - alpha, + alternative = test.type)$p.value + + } else { + p.val <- NA + } + + if (pval) { + output$p.val <- p.val + } + if (sign) { + output$sign <- !is.na(p.val) & p.val <= alpha + } + + } + + return(output) +} diff --git a/modules/Crossval/R/tmp/SprErr.R b/modules/Crossval/R/tmp/SprErr.R new file mode 100644 index 0000000000000000000000000000000000000000..33642eab1c48740736623c168cb0edc167778b2d --- /dev/null +++ b/modules/Crossval/R/tmp/SprErr.R @@ -0,0 +1,227 @@ +#'Compute the ratio between the ensemble spread and RMSE +#' +#'Compute the ratio between the spread of the members around the +#'ensemble mean in experimental data and the RMSE between the ensemble mean of +#'experimental and observational data. The p-value and/or the statistical +#'significance is provided by a one-sided Fisher's test. +#' +#'@param exp A named numeric array of experimental data with at least two +#' dimensions 'memb_dim' and 'time_dim'. +#'@param obs A named numeric array of observational data with at least two +#' dimensions 'memb_dim' and 'time_dim'. It should have the same dimensions as +#' parameter 'exp' except along 'dat_dim' and 'memb_dim'. +#'@param dat_dim A character string indicating the name of dataset (nobs/nexp) +#' dimension. The default value is NULL (no dataset). +#'@param memb_dim A character string indicating the name of the member +#' dimension. It must be one dimension in 'exp' and 'obs'. The default value +#' is 'member'. +#'@param time_dim A character string indicating the name of dimension along +#' which the ratio is computed. The default value is 'sdate'. +#'@param pval A logical value indicating whether to compute or not the p-value +#' of the test Ho : SD/RMSE = 1 or not. The default value is TRUE. +#'@param sign A logical value indicating whether to retrieve the statistical +#' significance of the test Ho: ACC = 0 based on 'alpha'. The default value is +#' FALSE. +#'@param alpha A numeric indicating the significance level for the statistical +#' significance test. The default value is 0.05. +#'@param na.rm A logical value indicating whether to remove NA values. The default +#' value is TRUE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. 
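+#' +#'@details +#' As a sketch of the computation (notation introduced here, following the +#' internal code): the spread is the square root of the time-mean ensemble +#' variance and the error is the RMSE of the ensemble mean with respect to +#' the observations, so +#' \deqn{ratio = \sqrt{\overline{var(exp)}} / \sqrt{\overline{(\overline{exp} - obs)^2}}.}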
+#' +#'@return A list of two arrays with dimensions c(nexp, nobs, the rest of +#' dimensions of 'exp' and 'obs' except memb_dim and time_dim), which nexp is +#' the length of dat_dim of 'exp' and nobs is the length of dat_dim of 'obs'. +#' If dat_dim is NULL, nexp and nobs are omitted. \cr +#'\item{$ratio}{ +#' The ratio of the ensemble spread and RMSE. +#'} +#'\item{$p_val}{ +#' The p-value of the one-sided Fisher's test with Ho: SD/RMSE = 1. Only present +#' if \code{pval = TRUE}. +#'} +#' +#'@examples +#'# Load sample data as in Load() example: +#'example(Load) +#'rsdrms <- RatioSDRMS(sampleData$mod, sampleData$obs, dat_dim = 'dataset') +#'# Reorder the data in order to plot it with PlotVsLTime +#'rsdrms_plot <- array(dim = c(dim(rsdrms$ratio)[1:2], 4, dim(rsdrms$ratio)[3])) +#'rsdrms_plot[, , 2, ] <- rsdrms$ratio +#'rsdrms_plot[, , 4, ] <- rsdrms$p.val +#'\dontrun{ +#'PlotVsLTime(rsdrms_plot, toptitle = "Ratio ensemble spread / RMSE", ytitle = "", +#' monini = 11, limits = c(-1, 1.3), listexp = c('CMIP5 IC3'), +#' listobs = c('ERSST'), biglab = FALSE, siglev = TRUE) +#'} +#' +#'@import multiApply +#'@export +SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', + time_dim = 'sdate', pval = TRUE, sign = FALSE, + alpha = 0.05, na.rm = FALSE, ncores = NULL) { + + # Check inputs + ## exp and obs (1) + if (is.null(exp) | is.null(obs)) { + stop("Parameter 'exp' and 'obs' cannot be NULL.") + } + if (!is.numeric(exp) | !is.numeric(obs)) { + stop("Parameter 'exp' and 'obs' must be a numeric array.") + } + if (is.null(dim(exp)) | is.null(dim(obs))) { + stop(paste0("Parameter 'exp' and 'obs' must be array with as least two ", + "dimensions memb_dim and time_dim.")) + } + if (any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | + any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { + stop("Parameter 'exp' and 'obs' must have dimension names.") + } + ## dat_dim + if (!is.null(dat_dim)) { + if (!is.character(dat_dim) | length(dat_dim) > 1) { + stop("Parameter 'dat_dim' must be a character string.") + } + if (!dat_dim %in% names(dim(exp)) | !dat_dim %in% names(dim(obs))) { + stop("Parameter 'dat_dim' is not found in 'exp' or 'obs' dimension.") + } + } + ## memb_dim + if (!is.character(memb_dim) | length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + stop("Parameter 'memb_dim' is not found in 'exp' nor 'obs' dimension. 
", + "Set it as NULL if there is no member dimension.") + } + # Add [member = 1] + if (memb_dim %in% names(dim(exp)) & !memb_dim %in% names(dim(obs))) { + dim(obs) <- c(dim(obs), 1) + names(dim(obs))[length(dim(obs))] <- memb_dim + } + if (!memb_dim %in% names(dim(exp)) & memb_dim %in% names(dim(obs))) { + dim(exp) <- c(dim(exp), 1) + names(dim(exp))[length(dim(exp))] <- memb_dim + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { + stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") + } + ## exp and obs (2) + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + if (!is.null(dat_dim)) { + name_exp <- name_exp[-which(name_exp == dat_dim)] + name_obs <- name_obs[-which(name_obs == dat_dim)] + } + name_exp <- name_exp[-which(name_exp == memb_dim)] + name_obs <- name_obs[-which(name_obs == memb_dim)] + if (!identical(dim(exp)[name_exp], dim(obs)[name_obs])) { + stop(paste0("Parameter 'exp' and 'obs' must have same length of ", + "all the dimensions except 'dat_dim' and 'memb_dim'.")) + } + ## pval + if (!is.logical(pval) | length(pval) > 1) { + stop("Parameter 'pval' must be one logical value.") + } + ## sign + if (!is.logical(sign) | length(sign) > 1) { + stop("Parameter 'sign' must be one logical value.") + } + # alpha + if (!is.numeric(alpha) | any(alpha < 0) | any(alpha > 1) | length(alpha) > 1) { + stop("Parameter 'alpha' must be a numeric number between 0 and 1.") + } + # na.rm + if (!na.rm %in% c(TRUE, FALSE)) { + stop("Parameter 'na.rm' must be TRUE or FALSE") + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + + ############################### + # Calculate RatioSDRMS + + # If dat_dim = NULL, insert dat dim + remove_dat_dim <- FALSE + if (is.null(dat_dim)) { + dat_dim <- 'dataset' + exp <- InsertDim(exp, posdim = 1, lendim = 1, name = 'dataset') + obs <- InsertDim(obs, posdim = 1, lendim = 1, name = 'dataset') + remove_dat_dim <- TRUE + } + + res <- Apply(list(exp, obs), + target_dims = list(c(dat_dim, memb_dim, time_dim), + c(dat_dim, memb_dim, time_dim)), + pval = pval, + sign = sign, + na.rm = na.rm, + fun = .SprErr, + ncores = ncores) + + if (remove_dat_dim) { + if (length(dim(res[[1]])) > 2) { + res <- lapply(res, Subset, c('nexp', 'nobs'), list(1, 1), drop = 'selected') + } else { + res <- lapply(res, as.numeric) + } + } + + return(res) +} + +.SprErr <- function(exp, obs, pval = TRUE, sign = FALSE, alpha = 0.05, na.rm = FALSE) { + + # exp: [dat_exp, member, sdate] + # obs: [dat_obs, member, sdate] + nexp <- dim(exp)[1] + nobs <- dim(obs)[1] + + # ensemble mean + ens_exp <- MeanDims(exp, 2, na.rm = na.rm) # [dat, sdate] + ens_obs <- MeanDims(obs, 2, na.rm = na.rm) + + # Create empty arrays + ratio <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + p.val <- array(dim = c(nexp = as.numeric(nexp), nobs = as.numeric(nobs))) # [nexp, nobs] + + for (jexp in 1:nexp) { + for (jobs in 1:nobs) { + + # spread and error + spread <- sqrt(mean(apply(exp[jexp,,], 2, var, na.rm = na.rm), na.rm = na.rm)) + error <- sqrt(mean((ens_obs - ens_exp[jexp,])^2, na.rm = na.rm)) + ratio[jexp, jobs] <- spread/error + + # effective sample size + enospr <- sum(Eno(apply(exp[jexp,,], 2, var, na.rm = na.rm), names(dim(exp))[3])) + enodif <- 
.Eno((ens_exp[jexp, ] - ens_obs[jobs, ])^2, na.action = na.pass) + if (pval) { + F <- (enospr[jexp] * spread[jexp]^2 / (enospr[jexp] - 1)) / (enodif * error^2 / (enodif - 1)) + if (!is.na(F) & !is.na(enospr[jexp]) & !is.na(enodif) & any(enospr > 2) & enodif > 2) { + p.val[jexp, jobs] <- pf(F, enospr[jexp] - 1, enodif - 1) + p.val[jexp, jobs] <- 2 * min(p.val[jexp, jobs], 1 - p.val[jexp, jobs]) + } else { + ratio[jexp, jobs] <- NA + } + } + } + } + + res <- list(ratio = ratio) + if (pval) {res$p.val <- p.val} + if (sign) {res$sign <- p.val <= alpha} + + return(res) +} + diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..cd7a1e10db1500f69426057ba60d36d2a7bb4cab --- /dev/null +++ b/modules/Crossval/R/tmp/Utils.R @@ -0,0 +1,1885 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. + if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. 
", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. + filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. 
+ if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. + if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. 
See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. + if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. 
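+        ## (e.g. for a file with 360 longitudes and no 'grid' requested, the
+        ## assumed common grid below spans 0, 1, ..., 359 degrees east.)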
+ common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
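+        # (CDO may have renamed the member dimension to 'lev'; restore the
+        # original name so the error below reports the user-facing name.)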
expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the expected dimension(s)", + toString(expected_dims[which(is.na(dim_matches))]), + "were not found in", filename) + } + time_dimname <- var_dimnames[-dim_matches] + } else { + time_dimname <- var_dimnames + } + if (length(time_dimname) > 0) { + if (length(time_dimname) == 1) { + nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len + expected_dims <- c(expected_dims, time_dimname) + dim_matches <- match(expected_dims, var_dimnames) + } else { + if (!is.null(old_members_dimname)) { + expected_dims[which(expected_dims == 'lev')] <- old_members_dimname + } + stop("Error: the variable ", namevar, + " is defined over more dimensions than the expected (", + toString(c(expected_dims, 'time')), + "). It could also be that the members, longitude or latitude ", + "dimensions are named incorrectly. In that case, either rename ", + "the dimensions in the file or adjust Load() to recognize the actual ", + "name with the parameter 'dimnames'. See file ", filename) + } + } else { + nltime <- 1 + } + + # Now we must retrieve the data from the file, but only the asked indices. + # So we build up the indices to retrieve. + # Longitudes or latitudes have been retrieved already. + if (explore_dims) { + # If we're exploring the file we only want one time step from one member, + # to regrid it and work out the number of longitudes and latitudes. + # We don't need more. + members <- 1 + ltimes_list <- list(1) + } else { + # The data is arranged in the array 'tmp' with the dimensions in a + # common order: + # 1) Longitudes + # 2) Latitudes + # 3) Members (even if is not a file per member experiment) + # 4) Lead-times + if (work_piece[['is_file_per_dataset']]) { + time_indices <- 1:nltime + mons <- strsplit(system(paste('cdo showmon ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + years <- strsplit(system(paste('cdo showyear ', filein, + ' 2>/dev/null'), intern = TRUE), split = ' ') + mons <- as.numeric(mons[[1]][which(mons[[1]] != "")]) + years <- as.numeric(years[[1]][which(years[[1]] != "")]) + time_indices <- ts(time_indices, start = c(years[1], mons[1]), + end = c(years[length(years)], mons[length(mons)]), + frequency = 12) + ltimes_list <- list() + for (sdate in work_piece[['startdates']]) { + selected_time_indices <- window(time_indices, start = c(as.numeric( + substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))), + end = c(3000, 12), frequency = 12, extend = TRUE) + selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]] + ltimes_list <- c(ltimes_list, list(selected_time_indices)) + } + } else { + ltimes <- work_piece[['leadtimes']] + #if (work_piece[['dataset_type']] == 'exp') { + ltimes_list <- list(ltimes[which(ltimes <= nltime)]) + #} + } + ## TODO: Put, when reading matrices, this kind of warnings + # if (nmember < nmemb) { + # cat("Warning: + members <- 1:work_piece[['nmember']] + members <- members[which(members <= nmemb)] + } + + # Now, for each list of leadtimes to load (usually only one list with all leadtimes), + # we'll join the indices and retrieve data + found_disordered_dims <- FALSE + for (ltimes in ltimes_list) { + if (is_2d_var) { + start <- c(min(lon_indices), min(lat_indices)) + end <- c(max(lon_indices), max(lat_indices)) + if (lonlat_subsetting_requested && remap_needed) { + subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
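+        # When interpolation is required, the work is delegated to CDO; the
+        # shell command assembled below is conceptually:
+        #   cdo -L -s -sellonlatbox,<lonmin>,<lonmax>,<latmin>,<latmax> \
+        #       -remap<method>,<common_grid_name> <infile.nc> <outfile.nc>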
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
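+                # (the dummy 2x2 mask file has served its purpose: its remapped
+                # version provides the lon/lat coordinates of the common grid.)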
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
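+            # (that is: values outside work_piece[['var_limits']] and points
+            # where the land-sea mask is below 0.5 are set to NA before the
+            # requested spatial aggregation is applied.)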
+ if (!is.na(work_piece[['var_limits']][2])) { + x[which(x > work_piece[['var_limits']][2])] <- NA + } + if (!is.na(work_piece[['var_limits']][1])) { + x[which(x < work_piece[['var_limits']][1])] <- NA + } + if (!is.null(mask)) { + x[which(mask < 0.5)] <- NA + } + + if (output == 'areave' || output == 'lon') { + weights <- InsertDim(cos(final_lats * pi / 180), 1, + length(final_lons), name = 'lon') + weights[which(is.na(x))] <- NA + if (output == 'areave') { + weights <- weights / mean(weights, na.rm = TRUE) + mean(x * weights, na.rm = TRUE) + } else { + weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2, + length(final_lats), name = 'lat') + MeanDims(x * weights, 2, na.rm = TRUE) + } + } else if (output == 'lat') { + MeanDims(x, 1, na.rm = TRUE) + } else if (output == 'lonlat') { + signif(x, 5) + } + }) + if (output == 'areave') { + dim(tmp) <- c(1, 1, final_dims[3:4]) + } else if (output == 'lon') { + dim(tmp) <- c(final_dims[1], 1, final_dims[3:4]) + } else if (output == 'lat') { + dim(tmp) <- c(1, final_dims[c(2, 3, 4)]) + } else if (output == 'lonlat') { + dim(tmp) <- final_dims + } + } + var_data <- attach.big.matrix(work_piece[['out_pointer']]) + if (work_piece[['dims']][['member']] > 1 && nmemb > 1 && + work_piece[['dims']][['ftime']] > 1 && + nltime < work_piece[['dims']][['ftime']]) { + work_piece[['indices']][2] <- work_piece[['indices']][2] - 1 + for (jmemb in members) { + work_piece[['indices']][2] <- work_piece[['indices']][2] + 1 + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1) + var_data[out_indices] <- as.vector(tmp[, , jmemb, ]) + } + work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1 + } else { + out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']]) + out_indices <- out_position:(out_position + length(tmp) - 1) + a <- aperm(tmp, c(1, 2, 4, 3)) + as.vector(a) + var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3))) + } + work_piece[['indices']][3] <- work_piece[['indices']][3] + 1 + } + } + } + nc_close(fnc) + if (is_2d_var) { + if (remap_needed) { + array_across_gw <- FALSE + file.remove(filein) + ###if (!is.null(mask) && lonlat_subsetting_requested) { + ### file.remove(mask_file) + ###} + } else { + if (first_lon_in_original_file < 0) { + array_across_gw <- data_across_gw + } else { + array_across_gw <- FALSE + } + } + } + } + if (explore_dims) { + list(dims = dims, is_2d_var = is_2d_var, grid = grid_name, + units = units, var_long_name = var_long_name, + data_across_gw = data_across_gw, array_across_gw = array_across_gw) + } else { + ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) { + ### foobar <- writeBin(work_piece[['progress_amount']], progress_connection) + ###} + if (!silent && !is.null(work_piece[['progress_amount']])) { + message(work_piece[['progress_amount']], appendLF = FALSE) + } + found_file + } +} + +.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates, + nmember = NULL, nmemberobs = NULL, + nleadtime = NULL, leadtimemin = 1, + leadtimemax = NULL, storefreq = 'monthly', + sampleperiod = 1, lonmin = 0, lonmax = 360, + latmin = -90, latmax = 90, output = 'areave', + method = 'conservative', grid = NULL, + maskmod = vector("list", 15), + maskobs = vector("list", 15), + configfile = NULL, suffixexp = NULL, + suffixobs = NULL, varmin = NULL, varmax = NULL, + silent = FALSE, nprocs = NULL) { + ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
The path with expressions and tags, e.g.: + # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc + # and a complete known path to one of the matching files, e.g.: + # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc + # and it returns the path pattern but without shell globbing expressions: + # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc + # + # To do that, it needs also as inputs the list of replace values (the + # association of each tag to their value). + # + # All the tags not present in the parameter tags_to_keep will be repalced. + # + # Not all cases can be resolved with the implemented algorithm. In an + # unsolvable case a warning is given and one possible guess is returned. + # + # In some cases it is interesting to replace only the expressions in the + # path to the file, but not the ones in the file name itself. To keep the + # expressions in the file name, the parameter permissive can be set to + # TRUE. To replace all the expressions it can be set to FALSE. + clean <- function(x) { + if (nchar(x) > 0) { + x <- gsub('\\\\', '', x) + x <- gsub('\\^', '', x) + x <- gsub('\\$', '', x) + x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y))) + do.call(paste0, as.list(x)) + } else { + x + } + } + + strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "") + + if (permissive) { + actual_path_chunks <- strsplit(actual_path, '/')[[1]] + actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/') + file_name <- tail(actual_path_chunks, 1) + if (length(actual_path_chunks) > 1) { + file_name <- paste0('/', file_name) + } + path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]] + path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)], + collapse = '/') + path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values) + file_name_with_globs <- tail(path_with_globs_chunks, 1) + if (length(path_with_globs_chunks) > 1) { + file_name_with_globs <- paste0('/', file_name_with_globs) + } + right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1) + right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values) + path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags)) + match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE), + paste0(actual_path, file_name)) + if (match != 1) { + stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
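+  # (found_value remains NULL when the context around the tag does not match
+  # the actual path in exactly one place.)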
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user in
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed
+  # to be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be ",
+                      "modified and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("no file extension specified in the filenames; defaulting to '.eps'.")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
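+    # Any other number of categories falls back to generic numbered labels: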
else {
+      bar_titles <- paste0("Cat. ", 1:nmap, " (%)")
+    }
+  }
+
+  if (plot) {
+    for (k in 1:nmap) {
+      s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg,
+#                     bar_limits = bar_limits, var_limits = var_limits,
+                     triangle_ends = triangle_ends, plot = TRUE,
+                     draw_separators = draw_separators,
+                     title = bar_titles[[k]], title_scale = title_scale,
+                     label_scale = label_scale, extra_margin = extra_margin)
+    }
+  } else {
+    # TODO: col_inf and col_sup
+    return(list(brks = brks, cols = cols))
+  }
+
+}
diff --git a/modules/Crossval/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e23495ecfb51976af8a20958de0c07c25576293c
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+    # To request more variables to be divided into atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'ECMWF-SEAS5.1', member: 'all'}
+      - {name: 'Meteo-France-System7'}
+      # To request more Systems to be divided into atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' is currently the only option (str)
+      createFrom: Anomalies # Which module's output the multimodel should be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate:
+      - '0101'
+      - '0201'
+      - '0301'
+      - '0401'
+      - '0501'
+      - '0601'
+      - '0701'
+      - '0801'
+      - '0901'
+      - '1001'
+      - '1101'
+      - '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '1996' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in months. Starts at '1'. (Mandatory, int)
+    ftime_max: 6 # Last forecast time step in months. Starts at '1'.
(Mandatory, int) + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: raw # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: no # # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average, 'accumulated'. (Mandatory, string) + # ini and end: list, pairs initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists, Custom user-defined forecast times to aggregate. + # Elements should be named, named can be chosen by the user. + # An R expression can be entered using '!expr"; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + user_def: + DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. 
Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. 
+              # If left empty, defaults to 1. (Optional, int)
+  remove_NAs: yes # Whether to remove NAs.
+                  # If left empty, defaults to no/false. (Optional, bool)
+  Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
+Run:
+  filesystem: esarchive # Name of the filesystem as defined in the archive configuration file
+  Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
+                 # Default value is 'INFO'. (Optional, str)
+  Terminal: yes # Optional, bool: whether to display log messages in the terminal.
+                # Default is yes/true.
+  output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Used when launching jobs (not running interactively)
+  autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
+  # Fill only if using Autosubmit:
+  auto_conf:
+    script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script
+    expid: a6wq # replace with your EXPID
+    hpc_user: bsc032339 # replace with your hpc username
+    wallclock: 01:00 # wallclock for single-model jobs, hh:mm
+    wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used.
+    processors_per_job: 4 # processors to request for each single-model job.
+    processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used.
+    custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs.
+    custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used.
+    platform: nord3v2 # platform (for now, only nord3v2 is available)
+    email_notifications: yes # enable/disable email notifications
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
diff --git a/modules/Crossval/recipe_crossval_ecvs_global.yml b/modules/Crossval/recipe_crossval_ecvs_global.yml
new file mode 100644
index 0000000000000000000000000000000000000000..830225af29c54fd1623ae5435547a4cf2f8c54ff
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs_global.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+ - {name: 'tas', freq: 'monthly_mean', units: 'K'} + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'ECMWF-SEAS5.1', member: 'all'} + - {name: 'Meteo-France-System7'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: raw # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. 
Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type analgs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: no # # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average, 'accumulated'. (Mandatory, string) + # ini and end: list, pairs initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists, Custom user-defined forecast times to aggregate. + # Elements should be named, named can be chosen by the user. + # An R expression can be entered using '!expr"; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + user_def: + DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()) Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr rps crps rms rmss # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. 
(Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. 
If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 7cf80b8f080a097eca647c6d349cdfb15cedf50d..a371984e8a1017c988dfc6d5c50a0a11ab92f25a 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -27,7 +27,7 @@ dates2load <- function(recipe, logger) { file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) file_dates <- .add_dims(file_dates) - } else if (temp_freq == "weekly_mean") { + } else if (temp_freq %in% c("weekly_mean", "daily")) { sday <- recipe$sday_window if (is.null(sday)) { sday <- 3 @@ -57,10 +57,11 @@ dates2load <- function(recipe, logger) { hcst.end = as.numeric(recipe$hcst_end), ftime_min = recipe$ftime_min, ftime_max = recipe$ftime_max, out = 'hcst') - } else { + } else { file_dates <- paste0(strtoi(recipe$hcst_start):strtoi(recipe$hcst_end), recipe$sdate) } + # fcst dates (if fcst_year empty it creates an empty object) if (! is.null(recipe$fcst_year)) { if (temp_freq == "monthly_mean") { @@ -81,6 +82,32 @@ dates2load <- function(recipe, logger) { # if no fcst year is requested: file_dates.fcst <- NULL } + #----------------------------------------------------------------- + if (FALSE) { + #if (recipe$Analysis$Datasets$System$name == 'ECMWF-ENS-EXT') { + if (recipe$Analysis$Variables$freq =='weekly_mean') { + info(recipe$Run$logger, + "hcst period defined in recipe not used for ECMWF-EXT-ENS model.") + sd <- as.Date(as.character(recipe$Analysis$Time$sdate), format ="%Y%m%d") + if (tolower(weekdays(sd)) != tolower(recipe$Analysis$Time$week_day)) { + info(recipe$Run$logger, + paste("sdate is not a", recipe$Analysis$Time$week_day)) + } + year <- as.numeric(format(sd, "%Y")) + fcst_sweek_ind <- (recipe$Analysis$Time$sweek_window + 1)/2 + fcst.sdate_to_start <- as.character(format(sd + + 7 *((fcst_sweek_ind - 1)/2), "%Y%m%d")) + + file_dates <- subseas_file_dates(fcst.sdate_to_start, + recipe$Analysis$Time$sweek_window, + recipe$Analysis$Time$sday_window, + year - 20, model = 'ECMWF-ENS-EXT', + var = recipe$Analysis$Variables$name, + year - 1, recipe$Analysis$Time$ftime_min, + recipe$Analysis$Time$ftime_max, "hcst") + file_dates.fcst <- recipe$Analysis$Time$fcst_year + } + } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun } @@ -93,3 +120,4 @@ dates2load <- function(recipe, logger) { dim(data) <- default_dims return(data) } + diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index c55688e01dc480bd6440fae78d4fc15ebea87b2d..3844e51d18ad2dd2f69e414c95cd6010ae1308f8 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -82,7 +82,6 @@ load_decadal <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - #------------------------- # derived from above: #------------------------- @@ -435,6 +434,6 @@ load_decadal <- function(recipe) { info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") 
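+  # Memory-usage logging is currently disabled here (and in load_seasonal.R);
+  # uncomment the line above to restore it.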
return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index 70b7d53436c702145ace52cc15065a3560a29f42..8e65ec03334306a7a7ad90e844deecafce77a495 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -18,7 +18,6 @@ load_seasonal <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] store.freq <- recipe$Analysis$Variables$freq @@ -31,7 +30,6 @@ load_seasonal <- function(recipe) { recipe$Analysis$Time$ftime_min, recipe$Analysis$Time$ftime_max, time_freq=store.freq) - if (!(is.null(sdates$fcst))) { idxs$fcst <- get_timeidx(sdates$fcst, recipe$Analysis$Time$ftime_min, @@ -83,7 +81,6 @@ load_seasonal <- function(recipe) { fcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", "$var$_$file_date$.nc") - # Define regrid parameters: #------------------------------------------------------------------- regrid_params <- get_regrid_params(recipe, archive) @@ -100,7 +97,6 @@ load_seasonal <- function(recipe) { # Load hindcast #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, @@ -391,7 +387,7 @@ load_seasonal <- function(recipe) { ############################################################################ ############################################################################ - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 9fa302028a86990537dc8a3a34b3c4981ca91860..65d2cffabc15f3a826351c68e27fb311c3bddbf7 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -35,13 +35,12 @@ load_subseasonal <- function(recipe) { # get datasets dict: archive <-get_archive(recipe) # read_yaml("conf/archive_subseasonal.yml")[[recipe$Run$filesystem]] exp_descrip <- archive$System[[exp.name]] - freq.hcst <- unlist(exp_descrip[[store.freq]][variable[1]]) reference_descrip <- archive$Reference[[ref.name]] freq.obs <- unlist(reference_descrip[[store.freq]][variable[1]]) obs.dir <- reference_descrip$src - fcst.dir <- exp_descrip$src - hcst.dir <- exp_descrip$src + fcst.dir <- exp_descrip$srcfc + hcst.dir <- exp_descrip$srchc fcst.nmember <- exp_descrip$nmember$fcst hcst.nmember <- exp_descrip$nmember$hcst @@ -91,7 +90,8 @@ load_subseasonal <- function(recipe) { #------------------------------------------------------------------- ## NOTE: metadata_dims has to be specified as 'file_date' to be able to get ## the metadata when the first file is missing. However, when retrieving two - ## variables, it must be 'var'. Start() does not admit both. + ## variables, it must be 'var'. Start() does not admit both. 
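+  ## A minimal sketch of that trade-off (illustrative only, kept disabled;
+  ## 'variable' holds the requested variable name(s)):
+  if (FALSE) {
+    metadata_dims_choice <- if (length(variable) > 1) 'var' else 'file_date'
+  }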
+ if (recipe$Analysis$Datasets$System$name == "NCEP-CFSv2") { hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, @@ -117,7 +117,34 @@ load_subseasonal <- function(recipe) { time = 'file_date'), split_multiselected_dims = split_multiselected_dims, retrieve = TRUE) - + } else { + hcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", + "$file_date$.nc") + hcst <- Start(dat = hcst.path, + var = variable, + var_dir = var_dir_exp, + file_date = sdates$hcst, + time = recipe$Analysis$Time$ftime_min:recipe$Analysis$Time$ftime_max, + latitude = values(list(lats.min, lats.max)), + latitude_reorder = Sort(), + longitude = values(list(lons.min, lons.max)), + longitude_reorder = circularsort, + transform = regrid_params$fcst.transform, + transform_params = list(grid = regrid_params$fcst.gridtype, + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), + transform_vars = c('latitude', 'longitude'), + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble', 'lev')), + ensemble = indices(1:hcst.nmember), + metadata_dims = c('file_date'), # change? + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + } # Remove var_dir dimension if ("var_dir" %in% names(dim(hcst))) { hcst <- Subset(hcst, along = "var_dir", indices = 1, drop = "selected") @@ -156,7 +183,6 @@ load_subseasonal <- function(recipe) { # the call uses file_date instead of fcst_syear so that it can work # with the daily case and the current version of startR not allowing # multiple dims split - fcst <- Start(dat = fcst.path, var = variable, var_dir = var_dir_exp, @@ -231,6 +257,37 @@ load_subseasonal <- function(recipe) { dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] # Separate Start() call for monthly vs daily data if (store.freq == "weekly_mean") { + # ---------------------------------------------------- + # Code to fill missing dates from hindcast attributes: + # It fails if all years have an NA + y <- 1 + while (any(is.na(dates[,,y,]))) { + y <- y + 1 + } + # y is a year without missing time steps + complete_year <- dates[,,y,] + # Loop over years to replace missing dates + for (years in 1:dim(dates)['syear']) { + that_year <- dates[,,years,] + if (!all(is.na(that_year))) { + if (any(is.na(that_year))) { + pos_missed <- which(is.na(that_year)) + actual_year <- that_year + actual_year <- format(actual_year[which(!is.na(actual_year))[1]], + "%Y") + that_year[pos_missed] <- as.POSIXct(as.Date(paste0(actual_year, + format(complete_year[pos_missed], "-%m-%d")))) + dates[,,years,] <- that_year + } + } else { # if one year is totally empty + if ( years != 1) { + actual_year <- as.numeric(format(dates[1,1,years-1,1], "%Y")) + 1 + dates[,,years,] <- as.POSIXct(as.Date(paste0(actual_year, + format(complete_year, "-%m-%d")))) + } + } + } + # ----------------------------------------------------- dates_file <- format(as.Date(dates, '%Y%m%d'), "%Y%m%d") dim(dates_file) <- dim(dates) obs <- Start(dat = obs.path, @@ -388,3 +445,4 @@ load_subseasonal <- function(recipe) { } + diff --git a/modules/Loading/R/subseas_file_dates.R b/modules/Loading/R/subseas_file_dates.R index eca66c5572807abef6cc5496f5154accca380055..bc8301aa34c3ce611f715662060aee0612330624 100644 --- a/modules/Loading/R/subseas_file_dates.R +++ b/modules/Loading/R/subseas_file_dates.R @@ -7,7 +7,8 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days, hcst.start, 
hcst.end,
-                    ftime_min, ftime_max, out) {
+                    ftime_min, ftime_max, out,
+                    model = "NCEP-CFSv2", var = NULL) {
   # Generate the sday_window vectors:
   ## Only for Thursdays and Mondays
   ## Create a diagonal matrix of 3 and 4 days matching Mondays/Thursdays
@@ -37,7 +38,7 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days,
     monday_win <- c(rev(next_sday) * -1, 0, prev_sday)
   } else {
     thrusday_win <- monday_win <- 0
-  }
+  }
   #### END Generation sdya_window vectors
   ftime_min <- as.numeric(substr(as.character(startdate), 1, 4)) - hcst.end
@@ -81,7 +82,10 @@ subseas_file_dates <- function(startdate, n.skill.weeks, n.days,
                          (strtoi(sday.year) - ftime_min), sep = "")
     sday.dates <- apply(expand.grid(sday.years, sday.mday), 1,
                         paste, collapse = "")
-    sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1)
+    if (model == 'ECMWF-ENS-EXT') {
+      sday.dates <- paste0(sday.sdate, "/", var, "_", sday.dates)
+    }
+    sdays.file_dates <- abind(sdays.file_dates, sday.dates, along = 1)
   }
   names(dim(sdays.file_dates)) <- c('sday','syear')
diff --git a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R
index 192e92928974906e6698d2e708cd4c0193559c66..4aa48924e00405456b388201c1e4616be5608f1b 100644
--- a/modules/Multimodel/Multimodel.R
+++ b/modules/Multimodel/Multimodel.R
@@ -9,41 +9,57 @@ source("modules/Loading/R/dates2load.R")
 source("modules/Loading/R/get_timeidx.R")
 source("modules/Loading/R/check_latlon.R")
 source('modules/Multimodel/load_multimodel.R')
-source('modules/Multimodel/load_multimodel_splitted.R')
-source('modules/Multimodel/dims_multimodel.R')
 source('modules/Multimodel/build_multimodel.R')
 source('modules/Multimodel/clean_multimodel.R')
+library(startR)
+library(CSTools)
+library(yaml)
+recipe <- read_yaml("recipe_tas.yml")
+source("modules/Saving/R/get_dir.R")
+source("modules/Loading/Loading.R")
+recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240605141443/")
+# TODO: find a cleaner way to rebuild the logger:
+recipe$Run$Loglevel <- 'INFO'
+recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log"
+recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel,
+                       appenders = list(console_appender(layout = default_log_layout()),
+                                        file_appender(recipe$Run$logfile, append = TRUE,
+                                                      layout = default_log_layout())))
 
 Multimodel <- function(recipe) {
-  # recipe: auto-s2s recipe as provided by read_yaml
-
-  # Loading data saved in the jobs for individual models
-  if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){
-    # Load data splitting by system
-    data_aux <- load_multimodel_splitted(recipe)
-
-    data <- data_aux$data
-
-    files_hcst <- data_aux$files_hcst
-    files_fcst <- data_aux$files_fcst
-    files_obs <- data_aux$files_obs
-
-    rm(data_aux)
-
-  } else {
-    # Load data without splitting
-    data <- load_multimodel(recipe)
-    files_hcst <- data$hcst$attrs$source_files
-    files_fcst <- data$fcst$attrs$source_files
-    files_obs <- data$obs$attrs$source_files
+  # Load the data saved by the single-model jobs, split by system and by
+  # individual members:
+  if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') {
+    datos <- load_multimodel(recipe)
+  } else if (recipe$Analysis$Datasets$Multimodel$approach == 'mean') {
+    # In this case the ensemble dim has length 1 for each model;
+    # the functions in the Skill module still work
+    source("modules/Multimodel/load_multimodel_mean.R")
+    datos <- load_multimodel_mean(recipe)
+    source("modules/Multimodel/load_multimodel_probs.R")
+    probs <- load_multimodel_probs(recipe)
+  } else {
#(recipe$Analysis$Datasets$Multimodel$approach == "weighted")
+    stop("Implement the weighting method here")
+    # Presumably the probs and the ensemble mean are modified and returned:
+    multimodel_aux <- build_multimodel(data, recipe)
+    ensmean <- multimodel_aux$ensmean
+    probs <- multimodel_aux$prob
+    rm(multimodel_aux)
   }
-  # Building the multi-model
-  multimodel_aux <- build_multimodel(data, recipe)
-  data <- multimodel_aux$data
-  prob <- multimodel_aux$prob
-  rm(multimodel_aux)
+  # Assessment:
+  # If Fair is TRUE, the number of members used to compute the probabilities
+  # needs to be passed
+  source("modules/Multimodel/Multimodel_skill.R")
+  skill_metrics <- multimodel_metrics(recipe = recipe,
+                                      datos = datos,
+                                      probs = probs,
+                                      Fair = FALSE)
+
   # Saving multimodel
   if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') {
diff --git a/modules/Multimodel/Multimodel_skill.R b/modules/Multimodel/Multimodel_skill.R
new file mode 100644
index 0000000000000000000000000000000000000000..242398dcd0a286b2bd7f5a65f2e2dfc842c1c147
--- /dev/null
+++ b/modules/Multimodel/Multimodel_skill.R
@@ -0,0 +1,377 @@
+# One function to compute all the possible metrics or statistics
+# 'datos' is a list returned by load_multimodel or load_multimodel_mean
+  ## this reduces memory use, and the same list serves to compute several metrics
+  ## with the 'mean' approach the ensemble dim has length 1 for each model
+# 'probs' is a list returned by load_multimodel_probs
+  ## this is needed to compute rps/rpss
+source("modules/Saving/Saving.R")
+source("modules/Crossval/R/tmp/RPS.R")
+source("modules/Crossval/R/RPS_clim.R")
+source("modules/Crossval/R/CRPS_clim.R")
+source("modules/Crossval/R/tmp/RPSS.R")
+source("modules/Crossval/R/tmp/RandomWalkTest.R")
+source("modules/Crossval/R/tmp/Corr.R")
+source("modules/Crossval/R/tmp/Bias.R")
+source("modules/Crossval/R/tmp/SprErr.R")
+source("modules/Crossval/R/tmp/Eno.R")
+source("modules/Crossval/R/tmp/GetProbs.R")
+source("modules/Visualization/Visualization.R") # to load .warning from utils
+multimodel_metrics <- function(recipe,
+                               datos = NULL,
+                               probs = NULL,
+                               Fair = FALSE) {
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  cross.method <- recipe$Analysis$cross.method
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  # Prepare indices for crossval
+  sdate_dim <- dim(datos$hcst[[1]]$data)['syear']
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL)
+  tmp <- array(1:length(cross), c(syear = length(cross)))
+  alpha <- recipe$Analysis$alpha
+  if (is.null(alpha)) {
+    alpha <- 0.05
+  }
+
+  requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric,
+                                ", | |,")[[1]]
+  skill_metrics <- list()
+  if (!is.null(datos)) {
+    # Convert the $data elements to a plain list to use multiApply:
+    datos <- append(list(obs = datos$obs$data),
+                    lapply(datos$hcst, function(x) x$data))
+    ## CRPS metrics only make sense with the pooled method:
+    if (any(c('crps', 'crpss') %in% requested_metrics)) {
+      crps <- Apply(datos, target_dims = c('syear', 'ensemble'),
+                    fun = function(obs, ...)
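+                      # '...' receives one array per system; abind() pools all
+                      # their members into a single multi-model ensemble before
+                      # computing the CRPS against the observations.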
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
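+                     # As above, members from all systems are pooled along the
+                     # ensemble dimension before computing RMSSS.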
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
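+                        # Pool all members, reduce to the ensemble mean, then
+                        # compute its covariance with the observations.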
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # For the pool method: + if (is.null(probs)) { + # calculate probs from the ensemble distribution + # Firs compure tercile limits in crossval + lims <- list() + syears_ind <- list(t = tmp) + lims$hcst <- Apply(append(syears_ind, datos[-1]), + target_dims = append(list(NULL), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(t, ..., crossin, prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- Subset(res, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + lims$obs <- Apply(append(syears_ind, datos[1]), + target_dims = list(NULL, c('syear', 'ensemble')), + fun = function(t, obs, crossin, prob_lims) { + res <- Subset(obs, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + probs <- list() + probs$hcst <- list() + probs$fcst <- list() + probs$obs <- list() + for (ps in 1:length(categories)) { + lim <- lims$hcst[ps] + probs$hcst <- append(probs$hcst, + list(Apply(append(lim, datos[-1]), + target_dims = append(list(c('syear', 'cat')), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + lim <- lims$obs[ps] + probs$obs <- append(probs$obs, + list(Apply(append(lim, datos[1]), + target_dims = list(c('syear', 'cat'), + c('syear', 'ensemble')), + fun = function(lim, obs) { + GetProbs(data = obs, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + + if (!is.null(datos$fcst)) { + ## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD + lims$fcst[ps] <- Apply(datos[-1], + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + ncores = ncores) + + probs$fcst <- append(probs$fcst, + list(Apply(append(lims$fcst[ps], datos[-1]), + target_dims = append(list('cat'), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) 
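+                       # Forecast probabilities use category limits computed
+                       # from the full hindcast period (no cross-validation).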
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + } + } + # At this point all method can have probs calculated + } + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + return(skill_metrics) +} diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index 778aae6e6d08a517444e367139b1134033696128..389d3d693e24eeed390ef931b8ea9a8c3c65cf67 100644 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -275,3 +275,4 @@ load_multimodel <- function(recipe) { return(list(hcst = hcst, fcst = fcst, obs = obs)) } + diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R new file mode 100644 index 0000000000000000000000000000000000000000..d23f98794d5a733fe888a21709ea57dc4c5ad726 --- /dev/null +++ b/modules/Multimodel/load_multimodel_mean.R @@ -0,0 +1,276 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_mean <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + 
lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", + variable, "_hcst_EM_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- obs.path <- hcst.path + fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) + obs.path <- file.path(recipe$Run$output_dir, + gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, + paste0("scorecards_", + gsub('\\.','',exp.name[1]), "_", ref.name, + "_$var$_-obs_$file_date$__", hcst_start, "-", + hcst_end, "_s", shortdate, ".nc")) + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + for (sys in exp.name) { + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, + var = 'hcst_EM', + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube + hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + for (sys in exp.name) { + fcst.path_aux <- gsub("$model$", 
gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, + var = 'fcst_EM', + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube + fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
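+    # A minimal sketch of the idea below (with hypothetical dates): each
+    # daily value is matched to its monthly file through a "%Y%m" selector:
+    #   d <- as.POSIXct(c("1993-05-01", "1993-06-02"), tz = "UTC")
+    #   format(d, "%Y%m")   # "199305" "199306" -> file_date values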
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/Multimodel/load_multimodel_probs.R new file mode 100644 index 0000000000000000000000000000000000000000..4b0eb358d6c8f9692ecfad2bb4d27a4935a6f734 --- /dev/null +++ b/modules/Multimodel/load_multimodel_probs.R @@ -0,0 +1,294 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_probs <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- 
recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- hcst.path + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','', exp.name), + ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards__", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc")) + + ## Generate probs names inner files variables: + prob_names <- NULL + all_names <- NULL + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + all_names <- c(all_names, name_elem) + } + prob_names <- append(prob_names, list(all_names)) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[2] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + hcst <- append(hcst, list(probs)) + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + 
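+    # The next lines use the usual padding idiom: start from a template of
+    # length-1 dimensions and overwrite those the data actually has. A
+    # minimal sketch with a hypothetical array:
+    #   x <- array(0, dim = c(syear = 2, time = 3))
+    #   tmpl <- c(dat = 1, var = 1, syear = 1, time = 1, ensemble = 1)
+    #   tmpl[names(dim(x))] <- dim(x)
+    #   dim(x) <- tmpl   # dims: dat = 1, var = 1, syear = 2, time = 3, ensemble = 1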
default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = prob_names[[ps]], + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + fcst <- append(fcst, list(probs)) + } + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + obs <- list() + + if (store.freq == "monthly_mean") { + for (ps in 1:length(prob_names)) { + aux <- Start(dat = obs.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + obs <- append(obs, list(as.s2dv_cube(aux))) + } + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + # Adds ensemble dim to obs (for consistency with hcst/fcst) + #default_dims <- c(dat = 1, var = 1, sday = 1, + # sweek = 1, syear = 1, time = 1, + # latitude = 1, longitude = 1, ensemble = 1) + #default_dims[names(dim(obs))] <- dim(obs) + #dim(obs) <- default_dims + + # Convert obs to s2dv_cube + #obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R deleted file mode 100644 index 5b9a642f5599602a2318f71b4766dffcd13ef930..0000000000000000000000000000000000000000 --- a/modules/Multimodel/load_multimodel_splitted.R +++ /dev/null @@ -1,58 +0,0 @@ - -load_multimodel_splitted <- function(recipe){ - - # Retrieve data dimension only without loading data - dims <- dims_multimodel(recipe) - data_order <- names(dims$dim.hcst) - - # Create empty array with desired dimensions - data <- list(hcst = NULL, fcst = NULL, obs = NULL) - data$hcst$data <- array(data = NA, dim = dims$dim.hcst) - if (!is.null(recipe$Analysis$Time$fcst_year)) { - data$fcst$data <- 
array(data = NA, dim = dims$dim.fcst)
-  }
-  files_hcst <- list()
-  files_fcst <- list()
-  files_obs <- list()
-
-
-  # Loop over system to load hindcast and forecast data
-  for (sys in 1:length(recipe$Analysis$Datasets$System$models)){
-
-    recipe_aux <- recipe
-    system_load <- recipe$Analysis$Datasets$System$models[sys]
-    recipe_aux$Analysis$Datasets$System$models <- system_load
-
-    data_aux <- load_multimodel(recipe_aux)
-
-    data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order)
-
-    if(!is.null(recipe$Analysis$Time$fcst_year)){
-      data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order)
-    }
-
-    files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst))
-    files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst))
-    files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs))
-
-  } # close loop on sys
-
-  # Define obs data
-  data$obs$data <- data_aux$obs$data
-
-  # Include data attributes
-  data$hcst$attrs <- data_aux$hcst$attrs
-  data$fcst$attrs <- data_aux$fcst$attrs
-  data$obs$attrs <- data_aux$obs$attrs
-  data$hcst$coords <- data_aux$hcst$coords
-  data$fcst$coords <- data_aux$fcst$coords
-  data$obs$coords <- data_aux$obs$coords
-
-  # Remove temporary data_aux
-  rm(data_aux)
-
-  return(list(data = data,
-              files_hcst = files_hcst,
-              files_fcst = files_fcst,
-              files_obs = files_obs))
-}
diff --git a/modules/Saving/R/drop_dims.R b/modules/Saving/R/drop_dims.R
index 7361faa8ea1056b414b5e2ed67740e4ceeb38bae..be68a3332a42bb1d3298b6ea3200750c02239f76 100644
--- a/modules/Saving/R/drop_dims.R
+++ b/modules/Saving/R/drop_dims.R
@@ -1,8 +1,14 @@
 # version victoria https://earth.bsc.es/gitlab/es/auto-s2s/-/blob/dev-Loading-multivar/modules/Skill/Skill.R
+# This function takes an array and drops dimensions of length = 1 that are not
+# needed by the saving or plotting functions, aka 'droppable_dims'.
+# The essential dimensions are: 'var', 'bin', 'time', and 'latitude'/'longitude'
+# or 'region'.
+# The 'droppable' dimensions are: 'dat', 'sday', 'sweek', 'ensemble', 'nobs',
+# 'nexp', 'exp_memb', 'obs_memb'.
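+# A minimal usage sketch (hypothetical dimensions):
+#   x <- array(0, dim = c(dat = 1, var = 1, time = 2, ensemble = 1))
+#   dim(.drop_dims(x))   # c(var = 1, time = 2): 'dat' and 'ensemble' are
+#                        # dropped; 'var' is kept even though its length is 1.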
.drop_dims <- function(metric_array) { # Define dimensions that are not essential for saving droppable_dims <- c("dat", "sday", "sweek", "ensemble", "nobs", - "nexp", "exp_memb", "obs_memb", "bin") + "nexp", "exp_memb", "obs_memb") # , "bin") # Select non-essential dimensions of length 1 dims_to_drop <- intersect(names(which(dim(metric_array) == 1)), droppable_dims) @@ -18,10 +24,6 @@ return(metric_array) } - - - - ## TODO: Replace with ClimProjDiags::Subset and add var and dat dimensions #.drop_dims <- function(metric_array) { # # Drop all singleton dimensions diff --git a/modules/Saving/R/get_filename.R b/modules/Saving/R/get_filename.R index b2345691c8a5ffdc4839a0ca331761aa4548201b..7266ac705436541f0ee09deb05992395f2dd0032 100644 --- a/modules/Saving/R/get_filename.R +++ b/modules/Saving/R/get_filename.R @@ -10,6 +10,7 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { if (tolower(recipe$Analysis$Horizon) == "subseasonal") { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") dd <- "week" + shortdate <- date } else { shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") dd <- "month" @@ -17,12 +18,12 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { if (tolower(recipe$Analysis$Horizon) == "decadal") { # to not save the month and day in the filename (needed for the multimodel) - date <- substr(date,1,4) + date <- substr(date, 1, 4) # for the models initialised in January - it may be better to do this in save_* functions archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive exp.name <- recipe$Analysis$Datasets$System$name - if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){ - date <- as.character(as.numeric(date)-1) + if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1) { + date <- as.character(as.numeric(date) - 1) } } @@ -41,24 +42,24 @@ get_filename <- function(dir, recipe, var, date, agg, file.type) { hcst_end <- recipe$Analysis$Time$hcst_end switch(tolower(file.type), - "skill" = {type_info <- "-skill_"}, - "corr" = {type_info <- "-corr_"}, - "exp" = {type_info <- paste0("_", date, "_")}, - "obs" = {type_info <- paste0("-obs_", date, "_")}, - "percentiles" = {type_info <- "-percentiles_"}, - "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}, - "rps_syear" = {type_info <- "rps_syear"}, - "rps_clim_syear" = {type_info <- "rps_clim_syear"}, - "crps_syear" = {type_info <- "crps_syear"}, - "crps_clim_syear" = {type_info <- "crps_clim_syear"}, - "crps" = {type_info <- "crps"}, - "mean_bias" = {type_info <- "mean_bias"}, - {type_info <- paste0(file.type)}) + "skill" = {type_info <- "-skill"}, + "corr" = {type_info <- "-corr"}, + "exp" = {type_info <- paste0("_", date)}, + "obs" = {type_info <- paste0("-obs_", date)}, + "percentiles" = {type_info <- "-percentiles"}, + "probs" = {type_info <- paste0("-probs_", date)}, + "bias" = {type_info <- paste0("-bias_", date)}, + "rps_syear" = {type_info <- "-rps_syear"}, + "rps_clim_syear" = {type_info <- "-rps_clim_syear"}, + "crps_syear" = {type_info <- "-crps_syear"}, + "crps_clim_syear" = {type_info <- "-crps_clim_syear"}, + "crps" = {type_info <- "-crps"}, + "mean_bias" = {type_info <- "-mean_bias"}, + {type_info <- paste0("-", file.type)}) # Build file name filename <- paste0("scorecards_", system, "_", reference, "_", var, - "_", type_info, "_", hcst_start, "-", hcst_end, + type_info, "_", hcst_start, "-", hcst_end, "_s", shortdate) } else { if 
(tolower(recipe$Analysis$Horizon) == "decadal") { diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index ec36b1028889e54f43cd47932f591173677e69dc..8106923486ea86017f89776bf7ea681443ce99f5 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -10,6 +10,16 @@ # Compute initial date fcst.horizon <- tolower(recipe$Analysis$Horizon) # Generate time dimensions and the corresponding metadata. + ## TODO: This addresses subseasonal case, but does not work well + ## when there is missing data. + if (all(c("sweek", "sday") %in% names(dim(data_cube$attrs$Dates)))) { + central_day <- (dim(data_cube$attrs$Dates)[["sday"]] + 1) / 2 + central_week <- (dim(data_cube$attrs$Dates)[["sweek"]] + 1) / 2 + data_cube$attrs$Dates <- Subset(data_cube$attrs$Dates, + along = c("sday", "sweek"), + indices = list(sday = central_day, + sweek = central_week)) + } dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), cal = calendar) leadtimes <- as.numeric(dates - init_date)/3600 @@ -17,7 +27,7 @@ switch(fcst.horizon, "seasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- leadtimes; ref <- 'hours since '; + "subseasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- ''}, "decadal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}) diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index db7ceecacd0f6e8af750fae45147e6dcbf07f506..d0b8dc4f267183404b637cf350cdd7a58ae0affd 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -5,8 +5,14 @@ save_metrics <- function(recipe, agg = "global", outdir = NULL, module = "skill") { + # This function adds metadata to the skill metrics in 'skill' # and exports them to a netCDF file inside 'outdir'. 
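+  # 'metrics' is expected to be a named list with one array per metric,
+  # e.g. (hypothetical values):
+  #   metrics <- list(rpss = array(0.2, dim = c(var = 1, time = 2)),
+  #                   rpss_significance = array(TRUE, dim = c(var = 1, time = 2)))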
+ + # Sanity checks + if (!is.list(metrics) || is.null(names(metrics))) { + stop("'metrics' should be a named list.") + } # Define grid dimensions and names lalo <- c('longitude', 'latitude') archive <- get_archive(recipe) @@ -36,12 +42,12 @@ save_metrics <- function(recipe, calendar <- archive$System[[global_attributes$system]]$calendar } if (fcst.horizon == 'decadal') { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -56,6 +62,8 @@ save_metrics <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, @@ -95,6 +103,15 @@ save_metrics <- function(recipe, }) } } + metric <- names(subset_metric[i]) + long_name <- dictionary$metrics[[metric]]$long_name + missing_val <- -9.e+33 + subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val + metadata <- list(metric = list(name = metric, + long_name = long_name, + missing_value = missing_val)) + data_cube$attrs$Variable$metadata[metric] <- metadata + attr(subset_metric[[i]], 'variables') <- metadata ## TODO: Maybe 'scorecards' condition could go here to further simplify ## the code extra_string <- get_filename(NULL, recipe, variable, diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index d5dfae16f16346242d8bf1b744d536164fb3e30d..0071eb88e0867e9a24c6852c91ba43f004c8fa0a 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -5,6 +5,11 @@ save_percentiles <- function(recipe, outdir = NULL) { # This function adds metadata to the percentiles # and exports them to a netCDF file inside 'outdir'. 
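+  # 'percentiles' is expected to be a named list with one array per
+  # threshold, e.g. (hypothetical names and values):
+  #   percentiles <- list(percentile_33 = array(276.1, dim = c(var = 1, time = 2)),
+  #                       percentile_66 = array(280.4, dim = c(var = 1, time = 2)))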
+ + # percentiles <- lapply(percentiles, + # function(x) { + # .drop_dims(x) + # }) archive <- get_archive(recipe) # Define grid dimensions and names lalo <- c('longitude', 'latitude') @@ -35,16 +40,12 @@ save_percentiles <- function(recipe, # Generate vector containing leadtimes if (fcst.horizon == 'decadal') { - # if (global_attributes$system == 'Multimodel') { - # init_month <- 11 #TODO: put as if init_month is January - # } else { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # } - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, + init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start) + 1, '-01-01'), cal = calendar) + } else if (fcst.horizon == 'subseasonal') { + init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, + substr(recipe$Analysis$Time$sdate, 5, 8)), + format = '%Y%m%d', cal = calendar) } else { init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, recipe$Analysis$Time$sdate), @@ -62,6 +63,8 @@ save_percentiles <- function(recipe, } else { fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') } + } else if (fcst.horizon == 'subseasonal') { + fcst.sdate <- as.character(recipe$Analysis$Time$sdate) } else { if (!is.null(recipe$Analysis$Time$fcst_year)) { fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index a9ddc977e231896de23dc7a49dd7dac4386c45f2..e283c8fd4e9cd0ecd1da1cf8dc652f7ccc6e22c0 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -59,7 +59,6 @@ save_probabilities <- function(recipe, syears <- seq(1:dim(data_cube$data)['syear'][[1]]) ## expect dim = [sday = 1, sweek = 1, syear, time] syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - # Loop over variable dimension for (var in 1:data_cube$dims[['var']]) { subset_probs <- lapply(probs, function(x) { @@ -117,7 +116,14 @@ save_probabilities <- function(recipe, # Generate name of output file outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, "probs") - + if (type == 'obs') { + filename <- strsplit(outfile, "/") + filename <- filename[[1]][length(filename[[1]])] + outfile <- file.path(outdir, + sub(gsub('\\.','', + recipe$Analysis$Datasets$System$name), "", + filename)) + } # Get grid data and metadata and export to netCDF if (tolower(agg) == "country") { country <- get_countries(grid) diff --git a/modules/Units/R/transform_units_precipitation.R b/modules/Units/R/transform_units_precipitation.R index d0dd7ffd50dfb6f04db35006a06cd61b1d8f43e9..987b5e7cc88152bc6a46a660798372c3c454c7fb 100644 --- a/modules/Units/R/transform_units_precipitation.R +++ b/modules/Units/R/transform_units_precipitation.R @@ -83,6 +83,8 @@ transform_units_precipitation <- function(data, original_units, new_units, num_days <- .days_in_month(date, cal = .cal) res <- x * num_days }, ncores = ncores)$output1 + } else if (freq == "weekly_mean") { + data_list[[var_index]] <- data_list[[var_index]] * 7 } } diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 31a20e5551c1581e6e060327763e352213ad27f3..cd63313172e8e0e1d1a353ab48570677d1472795 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -1,4 +1,7 @@ -plot_ensemble_mean <- 
function(recipe, fcst, mask = NULL, dots = NULL, outdir, output_conf) {
+source("tools/add_logo.R")
+plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL,
+                               outdir, output_conf,
+                               method = 'median', logo = NULL) {
   ## TODO: Add 'anomaly' to plot title
   # Abort if frequency is daily
   if (recipe$Analysis$Variables$freq %in% c("daily", "daily_mean")) {
@@ -34,8 +37,22 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
   } else {
     projection <- "cylindrical_equidistant"
   }
-  # Compute ensemble mean
-  ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  # Compute the ensemble mean or another summary statistic
+  if (!is.null(method)) {
+    method <- tolower(method)
+  }
+  if (method == 'mean') {
+    ensemble_mean <- s2dv::MeanDims(fcst$data, 'ensemble')
+  } else if (method == 'iqr') {
+    ensemble_mean <- Apply(fcst$data, target_dims = 'ensemble',
+                           fun = function(x) {
+                             IQR(x)})$output1
+  } else {
+    ensemble_mean <- Apply(fcst$data, target_dims = 'ensemble',
+                           fun = function(x) {
+                             median(x)})$output1
+  }
+
   # Loop over variable dimension
   for (var in 1:fcst$dims[['var']]) {
     variable <- fcst$attrs$Variable$varName[[var]]
@@ -91,7 +108,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
                        along = c("syear"),
                        indices = which(start_date == i_syear),
                        drop = 'selected')
-    outfile <- paste0(outdir[[var]], "forecast_ensemble_mean-", i_syear)
+    outfile <- paste0(outdir[[var]], "forecast_ensemble_", method, "-", i_syear)
     # Mask
     if (!is.null(mask)) {
       outfile <- paste0(outfile, "_enscormask")
@@ -134,11 +151,11 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
     }
     if (tolower(recipe$Analysis$Horizon) == "subseasonal") {
       toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
-                         "\n", "Forecast Ensemble Mean / ", "Issued on ",
+                         "\n", "Forecast Ensemble ", method, " / ", "Issued on ",
                          format(ymd(start_date), "%d-%m-%Y"))
     } else {
       toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
-                         "\n", "Forecast Ensemble Mean / ", "Init.: ", i_syear)
+                         "\n", "Forecast Ensemble ", method, " / ", "Init.: ", i_syear)
     }
     # Plots
     output_configuration <- output_conf$multipanel$forecast_ensemble_mean
@@ -201,7 +218,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
       if (tolower(recipe$Analysis$Horizon) == 'seasonal') {
         toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
-                           "\n", "Ensemble Mean / ",
+                           "\n", "Ensemble ", method, " / ",
                            time_labels[i], " ", years[i],
                            " / Start date: ",
                            format(as.Date(i_syear, format = "%Y%m%d"),
@@ -210,7 +227,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
       } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') {
         toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
-                           "\n", "Ensemble Mean / ",
+                           "\n", "Ensemble ", method, " / ",
                            "Issued on ", format(ymd(start_date),
                            "%d-%m-%Y"),
                            "\n", time_labels[i], years[i])
@@ -218,7 +235,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
       } else {
         toptitle <- paste0(system_name, " / ", str_to_title(var_long_name),
-                           "\n", "Ensemble Mean / ",
+                           "\n", "Ensemble ", method, " / ",
                            time_labels[i], " ", years[i],
                            " / Start date: ", i_syear)
@@ -254,6 +271,22 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o
                 args = c(base_args,
                          list(toptitle = toptitle,
                               fileout = fileout)))
+      # Convert plots to user-chosen format
+      if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) &&
+
tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } @@ -261,3 +294,4 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, o info(recipe$Run$logger, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 9850b2c79fe89a4eb67ec5390ed67c6fa8db62ca..89b6edda0af1acc1b62d3b322f634e0be00a1f21 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,8 +1,8 @@ library(stringr) library(lubridate) - +source("tools/add_logo.R") plot_metrics <- function(recipe, data_cube, metrics, - outdir, significance = F, output_conf) { + outdir, significance = F, output_conf, logo) { # recipe: Auto-S2S recipe # archive: Auto-S2S archive # data_cube: s2dv_cube object with the corresponding hindcast data @@ -93,7 +93,7 @@ plot_metrics <- function(recipe, data_cube, metrics, units <- NULL # Define plot characteristics and metric name to display in plot if (name %in% c("rpss", "bss90", "bss10", "frpss", "crpss", - "rpss_specs", "bss90:_specs", "bss10_specs", + "rpss_specs", "bss90_specs", "bss10_specs", "rmsss", "msss")) { display_name <- toupper(strsplit(name, "_")[[1]][1]) metric <- var_metric[[name]] @@ -462,6 +462,22 @@ plot_metrics <- function(recipe, data_cube, metrics, args = c(base_args, list(toptitle = toptitle, fileout = fileout))) + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } @@ -470,3 +486,4 @@ plot_metrics <- function(recipe, data_cube, metrics, info(recipe$Run$logger, "##### SKILL METRIC PLOTS SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index c97da758bef44d4771af8ff8615755acb2b13a8c..39f8f29ca4af51b1ea4eebaf9056108dcc099c5e 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -8,7 +8,7 @@ source("modules/Visualization/R/tmp/Utils.R") source("modules/Visualization/R/tmp/PlotEquiMap.R") source("modules/Visualization/R/tmp/ColorBar_onebox.R") source("modules/Visualization/R/tmp/GradientCatsColorBar.R") - +source("tools/add_logo.R") ## TODO: Change name plot_most_likely_terciles <- function(recipe, fcst, @@ -16,7 +16,8 @@ plot_most_likely_terciles <- function(recipe, mask, dots, outdir, - output_conf) { + output_conf, + logo = NULL) { ## TODO: Add 'anomaly' to plot title # Abort if frequency is daily @@ -49,20 +50,20 @@ 
plot_most_likely_terciles <- function(recipe, init_date <- 1 } # Retrieve and rearrange probability bins for the forecast - if (is.null(probabilities$probs_fcst$prob_b33) || - is.null(probabilities$probs_fcst$prob_33_to_66) || - is.null(probabilities$probs_fcst$prob_a66)) { - stop("The forecast tercile probability bins are not present inside ", - "'probabilities', the most likely tercile map cannot be plotted.") - } - - probs_fcst <- abind(probabilities$probs_fcst$prob_b33, - probabilities$probs_fcst$prob_33_to_66, - probabilities$probs_fcst$prob_a66, - along = 0) - names(dim(probs_fcst)) <- c("bin", - names(dim(probabilities$probs_fcst$prob_b33))) +# if (is.null(probabilities$probs_fcst$prob_b33) || +# is.null(probabilities$probs_fcst$prob_33_to_66) || +# is.null(probabilities$probs_fcst$prob_a66)) { +# stop("The forecast tercile probability bins are not present inside ", +# "'probabilities', the most likely tercile map cannot be plotted.") +# } +# probs_fcst <- abind(probabilities$probs_fcst$prob_b33, +# probabilities$probs_fcst$prob_33_to_66, +# probabilities$probs_fcst$prob_a66, +# along = 0) +# names(dim(probs_fcst)) <- c("bin", +# names(dim(probabilities$probs_fcst$prob_b33))) + probs_fcst <- probabilities$probs_fcst ## TODO: Improve this section # Drop extra dims, add time dim if missing: for (var in 1:fcst$dims[['var']]) { @@ -235,10 +236,10 @@ plot_most_likely_terciles <- function(recipe, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / ", - time_labels[i], " ", years[i], - " / Start date: ", + "Start date: ", format(as.Date(i_syear, format = "%Y%m%d"), - "%d-%m-%Y")) + "%d-%m-%Y"), "\n Valid time: ", + time_labels[i], " ", years[i]) } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), @@ -300,6 +301,22 @@ plot_most_likely_terciles <- function(recipe, } } dev.off() + # Convert plots to user-chosen format + if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && + tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { + extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) + system_command <- paste("convert -density 300", fileout, + "-resize 40% -alpha remove", + paste0(tools::file_path_sans_ext(fileout), ".", + extension)) + system(system_command) + unlink(fileout) + fileout <- paste0(tools::file_path_sans_ext(fileout), ".", + extension) + } + if (!is.null(logo)) { + add_logo(file = fileout, logo = logo) + } } } } @@ -307,3 +324,4 @@ plot_most_likely_terciles <- function(recipe, info(recipe$Run$logger, "##### MOST LIKELY TERCILE PLOTS SAVED TO OUTPUT DIRECTORY #####") } + diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R index 67ca034d6af4b687546ce995c49826955f9bbee1..8b98c3ec15c06a7bb688c01afbb27d657e0ec15a 100644 --- a/modules/Visualization/R/tmp/PlotRobinson.R +++ b/modules/Visualization/R/tmp/PlotRobinson.R @@ -567,3 +567,4 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, } + diff --git a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R index 749fe3b64169465fb61738404c8253b0acea01cf..0d52983a92e43e9d1ea6812f6109313bd11ff7d4 100644 --- a/modules/Visualization/Visualization.R +++ b/modules/Visualization/Visualization.R @@ -18,7 +18,8 @@ Visualization <- function(recipe, statistics = NULL, probabilities = NULL, significance = F, - output_conf = NULL) { + output_conf = NULL, + logo = NULL) { # Try to 
produce and save several basic plots.
   # recipe: the auto-s2s recipe as read by read_yaml()
   # data: list containing the hcst, obs and (optional) fcst s2dv_cube objects
@@ -96,18 +97,21 @@ Visualization <- function(recipe,
     if (is.logical(significance)) {
       plot_metrics(recipe = recipe, data_cube = data$hcst,
                    metrics = skill_metrics, outdir = outdir,
-                   significance = significance, output_conf = output_conf)
+                   significance = significance, output_conf = output_conf,
+                   logo = logo)
       info(recipe$Run$logger,
            paste("Skill metrics significance set as", significance))
     } else {
       if (significance %in% c('both', 'dots')) {
         plot_metrics(recipe, data$hcst, skill_metrics, outdir,
-                     significance = 'dots', output_conf = output_conf)
+                     significance = 'dots', output_conf = output_conf,
+                     logo = logo)
         info(recipe$Run$logger, "Skill metrics significance as dots")
       }
       if (significance %in% c('both', 'mask')) {
         plot_metrics(recipe, data$hcst, skill_metrics, outdir,
-                     significance = 'mask', output_conf = output_conf)
+                     significance = 'mask', output_conf = output_conf,
+                     logo = logo)
         info(recipe$Run$logger, "Skill metrics significance as mask")
       }
     }
@@ -122,7 +126,7 @@ Visualization <- function(recipe,
   if ("statistics" %in% plots) {
     if (!is.null(statistics)) {
       plot_metrics(recipe, data$hcst, statistics, outdir,
-                   significance, output_conf = output_conf)
+                   significance, output_conf = output_conf, logo = logo)
     } else {
       error(recipe$Run$logger,
             paste0("The statistics plots have been requested, but the ",
@@ -139,6 +143,11 @@ Visualization <- function(recipe,
     if (is.null(recipe$Analysis$Workflow$Visualization$dots)) {
       recipe$Analysis$Workflow$Visualization$dots <- FALSE
     }
+    if (!is.null(recipe$Analysis$Workflow$Visualization$method)) {
+      method <- recipe$Analysis$Workflow$Visualization$method
+    } else {
+      method <- 'median'
+    }
     # Plot without mask or dots
     if ((recipe$Analysis$Workflow$Visualization$mask_ens
          %in% c('both', FALSE)) ||
@@ -146,7 +155,8 @@ Visualization <- function(recipe,
         %in% c('both', FALSE))) {
       plot_ensemble_mean(recipe, data$fcst, outdir,
                          mask = NULL, dots = NULL,
-                         output_conf = output_conf)
+                         output_conf = output_conf,
+                         method = method, logo = logo)
     }
     # Plots with masked
     if (recipe$Analysis$Workflow$Visualization$mask_ens %in%
@@ -163,7 +173,8 @@ Visualization <- function(recipe,
         plot_ensemble_mean(recipe, data$fcst,
                            mask = skill_metrics$enscorr,
                            dots = NULL,
-                           outdir, output_conf = output_conf)
+                           outdir, output_conf = output_conf,
+                           method = method, logo = logo)
       }
     }
     # Plots with dotted negative correlated in ens-mean-fcst
@@ -180,7 +191,8 @@ Visualization <- function(recipe,
         plot_ensemble_mean(recipe, data$fcst,
                            mask = NULL,
                            dots = skill_metrics$enscorr,
-                           outdir, output_conf = output_conf)
+                           outdir, output_conf = output_conf,
+                           method = method, logo = logo)
       }
     }
@@ -209,7 +221,8 @@ Visualization <- function(recipe,
                                  probabilities,
                                  mask = NULL,
                                  dots = NULL,
-                                 outdir, output_conf = output_conf)
+                                 outdir, output_conf = output_conf,
+                                 logo = logo)
     }
     # Plots with masked terciles
     if (recipe$Analysis$Workflow$Visualization$mask_terciles %in%
@@ -227,7 +240,8 @@ Visualization <- function(recipe,
                                    probabilities,
                                    mask = skill_metrics$rpss,
                                    dots = NULL,
-                                   outdir, output_conf = output_conf)
+                                   outdir, output_conf = output_conf,
+                                   logo = logo)
       }
     }
     # Plots with dotted terciles
@@ -245,7 +259,8 @@ Visualization <- function(recipe,
                                    probabilities,
                                    mask = NULL,
                                    dots = skill_metrics$rpss,
-                                   outdir, output_conf = output_conf)
+                                   outdir, output_conf = output_conf,
+                                   logo = logo)
       }
     }
   } else {
@@ -256,20 +271,21 @@ Visualization <-
function(recipe, } # Convert plots to user-chosen format - if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && - tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { - extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) - plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$", - recursive = TRUE, full.names = TRUE) - for (file in plot_files) { - system_command <- paste("convert -density 300", file, - "-resize 40% -alpha remove", - paste0(tools::file_path_sans_ext(file), ".", - extension)) - system(system_command) - } - unlink(plot_files) - info(recipe$Run$logger, - paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) - } +# if (!is.null(recipe$Analysis$Workflow$Visualization$file_format) && +# tolower(recipe$Analysis$Workflow$Visualization$file_format) != "pdf") { +# extension <- tolower(recipe$Analysis$Workflow$Visualization$file_format) +# plot_files <- list.files(path = recipe$Run$output_dir, pattern = "\\.pdf$", +# recursive = TRUE, full.names = TRUE) +# for (file in plot_files) { +# system_command <- paste("convert -density 300", file, +# "-resize 40% -alpha remove", +# paste0(tools::file_path_sans_ext(file), ".", +# extension)) +# system(system_command) +# } +# unlink(plot_files) +# info(recipe$Run$logger, +# paste0("##### PLOT FILES CONVERTED TO ", toupper(extension), " #####")) +# } } + diff --git a/modules/Visualization/output_size.yml b/modules/Visualization/output_size.yml index 6526591678216e4bc4bbcc50ccc0eb6fc75289f1..714524897307f37e4971f3787adcac02f8b8bdfb 100644 --- a/modules/Visualization/output_size.yml +++ b/modules/Visualization/output_size.yml @@ -104,4 +104,27 @@ region: #units inches colNA: "white" Mediterranean: Global: + robinson: + forecast_ensemble_mean: + width: 8 + height: 5 + skill_metrics: + width: 8 + height: 5 + cylindrical_equidistant: + most_likely_terciles: + width: 12 + height: 9 + dot_size: 2 + col_mask: 'white' + plot_margin: !expr c(0, 4.1, 5.5, 2.1) + Kuwait: + cylindrical_equidistant: + skill_metrics: + intxlon: 1 + intylat: 1 + country.borders: TRUE + width: 5 + bar_label_scale: 0.8 + axes_label_scale: 0.8 # Add other regions diff --git a/plot_NAO.R b/plot_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..8f01c45ae91d3de4789b8e1f68d58f3ba5db849b --- /dev/null +++ b/plot_NAO.R @@ -0,0 +1,307 @@ +plot_NAO <- function(recipe, nao, data) { + +# Read variable long_name to plot it + conf <- yaml::read_yaml("conf/variable-dictionary.yml") + var_name <- conf$vars[[which(names(conf$vars) == + recipe$Analysis$Variables$name)]]$long + plot_ts <- TRUE + plot_sp <- TRUE + alpha <- 0.05 + lons <- data$hcst$coords$longitude + lats <- data$hcst$coords$latitude + nao_region <- c(lonmin = -80, lonmax = 40, + latmin = 20, latmax = 80) + + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + # Use startdates param from SaveExp to correctly name the files: + if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) { + file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL, + fun = function(x) { + pos <- which(strsplit(x, "")[[1]] == ".") + res <- substr(x, pos-8, pos-1) + })$output1 + } + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + file_dates <- paste0('s', + recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end) + } + + if (plot_ts) { + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + 
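+    # The file_dates extraction above assumes the start date is given by the
+    # 8 characters right before the extension dot. A sketch with a
+    # hypothetical path:
+    #   x <- "/path/to/tas_19930501.nc"
+    #   pos <- which(strsplit(x, "")[[1]] == ".")
+    #   substr(x, pos - 8, pos - 1)   # "19930501"
+    # Note 'pos' holds every dot position and substr() only uses the first,
+    # so paths with earlier dots would need max(pos) instead.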
source("modules/old_modules/Indices/R/plot_deterministic_forecast.R") + for (tstep in 1:dim(nao$hcst$data)['time']) { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + obs <- Subset(nao$obs$data, along = 'time', ind = tstep) + exp <- Subset(nao$hcst$data, along = 'time', ind = tstep) + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <-gsub(".", "", recipe$Analysis$Datasets$System$name) + } + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste("NAO Index\n", + month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n") + xlabs <- as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste("NAO Index\n", + "Lead time", fmonth, + " / Start dates", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- file_dates + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + title = toptitle, fileout = plotfile, + caption = caption, caption_line = 6.5, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: To be removed when s2dv is released: + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/old_modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + # Get correct code for stereographic projection + projection_code <- get_proj_code(proj_name = "stereographic") + correl_obs <- Apply(list(data$obs$data, nao$obs$data), + target_dims = 'syear', fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + + for (tstep in 1:dim(nao$obs$data)['time']) { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + ## Observations + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + 
caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub(".", "", recipe$Analysis$Datasets$System$name) + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'polygon', + toptitle = toptitle, crop_coastlines = nao_region, + caption = caption, mask = sig, bar_extra_margin = c(4,0,4,0), + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + ## Ensemble-mean + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "NAO Index -", var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + } # end tstep loop + } +} diff --git a/recipe_NAO.yml b/recipe_NAO.yml new file mode 100644 index 0000000000000000000000000000000000000000..27687ac816f4f67fc5b48727979acffda83934f2 --- /dev/null +++ b/recipe_NAO.yml @@ -0,0 +1,110 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_NAO_single.yml b/recipe_NAO_single.yml new file mode 100644 index 0000000000000000000000000000000000000000..aaf166e2566df9587eead5d76311c1101d1319de --- /dev/null +++ b/recipe_NAO_single.yml @@ -0,0 +1,106 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
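+ # 'type' accepts "to_system", "to_reference", or a CDO-accepted grid: either + # a grid description file (as below) or a named grid such as 'r360x180'.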
+ type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_bigpredidata_oper_subseasonal_tas.yml b/recipe_bigpredidata_oper_subseasonal_tas.yml new file mode 100644 index 0000000000000000000000000000000000000000..322811ff7b0c78454deca1482dddd466113dffa8 --- /dev/null +++ b/recipe_bigpredidata_oper_subseasonal_tas.yml @@ -0,0 +1,83 @@ + +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: subseasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: weekly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + Datasets: + System: + - {name: 'NCEP-CFSv2', member: 'all'} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
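+ # Note: for subseasonal recipes the start date is a full date (YYYYMMDD) + # falling on the configured week_day; e.g. sdate 20250102 below is a Thursday.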
+ Time: + sdate: 20250102 + fcst_year: '2025' # Optional, int: Forecast year 'YYYY' + hcst_start: '2011' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 4 # Mandatory, int: Last leadtime time step in months + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: all + Skill: + metric: mean_bias rpss + save: none + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. This avoids saving the probs of 10 to 90 + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/bigpredidata_fqa.R # replace with the path to your script + expid: a78g # replace with your EXPID + hpc_user: bsc032413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 16 + custom_directives: ['#SBATCH --constraint=medmem'] + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. + email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails diff --git a/recipe_daily_prlr_cal_subseas.yml b/recipe_daily_prlr_cal_subseas.yml new file mode 100644 index 0000000000000000000000000000000000000000..0bb7a905fc6807caa23dba0960c02b6089b7b7d6 --- /dev/null +++ b/recipe_daily_prlr_cal_subseas.yml @@ -0,0 +1,129 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. 
Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. + #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'daily', units: 'mm', flux: FALSE} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20241024 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20241024 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: "bias" #evmos # Calibration method. 
(Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3], [1/10], [9/10]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. 
(Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_daily_subseas.yml b/recipe_daily_subseas.yml new file mode 100644 index 0000000000000000000000000000000000000000..e2ad2bd2fdd5185510d1ef427700907699d30219 --- /dev/null +++ b/recipe_daily_subseas.yml @@ -0,0 +1,131 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. + #- {name: 'tas', freq: 'weekly_mean', units: 'C'} + #- {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: FALSE} + - {name: 'psl', freq: 'daily_mean', units: 'hPa', flux: FALSE} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # - {name: 'ECMWF-ENS-EXT', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20241024 #20241024 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20241024 #20241024 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 5 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 11 # Last forecast time step in months. Starts at “1”. 
(Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: "bias" #evmos # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3], [1/10], [9/10]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: no # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. 
Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_ecvs_ano_mul_seas.yml b/recipe_ecvs_ano_mul_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..3df9d13315abf2f2bdbfa529c3a59da3b1686a02 --- /dev/null +++ b/recipe_ecvs_ano_mul_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + - {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1101' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
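+ # The Time_aggregation block below builds four rolling 3-month averages from + # the paired ini/end indices: forecast months 1-3, 2-4, 3-5 and 4-6.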
+ Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 30 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ad587c61feca7434e366839b6c4ffc8add2339f --- /dev/null +++ b/recipe_ecvs_ano_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
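+ # With sdate '1201' and fcst_year '2024' below, the forecast is initialised + # in December 2024; hindcasts 1993-2016 give 24 December start dates.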
+ Time: + sdate: '1201' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 30 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..c70b883be6bce459e8b391092a2bc732ae3dd8ba --- /dev/null +++ b/recipe_ecvs_cal_mul_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + #- {name: 'tas', freq: 'monthly_mean', units: 'C'} + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + fcst_year: '2025' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
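+ # The *_syear metrics requested above (rps_syear, crps_syear) presumably keep + # one score per start year, which the Scorecards 'score' aggregation can + # average over regions and start dates later on.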
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 20 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..0bcf9f201a1576ca6ff33263f40d3f2bac238aa7 --- /dev/null +++ b/recipe_ecvs_cal_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0201' + fcst_year: '2025' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
+ type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_subseas.yml b/recipe_ecvs_cal_subseas.yml new file mode 100644 index 0000000000000000000000000000000000000000..abd91f05130eea888172a5490a9cfb0a66420b56 --- /dev/null +++ b/recipe_ecvs_cal_subseas.yml @@ -0,0 +1,129 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. 
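+ # The 'flux' flag marks rate variables (e.g. precipitation flux), which + # presumably changes how the unit conversion is handled; see 'prlr' below.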
+ - {name: 'tas', freq: 'weekly_mean', units: 'C'} + - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: yes} +# name: 'tas' +# freq: 'weekly_mean' +# units: 'C' + # To request more variables to be divided in atomic recipes, add them this way: +# - {name: 'prlr', freq: 'weekly_mean', units: 'mm'} +# - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'} +# - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'NCEP-CFSv2', member: 'all'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: 20250213 #%Y%m%d # Change to 2023 + #- '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: 20250213 # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 4 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: evmos # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. 
(Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'none' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 32 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. 
diff --git a/recipe_prlr_cal_subseas.yml b/recipe_prlr_cal_subseas.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5bf1cd49f65bd2ecc387eb37c26a47bcbf124b26
--- /dev/null
+++ b/recipe_prlr_cal_subseas.yml
@@ -0,0 +1,130 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'weekly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    #- {name: 'tas', freq: 'weekly_mean', units: 'C'}
+    - {name: 'prlr', freq: 'weekly_mean', units: 'mm', flux: FALSE}
+#    name: 'tas'
+#    freq: 'weekly_mean'
+#    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+#    - {name: 'prlr', freq: 'weekly_mean', units: 'mm'}
+#    - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'}
+#    - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds: 'W m-2'}}
+  Datasets:
+    System:
+    # name: System name (Mandatory, str)
+    # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+    #- {name: 'NCEP-CFSv2', member: 'all'}
+    - {name: 'ECMWF-ENS-EXT', member: 'all'}
+    # To request more Systems to be divided in atomic recipes, add them this way:
+    # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+    - {name: 'ERA5'} # Reference name (Mandatory, str)
+    # To request more References to be divided into atomic recipes, add them this way:
+    # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20221013 #20241024 #%Y%m%d # Change to 2023
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: 20221013 #20241024 # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in weeks for this weekly-mean recipe. Starts at '1'. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step, in weeks for this weekly-mean recipe. Starts at '1'. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 9
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: minimum longitude (Mandatory, int)
+    # lonmax: maximum longitude (Mandatory, int)
+    # To request more regions to be divided in atomic recipes, add them this way:
+    # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+    #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49}
+    - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    #- {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
+    #- {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9}
+  Regrid:
+    method: conservative # Interpolation method (Mandatory, str)
+    type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none',
+    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Time_aggregation:
+      execute: no
+    Calibration:
+      method: "bias" #evmos # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Skill:
+      metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      percentiles: [[1/3, 2/3], [1/10], [9/10]]
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'all' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      mask_terciles: 'both'
+      mask_ens: 'both'
+      projection: Robinson
+      file_format: 'PNG'
+      dots_terciles: no # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44}
+        #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        #Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers.
+      metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table
+      metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill'
+      inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False
+      table_label: NULL # Optional, str: extra information to add in scorecard table title
+      fileout_label: NULL # Optional, str: extra information to add in scorecard output filename
+      col1_width: NULL # Optional, int: to adjust width of first column in scorecards table
+      col2_width: NULL # Optional, int: to adjust width of second column in scorecards table
+      calculate_diff: False # Mandatory, bool: True/False
+  ncores: 32 # Number of cores to be used in parallel computation.
+    # If left empty, defaults to 1. (Optional, int)
+  remove_NAs: yes # Whether to remove NAs.
+    # If left empty, defaults to no/false. (Optional, bool)
+  Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
+Run:
+  filesystem: gpfs # Name of the filesystem as defined in the archive configuration file
+  Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
+    # Default value is 'INFO'. (Optional, str)
+  Terminal: yes # Optional, bool: Whether to display log messages in the terminal.
+    # Default is yes/true.
+  output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively)
+  autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+
diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2be485a0f8179561bab8c71f22bec1c23d23ed50
--- /dev/null
+++ b/recipe_subseasonal_ecvs.yml
@@ -0,0 +1,127 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'weekly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+#    name: 'tas'
+#    freq: 'weekly_mean'
+#    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+#    - {name: 'prlr', freq: 'weekly_mean', units: 'mm'}
+#    - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'}
+#    - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds: 'W m-2'}}
+  Datasets:
+    System:
+    # name: System name (Mandatory, str)
+    # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+    - {name: 'NCEP-CFSv2', member: 'all'}
+    # To request more Systems to be divided in atomic recipes, add them this way:
+    # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+    - {name: 'ERA5'} # Reference name (Mandatory, str)
+    # To request more References to be divided into atomic recipes, add them this way:
+    # - {name: 'ERA5Land'}
+  Time:
+    sdate: 2023 #%Y%m%d # Change to 2023
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: #20231207 # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in weeks for this weekly-mean recipe. Starts at '1'. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step, in weeks for this weekly-mean recipe. Starts at '1'. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 9
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: minimum longitude (Mandatory, int)
+    # lonmax: maximum longitude (Mandatory, int)
+    #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided in atomic recipes, add them this way:
+    # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+    - {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49}
+    #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: "conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none',
+    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Time_aggregation:
+      execute: no
+    Calibration:
+      method: evmos # Calibration method. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Skill:
+      metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Probabilities:
+      # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds
+      percentiles: [[1/3, 2/3]]
+      # for quantiles and probability categories. Each set of thresholds should be
+      # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional)
+      save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str)
+    Visualization:
+      plots: skill_metrics # Types of plots to generate (Optional, str)
+      multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool)
+      projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str)
+      mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str)
+      file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF.
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        # Mandatory: Define regions for which the spatial aggregation will be performed.
+        # The regions must be included within the area defined in the 'Analysis:Region' section.
+        Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44}
+        #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        #Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers.
+      metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table
+      metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill'
+      inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False
+      table_label: NULL # Optional, str: extra information to add in scorecard table title
+      fileout_label: NULL # Optional, str: extra information to add in scorecard output filename
+      col1_width: NULL # Optional, int: to adjust width of first column in scorecards table
+      col2_width: NULL # Optional, int: to adjust width of second column in scorecards table
+      calculate_diff: False # Mandatory, bool: True/False
+  ncores: 9 # Number of cores to be used in parallel computation.
+    # If left empty, defaults to 1. (Optional, int)
+  remove_NAs: yes # Whether to remove NAs.
+    # If left empty, defaults to no/false. (Optional, bool)
+  Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'.
+Run:
+  filesystem: gpfs # Name of the filesystem as defined in the archive configuration file
+  Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'.
+    # Default value is 'INFO'. (Optional, str)
+  Terminal: yes # Optional, bool: Whether to display log messages in the terminal.
+    # Default is yes/true.
+  output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str)
+  code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively)
+  autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+
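A note on the subseasonal Time settings above: unlike the seasonal recipes further down in this diff, which use a four-digit 'mmdd' sdate, the subseasonal recipes take full %Y%m%d dates, and those dates fall on the configured week_day. A quick check in base R (the two dates are the ones used in the subseasonal recipes above; weekday names assume an English locale):

weekdays(as.Date(c("2025-02-13", "2022-10-13")))
# [1] "Thursday" "Thursday"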
diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml
new file mode 100644
index 0000000000000000000000000000000000000000..86bcec089021582309eb751c19fd1f216ddf08ac
--- /dev/null
+++ b/recipe_tas_decadal.yml
@@ -0,0 +1,118 @@
+Description:
+  Author: nperez
+  Info: Test decadal 25-07-2024
+
+Analysis:
+  Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+    units: C
+    flux: no
+  Datasets:
+    System:
+    - {name: 'EC-Earth3-i4', member: 'all'}
+    # - {name: 'HadGEM3-GC31-MM'}
+    #- {name: 'BCC-CSM2-MR'}
+    #- {name: 'CanESM5'}
+    #- {name: 'CMCC-CM2-SR5'}
+    #- {name: 'FGOALS-f3-L'}
+    #- {name: 'IPSL-CM6A-LR'}
+    # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+    Multimodel:
+      execute: yes # Mandatory, bool: Either yes/true or no/false
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    # sdate: '0501'
+    fcst_year: #'2021'
+    hcst_start: '1981' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2010' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 12 # Mandatory, int: Last leadtime time step in months
+  Region:
+    latmin: -90
+    latmax: 90
+    lonmin: 0
+    lonmax: 359.9
+  Regrid:
+    method: conservative # Mandatory, str: Interpolation method. See docu.
+    type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Time_aggregation:
+      execute: yes
+      method: average
+      user_def:
+        Y1: [1, 12] # aggregate forecast months 1 to 12, i.e. the first year
+        Y1-Y5: [1, 60]
+        Y6-Y10: [61, 120]
+        #Y1-JJA: [6, 8]
+        #Y2-Y5: [13, 60]
+        #Y5-Y10: [49, 120]
+    Anomalies:
+      compute: yes
+      cross_validation: no
+      save: none
+    Calibration:
+      method: raw # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
+      save: none
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics #forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      #dots: both
+      projection: Robinson
+      file_format: 'PNG'
+      #projection: robinson
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: NULL
+      metric: mean_bias enscorr rpss crpss EnsSprErr
+      metric_aggregation: 'skill'
+      #inf_to_na: yes
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 100 # Optional, int: number of cores, defaults to 1
+  remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE
+  alpha: 0.05
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: gpfs
+  output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs
+  code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is
+  autosubmit: no
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script
+    expid: a68v # replace with your EXPID
+    hpc_user: bsc32339 # replace with your hpc username
+    wallclock: 02:00 # hh:mm
+    processors_per_job: 4
+    platform: nord3v2
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
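The user_def entries in the decadal Time_aggregation block above map a label to a [first, last] pair of forecast-month indices. A minimal sketch of the averaging this implies, assuming monthly leadtimes stored in a 'time' dimension of a SUNSET-style data object (Subset() from ClimProjDiags, MeanDims() from s2dv; an illustration, not the module's actual code):

library(ClimProjDiags)  # Subset()
library(s2dv)           # MeanDims()
# 'Y1-Y5: [1, 60]' averages forecast months 1..60:
y1_y5 <- MeanDims(Subset(data$hcst$data, along = 'time', indices = 1:60), 'time')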
diff --git a/recipe_tas_singl_ano_seas.yml b/recipe_tas_singl_ano_seas.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3d2f6a9ec470981081bbadbd06943783fa4e57f4
--- /dev/null
+++ b/recipe_tas_singl_ano_seas.yml
@@ -0,0 +1,106 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    name: tas
+    freq: monthly_mean
+    units: C
+    flux: no
+  Datasets:
+    System:
+    - {name: 'ECMWF-SEAS5.1'}
+    #- {name: 'Meteo-France-System8'}
+    #- {name: 'CMCC-SPS3.5'}
+    # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+    Multimodel:
+      execute: no # Mandatory, bool: Either yes/true or no/false
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '0901'
+    fcst_year: '2024'
+    hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 6 # Mandatory, int: Last leadtime time step in months
+  Region:
+    - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40}
+    #- {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+  Regrid:
+    method: conservative # Mandatory, str: Interpolation method. See docu.
+    type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Anomalies:
+      compute: yes
+      cross_validation: no
+      save: all
+    Time_aggregation:
+      execute: yes
+      method: average
+      ini: [1, 2, 3, 4] # paired element-wise with 'end' below; see the note after this recipe
+      end: [3, 4, 5, 6]
+    Calibration:
+      method: raw # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3]] # frac: Quantile thresholds.
+      save: all
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics #forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      dots: both
+      projection: Robinson
+      file_format: 'PNG'
+      #projection: robinson
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: NULL
+      metric: mean_bias enscorr rpss crpss EnsSprErr
+      metric_aggregation: 'skill'
+      #inf_to_na: yes
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: gpfs #esarchive #gpfs
+  output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs
+  code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is
+  autosubmit: no
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script
+    expid: a68v # replace with your EXPID
+    hpc_user: bsc32339 # replace with your hpc username
+    wallclock: 02:00 # hh:mm
+    processors_per_job: 4
+    platform: nord3v2
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
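The ini/end vectors in this recipe's Time_aggregation block presumably pair up element-wise, so ini: [1, 2, 3, 4] with end: [3, 4, 5, 6] requests four overlapping three-month windows over the six forecast months:

ini <- c(1, 2, 3, 4)
end <- c(3, 4, 5, 6)
paste0("months ", ini, "-", end)
# [1] "months 1-3" "months 2-4" "months 3-5" "months 4-6"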
diff --git a/recipe_tas_singl_cal_seas.yml b/recipe_tas_singl_cal_seas.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3605b43caaef05ee42fe7554f0b41c199add72cb
--- /dev/null
+++ b/recipe_tas_singl_cal_seas.yml
@@ -0,0 +1,103 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    - {name: tas, freq: monthly_mean, units: C, flux: yes}
+  Datasets:
+    System:
+    - {name: 'ECMWF-SEAS5.1'}
+    # - {name: 'Meteo-France-System8'}
+    #- {name: 'CMCC-SPS3.5'}
+    # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+    Multimodel:
+      execute: no # Mandatory, bool: Either yes/true or no/false
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '1201'
+    fcst_year: '2024'
+    hcst_start: '1996' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 3 # Mandatory, int: Last leadtime time step in months
+  Region:
+    - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40}
+    #- {name: Global, latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9}
+  Regrid:
+    method: conservative # Mandatory, str: Interpolation method. See docu.
+    type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference"
+    #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
+  Workflow:
+    Anomalies:
+      compute: no
+      cross_validation: no
+      save: all
+    Time_aggregation:
+      execute: no #yes
+      method: average
+      ini: [1, 2, 3, 4]
+      end: [3, 4, 5, 6]
+    Calibration:
+      method: mse_min # Mandatory, str: Calibration method. See docu.
+      cross_validation: yes
+      save: none
+    Skill:
+      metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff
+      save: 'all'
+      cross_validation: yes
+    Probabilities:
+      percentiles: [[1/3, 2/3], [1/10], [9/10]]
+      save: none
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics #forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      dots: both
+      projection: Robinson
+      file_format: 'PNG'
+      #projection: robinson
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH: {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: NULL
+      metric: mean_bias enscorr rpss crpss EnsSprErr
+      metric_aggregation: 'skill'
+      #inf_to_na: yes
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 4 # Optional, int: number of cores, defaults to 1
+  remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: gpfs #esarchive #gpfs
+  output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ # replace with the directory where you want to save the outputs
+  code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ # replace with the directory where your code is
+  autosubmit: no
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script
+    expid: a68v # replace with your EXPID
+    hpc_user: bsc32339 # replace with your hpc username
+    wallclock: 02:00 # hh:mm
+    processors_per_job: 4
+    platform: nord3v2
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
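The percentiles list in this recipe defines three independent category sets: the terciles, plus the 10th and the 90th percentile as single thresholds, each of which splits the distribution into just two categories. In R terms:

probs_sets <- list(c(1/3, 2/3), 1/10, 9/10)
sapply(probs_sets, function(p) length(p) + 1)  # categories per set
# [1] 3 2 2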
diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986
Binary files /dev/null and b/rsz_rsz_bsc_logo.png differ
diff --git a/subsunset.sh b/subsunset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..598a76f3eaaca4f341165ec1293abbf4bef57857
--- /dev/null
+++ b/subsunset.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#SBATCH -n 64
+#SBATCH -N 1
+#SBATCH -t 10:00:00
+#SBATCH -J sunset_subprlr
+#SBATCH -o sunset_subprlr-%J.out
+#SBATCH -e sunset_subprlr-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=lowmem
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml
+
+Rscript /home/bsc/bsc032339/full_test.R recipe_prlr_cal_subseas.yml
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml
+
+
diff --git a/sunset.sh b/sunset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8d29b8c3004c88baac452d794fc19fd91d1bd080
--- /dev/null
+++ b/sunset.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 112
+#SBATCH -N 1
+#SBATCH -t 10:00:00
+#SBATCH -J sunset_multimodel
+#SBATCH -o sunset_multimodel-%J.out
+#SBATCH -e sunset_multimodel-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=highmem
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml
+
+
diff --git a/sunsetv2.sh b/sunsetv2.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5ae55755f5f77bf8a49e81bebd76a2f30b665a1d
--- /dev/null
+++ b/sunsetv2.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 112
+#SBATCH -N 1
+#SBATCH -t 3:00:00
+#SBATCH -J sunset_ecmwf
+#SBATCH -o sunset_ecmwf-%J.out
+#SBATCH -e sunset_ecmwf-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=lowmem
+
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_seas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R
+
diff --git a/testing_multimodel.R b/testing_multimodel.R
new file mode 100644
index 0000000000000000000000000000000000000000..b1859cde25b3f7211cc886ef249200268b05d06b
--- /dev/null
+++ b/testing_multimodel.R
@@ -0,0 +1,137 @@
+# From Multimodel
+source("modules/Loading/R/dates2load.R")
+source("modules/Loading/R/get_timeidx.R")
+source("modules/Loading/R/check_latlon.R")
+source('modules/Multimodel/load_multimodel.R')
+source('modules/Multimodel/load_multimodel_splitted.R')
+source('modules/Multimodel/dims_multimodel.R')
+source('modules/Multimodel/build_multimodel.R')
+source('modules/Multimodel/clean_multimodel.R')
+
+recipe <- "recipe_tas.yml" # I have included multimodel yes
+library(yaml)
+recipe <- read_yaml(recipe)
+
+# (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')) ## Fails because I don't know where split_loading comes from
+
+# I guess we need this:
+# From load_multimodel
+
+archive <- read_yaml("conf/archive.yml")$esarchive
+ref.name <- recipe$Analysis$Datasets$Reference$name
+
+# Let's try creating a vector
+## I don't know where this comes from
+## recipe$Analysis$Datasets$System$models
+exp.name <- c("ECMWF-SEAS5.1", "Meteo-France-System7")
+
+library(startR)
+store.freq <- recipe$Analysis$Variables$freq
+variable <- "tas" #strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]]
+exp_descrip <- archive$System[[exp.name[1]]]
+reference_descrip <- archive$Reference[[ref.name]]
+sdates <- dates2load(recipe, recipe$Run$logger)
+lats.min <- recipe$Analysis$Region$latmin
+lats.max <- recipe$Analysis$Region$latmax
+lons.min <- recipe$Analysis$Region$lonmin
+lons.max <- recipe$Analysis$Region$lonmax
+circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max)
+
+# Not defined anywhere in the sourced scripts; TRUE matches the obs call below
+split_multiselected_dims <- TRUE
+
+recipe$Run$output_dir <- file.path("/esarchive/scratch/nperez/git4/recipe_tas_20240531165840", "outputs",
+                                   recipe$Analysis$Datasets$Multimodel$createFrom)
+
+hcst_start <- recipe$Analysis$Time$hcst_start
+hcst_end <- recipe$Analysis$Time$hcst_end
+shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2)
+filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__",
+                   hcst_start, "-", hcst_end, "_s", shortdate, ".nc")
+
+source("modules/Saving/Saving.R")
+hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name,
+                       recipe$Analysis$Workflow$Calibration$method,
+                       "$var$", filename)
+# hcst.path <- gsub(variable[1], "$var$", hcst.path)
+# hcst.path <- gsub('Multimodel', "$model$", hcst.path)
+fcst.path <- obs.path <- hcst.path
+obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T)
+obs.path <- gsub("$model$", gsub('\\.', '', exp.name[1]), obs.path, fixed = T)
+
+hcst <- Start(dat = hcst.path,
+              var = variable,
+              file_date = sdates$hcst,
+              model = gsub('\\.', '', exp.name),
+              time = 'all',
+              latitude = 'all',
+              latitude_reorder = Sort(),
+              longitude = 'all',
+              longitude_reorder = circularsort,
+              synonims = list(latitude = c('lat', 'latitude'),
+                              longitude = c('lon', 'longitude'),
+                              ensemble = c('member', 'ensemble')),
+              ensemble = 'all',
+              metadata_dims = 'var',
+              largest_dims_length = TRUE,
+              return_vars = list(latitude = 'dat',
+                                 longitude = 'dat',
+                                 time = 'file_date'),
+              split_multiselected_dims = split_multiselected_dims,
+              retrieve = TRUE)
+
+hcst <- as.s2dv_cube(hcst)
+# Adjust dates for models where the time stamp goes into the next month
+# No need to check the dates if the files are saved by SUNSET:
+#if (recipe$Analysis$Variables$freq == "monthly_mean") {
+#  hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag)
+#}
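+
+# Explanatory note (not part of the original run): Start() expands the
+# $model$, $var$ and $file_date$ wildcards in hcst.path with every combination
+# of the 'model', 'var' and 'file_date' selectors passed above, loading one
+# file per combination and binding them along those dimensions.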
+
+fcst <- Start(dat = fcst.path,
+              var = variable,
+              file_date = sdates$fcst,
+              model = gsub('\\.', '', exp.name),
+              time = 'all',
+              latitude = 'all',
+              latitude_reorder = Sort(),
+              longitude = 'all',
+              longitude_reorder = circularsort,
+              synonims = list(latitude = c('lat', 'latitude'),
+                              longitude = c('lon', 'longitude'),
+                              ensemble = c('member', 'ensemble')),
+              ensemble = 'all',
+              metadata_dims = 'var',
+              largest_dims_length = TRUE,
+              return_vars = list(latitude = 'dat',
+                                 longitude = 'dat',
+                                 time = 'file_date'),
+              split_multiselected_dims = split_multiselected_dims,
+              retrieve = TRUE)
+
+# Convert fcst to s2dv_cube only after it has been retrieved
+fcst <- as.s2dv_cube(fcst)
+
+obs <- Start(dat = obs.path,
+             var = variable,
+             file_date = sdates$hcst,
+             time = 'all',
+             latitude = 'all',
+             latitude_reorder = Sort(),
+             longitude = 'all',
+             longitude_reorder = circularsort,
+             synonims = list(latitude = c('lat', 'latitude'),
+                             longitude = c('lon', 'longitude')),
+             metadata_dims = 'var',
+             return_vars = list(latitude = 'dat',
+                                longitude = 'dat',
+                                time = 'file_date'),
+             split_multiselected_dims = TRUE,
+             retrieve = TRUE)
+
+default_dims <- c(dat = 1, var = 1, sday = 1,
+                  sweek = 1, syear = 1, time = 1,
+                  latitude = 1, longitude = 1, ensemble = 1)
+default_dims[names(dim(obs))] <- dim(obs)
+dim(obs) <- default_dims
+
+# Convert obs to s2dv_cube
+obs <- as.s2dv_cube(obs)
+
+# Gather the three objects once obs exists (the original built this list
+# before obs was created)
+datos <- list(hcst = hcst, fcst = fcst, obs = obs)
diff --git a/tools/add_logo.R b/tools/add_logo.R
index 42fb87c50b1bf1a0c20409cee43cc1703e8aeb3c..92259a2de23fbb8e06be40aab26c568cf8a559d2 100644
--- a/tools/add_logo.R
+++ b/tools/add_logo.R
@@ -1,15 +1,33 @@
-add_logo <- function(recipe, logo) {
-  # recipe: SUNSET recipe
+add_logo <- function(file, logo, logo_resize_percentage = 0.25) {
+  # file: path to the figure to stamp
   # logo: URL to the logo
-  system <- list.files(paste0(recipe$Run$output_dir, "/plots/"))
-  variable <- recipe$Analysis$Variable$name
-  files <- lapply(variable, function(x) {
-    f <- list.files(paste0(recipe$Run$output_dir, "/plots/",
-                           system, "/", x))
-    full_path <- paste0(recipe$Run$output_dir, "/plots/",
-                        system, "/", x, "/", f)})[[1]]
-  dim(files) <- c(file = length(files))
-  Apply(list(files), target_dims = NULL, function(x) {
-    system(paste("composite -gravity southeast -geometry +10+10",
-                 logo, x, x))}, ncores = recipe$Analysis$ncores)
+  fig_width <- as.numeric(system(paste("identify -format '%w'", file),
+                                 intern = TRUE))
+  # Height of the resized logo; use the same percentage as the composite
+  # step below so the spliced white band matches the pasted logo
+  logo_height <- as.numeric(system(paste("convert", logo,
+                                         "-resize", fig_width * logo_resize_percentage,
+                                         "-format '%h' info:", sep = " "),
+                                   intern = TRUE))
+  system(paste0("convert ", file,
+                " -gravity south -background white -splice 0x",
+                logo_height, " extended_fig.png"))
+  system(paste0("convert extended_fig.png \\( ",
+                logo, " -resize ", fig_width * logo_resize_percentage,
+                " \\) -gravity southeast -composite ",
+                file))
+  file.remove("extended_fig.png")
 }
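+
+# Hypothetical usage (illustrative path; requires ImageMagick's 'identify'
+# and 'convert' on the system PATH):
+# add_logo("plots/ECMWF-SEAS51/ERA5/evmos/tas/rpss-august_ft01.png",
+#          "rsz_rsz_bsc_logo.png", logo_resize_percentage = 0.25)
+# The function splices a white band under the figure, sized to the resized
+# logo's height, then composites the logo into the bottom-right corner.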
+
+
+#fig <- "/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/forecast_most_likely_tercile-20240801_ft01.png"
+
+#fig <- "/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/forecast_ensemble_median-20240801_ft01.png"
+
+#fig <- "/home/bsc/bsc032339/recipe_tas_20241106160039/plots/Meto-France-System8/ERA5/evmos/tas/rpss-august_ft01.png"
+
+#fig_width <- as.numeric(system(paste("identify -format '%w'", fig), intern = TRUE))
+
+#logo <- "rsz_rsz_bsc_logo.png"
+#logo_height <- as.numeric(system(paste("convert", logo, "-resize", fig_width * 0.1, "-format '%h' info:", sep=" "), intern = TRUE))
+
+#system(paste0("convert ", fig, " -gravity south -background white -splice 0x", logo_height, " extended_fig.png"))
+
+#system(paste0("convert extended_fig.png \\( ", logo, " -resize ", fig_width * 0.25, " \\) -gravity southeast -composite extended_fig.png"))
diff --git a/tools/check_recipe.R b/tools/check_recipe.R
index e6ab4b2deefc3bd7862bc25638fdf8e627afd8e3..3dd186c7d116719a661615913ec8d189eaab14f3 100644
--- a/tools/check_recipe.R
+++ b/tools/check_recipe.R
@@ -607,13 +607,15 @@ check_recipe <- function(recipe) {
   }
 
   # Skill
-  AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps", "rps_syear",
+  AVAILABLE_METRICS <- c("enscorr", "corr_individual_members", "rps",
+                         "rps_syear", "cov", "covariance", "std",
+                         "standard_deviation", "n_eff", "spread",
                          "rpss", "frps", "frpss", "crps", "crps_syear",
                          "crpss", "bss10", "bss90", "rms", "mean_bias",
                          "mean_bias_ss", "enssprerr", "rps_clim",
                          "rps_clim_syear", "crps_clim", "crps_clim_syear",
                          "enscorr_specs", "frps_specs", "rpss_specs",
-                         "frpss_specs", "bss10_specs", "bss90_specs", "rms")
+                         "frpss_specs", "bss10_specs", "bss90_specs", "rms", "rmss")
   if ("Skill" %in% names(recipe$Analysis$Workflow)) {
     if (is.null(recipe$Analysis$Workflow$Skill$metric)) {
       error(recipe$Run$logger,
@@ -657,10 +659,15 @@ check_recipe <- function(recipe) {
             "Parameter 'percentiles' must be defined under 'Probabilities'.")
       error_status <- TRUE
     } else if (!is.list(recipe$Analysis$Workflow$Probabilities$percentiles)) {
-      error(recipe$Run$logger,
-            paste("Parameter 'Probabilities:percentiles' expects a list.",
-                  "See documentation in the wiki for examples."))
-      error_status <- TRUE
+      if (length(recipe$Analysis$Workflow$Probabilities$percentiles) == 1) {
+        recipe$Analysis$Workflow$Probabilities$percentiles <-
+          list(recipe$Analysis$Workflow$Probabilities$percentiles)
+      } else {
+        error(recipe$Run$logger,
+              paste("Parameter 'Probabilities:percentiles' expects a list.",
+                    "See documentation in the wiki for examples."))
+        error_status <- TRUE
+      }
     }
     # Saving checks
     SAVING_OPTIONS_PROBS <- c("all", "none", "bins_only", "percentiles_only")
diff --git a/tools/libs.R b/tools/libs.R
index 401467860ba602c0bd459439e973e3637adb7a4f..5973f2195af4c1b81972d62d48e43edc82aada68 100644
--- a/tools/libs.R
+++ b/tools/libs.R
@@ -37,6 +37,9 @@ source("tools/get_archive.R")
 source("tools/Utils.R")
 source("tools/restructure_recipe.R")
 # source("tools/add_dims.R") # Not sure if necessary yet
+## To be removed after next release of CSTools:
+source("modules/Crossval/R/tmp/CST_MergeDims.R")
+
 # Settings
 options(bitmapType = 'cairo')
diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
index 753c1036ce678bb870135d889166d73d170c9a9d..569ddad7b41c884f052684f4f28cc551e13aa607 100644
--- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
+++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
@@ -28,6 +28,14 @@ Analysis:
     method: bilinear
     type: to_system
   Workflow:
+    Time_aggregation:
+      execute: yes
+      user_def:
+        ftimes: [1, 3]
+        JJA: !expr sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14)))
+      method: average
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
     Anomalies:
       compute: yes
       cross_validation: yes
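A closing note on the !expr tag introduced in the JJA entry of ex0_1-recipe.yml above: with the R yaml package, !expr nodes are only evaluated when expression evaluation is enabled, so the recipe loader presumably reads the file along these lines (a sketch, not SUNSET's actual loading code):

library(yaml)
recipe <- read_yaml("use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml",
                    eval.expr = TRUE)
recipe$Analysis$Workflow$Time_aggregation$user_def$JJA # evaluated to a numeric index vector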