diff --git a/.gitignore b/.gitignore index 263c4e640a4ffe3bd13bf6a14f80c37553954d4d..9244f19d212d1dd9c4e9d9760b780344da951027 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ out-logs/ *.swp *.swo ecsbatch.log* +*.nc modules/Loading/testing_recipes/recipe_decadal_calendartest.yml modules/Loading/testing_recipes/recipe_decadal_daily_calendartest.yml conf/vitigeoss-vars-dict.yml +*.err +*.out diff --git a/Crossval_NAO.R b/Crossval_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..8fe9fa014a8a1bb2aad2de8c29ee8948a293bb0c --- /dev/null +++ b/Crossval_NAO.R @@ -0,0 +1,322 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_anomalies <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + if ('latitude' %in% names(dim(data$hcst$data))) { + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'global' + } else if ('region' %in% names(dim(data$hcst$data))) { + agg = 'region' + nregions <- dim(data$hcst$data)['region'] + } + # output_dims from loop based on original dimensions + ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time', + ## 'latitude', 'longitude', 'unneeded', 'syear' + ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')], + names(sdate_dim)) + orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded' + orig_dims[orig_dims %in% 'syear'] <- 'ensemble' + tr_dim_names <- c(orig_dims, + names(sdate_dim)) + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ?
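# Editor's note: the sketch below is illustrative only (not part of this PR).
# It shows the fold structure that the cross-validation loop further down
# consumes via cross[[t]]$train.dexes and cross[[t]]$eval.dexes; the exact
# return value of the internal helper CSTools:::.make.eval.train.dexes is an
# assumption here, reconstructed for 'leave-one-out' with 3 start dates.
n_sdates <- 3
cross_sketch <- lapply(seq_len(n_sdates), function(t) {
  list(train.dexes = setdiff(seq_len(n_sdates), t), # years used for training
       eval.dexes = t)                              # held-out year to evaluate
})
str(cross_sketch[[2]])
# List of 2
#  $ train.dexes: int [1:2] 1 3
#  $ eval.dexes : int 2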
+ + ## output objects + ano_hcst_ev_res <- NULL + ano_obs_ev_res <- NULL + ano_obs_tr_res <- NULL + # one element per set of probabilities requested in the recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, target_dims = c('syear'),#, 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + # store results + ano_hcst_ev_res <- abind(ano_hcst_ev_res, ano_hcst_ev, + along = length(dim(ano_hcst_ev)) + 1) + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(ano_hcst_ev_res)) <- ev_dim_names + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim work, the reference forecast needs to have the same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('cat', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('cat', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + + # Forecast anomalies: + if (!is.null(data$fcst)) { + clim_hcst
<- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + data$fcst$data <- Ano(data = data$fcst$data, clim = clim_hcst) + # Tercile limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(ano_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', abs_thresholds = lims_ano_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(data$fcst$data, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert the resulting anomalies to s2dv_cubes + ano_hcst <- data$hcst + ano_hcst$data <- ano_hcst_ev_res + ano_obs <- data$obs + ano_obs$data <- ano_obs_ev_res + + info(recipe$Run$logger, + "#### Anomalies and Probabilities Done #####") + if (recipe$Analysis$Workflow$Anomalies$save != 'none') { + info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Anomalies$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Anomalies$save == 'all') { + save_observations(recipe = recipe, data_cube = ano_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Round category thresholds to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + # append in loop order so the names assigned below match the elements + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + if (!is.null(data$fcst)) { + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all'))) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { +
probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + # Save ensemble mean for multimodel option: + hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(hcst_EM = + Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$hcst, agg = agg, + module = "statistics") + fcst_EM <- NULL + if (!is.null(data$fcst)) { + fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + save_metrics(recipe = recipe, + metrics = list(fcst_EM = + Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + data_cube = data$fcst, agg = agg, + module = "statistics") + } + return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = ano_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements as in $Probabilities$percentiles + ## each element will be an array with a 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evaluation indices + ## in this case cross-validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross-validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case anomalies computed with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validated observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/MODULES b/MODULES index 7ed13edc7a7ef0e1532688570915ade26ea28ad6..e88620be04ae598f6c9310e66751ae7967880d21 100644 --- a/MODULES +++ b/MODULES @@ -22,6 +22,16 @@ elif [[ $BSC_MACHINE == "mn5" ]]; then source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 +elif [[ $BSC_MACHINE == "amd" ]]; then + + module purge + module load CDO/1.9.10-foss-2019b + module load R/4.1.2-foss-2019b + module load GEOS/3.7.2-foss-2019b-Python-3.7.4 + module load GDAL/3.5.0-foss-2019b-Python-3.7.4 + module load PROJ/9.0.0-GCCcore-8.3.0 + module load Phantomjs/2.1.1 + elif [[ $HOSTNAME == "bsceshub02.bsc.es" ]]; then module purge diff --git a/Rplots.pdf b/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2610e8a00b147a0801d6aad8aec8c2fd21a5a476 Binary files /dev/null and b/Rplots.pdf differ diff --git a/amdsunset.sh b/amdsunset.sh new file mode 100644 index 0000000000000000000000000000000000000000..c7ba06281c90ea3874dd45d6cf63c2482e6a1c19 --- /dev/null +++ b/amdsunset.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH -n 50 +#SBATCH -N 1 +#SBATCH -t 4:00:00 +#SBATCH -J sunset_multimodel +#SBATCH -o sunset_multimodel-%J.out +#SBATCH -e sunset_multimodel-%J.err +#SBATCH --account=bsc32 + +#### --qos=acc_bsces + +source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh +conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0 + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml + +#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml + +Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_mul_seas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R + diff --git a/autosubmit/auto-multimodel.sh b/autosubmit/auto-multimodel.sh index a9912666046bf3d8fc33643f764d864fa390963d..0ca01aebab0cac73d504afd010a92a983f9d92e2 100644 --- a/autosubmit/auto-multimodel.sh +++ b/autosubmit/auto-multimodel.sh @@ -1,18 +1,25 @@ #!/bin/bash ############ AUTOSUBMIT INPUTS ############ -proj_dir=%PROJDIR% +projdir=%PROJDIR% outdir=%common.OUTDIR% script=%common.SCRIPT% +tmpdir=%common.TMPDIR% SPLIT=%SPLIT% ############################### -cd $proj_dir +if [ -d $projdir ]; then + cd $projdir + srcdir=${outdir} +else
+ cd ${tmpdir}/sunset/$(basename ${outdir})/ + srcdir=${tmpdir}/$(basename ${outdir}) +fi -source split_to_recipe -# atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/multimodel/atomic_recipe_sys-Multimodel${recipe}.yml +atomic_recipe=${srcdir}/logs/recipes/multimodel/atomic_recipe_sys-Multimodel${recipe}.yml +set +eu source MODULES +set -eu Rscript ${script} ${atomic_recipe} diff --git a/autosubmit/auto-scorecards.sh b/autosubmit/auto-scorecards.sh index c30f643f3be53f216ead66675a9545a0e159198a..7722e3b5d2bce0a3e43fae7726ffe671d68b2a30 100644 --- a/autosubmit/auto-scorecards.sh +++ b/autosubmit/auto-scorecards.sh @@ -1,15 +1,24 @@ #!/bin/bash ############ AUTOSUBMIT INPUTS ############ -proj_dir=%PROJDIR% +projdir=%PROJDIR% outdir=%common.OUTDIR% recipe=%common.RECIPE% +tmpdir=%common.TMPDIR% ############################### -cd $proj_dir +if [ -d $projdir ]; then + cd $projdir + srcdir=${outdir} +else + cd ${tmpdir}/sunset/$(basename ${outdir})/ + srcdir=${tmpdir}/$(basename ${outdir}) +fi -recipe=${outdir}/logs/recipes/${recipe} +recipe=${srcdir}/logs/recipes/${recipe} +set +eu source MODULES +set -eu -Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${outdir} +Rscript modules/Scorecards/execute_scorecards.R ${recipe} ${srcdir} diff --git a/autosubmit/auto-transfer_recipes.sh b/autosubmit/auto-transfer_recipes.sh new file mode 100644 index 0000000000000000000000000000000000000000..4b332e8bd3ad45cab9fa9970bdbd14fd42404ad4 --- /dev/null +++ b/autosubmit/auto-transfer_recipes.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +tmpdir=%common.TMPDIR% +############################### + +# cd $proj_dir + +# username=$(whoami) + +destdir=${tmpdir}/$(basename ${outdir}) + +# Copy recipes +mkdir -p $destdir +cp -r /gpfs/archive/bsc32/${outdir}/* ${destdir} + +# Copy repository with code +mkdir -p ${tmpdir}/sunset/$(basename ${outdir})/ +cp -r /gpfs/archive/bsc32/${proj_dir}/* ${tmpdir}/sunset/$(basename ${outdir})/ diff --git a/autosubmit/auto-transfer_results.sh b/autosubmit/auto-transfer_results.sh new file mode 100644 index 0000000000000000000000000000000000000000..fda0d7681702fe667fa74d1e53c03de9286a98d8 --- /dev/null +++ b/autosubmit/auto-transfer_results.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +############ AUTOSUBMIT INPUTS ############ +proj_dir=%PROJDIR% +outdir=%common.OUTDIR% +tmpdir=%common.TMPDIR% +############################### + +srcdir=${tmpdir}/$(basename ${outdir}) + +cp -r ${srcdir}/* /gpfs/archive/bsc32/${outdir} + +rm -r $srcdir + +rm -r ${tmpdir}/sunset/$(basename ${outdir})/ diff --git a/autosubmit/auto-verification.sh b/autosubmit/auto-verification.sh index e909dbfb433ff6d59816734973547e918513bf76..d323c45103c593933eccfa18758a58d9c28e5e8c 100644 --- a/autosubmit/auto-verification.sh +++ b/autosubmit/auto-verification.sh @@ -1,19 +1,27 @@ #!/bin/bash ############ AUTOSUBMIT INPUTS ############ -proj_dir=%PROJDIR% +projdir=%PROJDIR% outdir=%common.OUTDIR% +tmpdir=%common.TMPDIR% script=%common.SCRIPT% CHUNK=%CHUNK% ############################### -cd $proj_dir +if [ -d $projdir ]; then + cd $projdir + srcdir=${outdir} +else + cd ${tmpdir}/sunset/$(basename ${outdir})/ + srcdir=${tmpdir}/$(basename ${outdir}) +fi source chunk_to_recipe -# atomic_recipe_number=$(printf "%02d" $CHUNK) -atomic_recipe=${outdir}/logs/recipes/atomic_recipe_${recipe}.yml +atomic_recipe=${srcdir}/logs/recipes/atomic_recipe_${recipe}.yml +set +eu source MODULES +set -eu Rscript ${script} 
${atomic_recipe} diff --git a/autosubmit/conf_gpfs/autosubmit.yml b/autosubmit/conf_gpfs/autosubmit.yml new file mode 100644 index 0000000000000000000000000000000000000000..0fd5d5c6aaf61945d131da77cda08d8d1fdd86cd --- /dev/null +++ b/autosubmit/conf_gpfs/autosubmit.yml @@ -0,0 +1,22 @@ +config: + EXPID: + AUTOSUBMIT_VERSION: 4.0.0b0 + MAXWAITINGJOBS: 16 + # Default maximum number of jobs to be running at the same time at any platform + # Default: 6 + TOTALJOBS: 16 + SAFETYSLEEPTIME: 10 + RETRIALS: 0 +mail: + NOTIFICATIONS: + TO: +communications: + # Communications library used to connect with platforms: paramiko or saga. + # Default: paramiko + API: paramiko +storage: + # Defines the way of storing the progress of the experiment. The available options are: + # A PICKLE file (pkl) or an SQLite database (db). Default: pkl + TYPE: pkl + # Defines if the remote logs will be copied to the local platform. Default: True. + COPY_REMOTE_LOGS: True diff --git a/autosubmit/conf_gpfs/expdef.yml b/autosubmit/conf_gpfs/expdef.yml new file mode 100644 index 0000000000000000000000000000000000000000..8dc29b27843729afa89be242a0d0de96bad1b3ec --- /dev/null +++ b/autosubmit/conf_gpfs/expdef.yml @@ -0,0 +1,44 @@ +DEFAULT: + EXPID: + HPCARCH: nord3v2 +experiment: + DATELIST: + MEMBERS: fc0 + CHUNKSIZEUNIT: month + CHUNKSIZE: 1 + NUMCHUNKS: + CHUNKINI: 1 + CALENDAR: standard +project: + PROJECT_TYPE: local + # Destination folder name for project. type: STRING, default: leave empty, + PROJECT_DESTINATION: auto-s2s +# If PROJECT_TYPE is not git, no need to change +git: + # Repository URL STRING: 'https://github.com/torvalds/linux.git' + PROJECT_ORIGIN: https://earth.bsc.es/gitlab/es/auto-s2s.git + # Select branch or tag, STRING, default: 'master', help: {'master' (default), 'develop', 'v3.1b', ...} + PROJECT_BRANCH: master + # type: STRING, default: leave empty, help: if model branch is a TAG leave empty + PROJECT_COMMIT: '' +svn: + PROJECT_URL: '' + PROJECT_REVISION: '' +# If PROJECT_TYPE is not local, no need to change +local: + # type: STRING, help: /foo/bar/ecearth + PROJECT_PATH: /esarchive/scratch/vagudets/repos/auto-s2s/ +# If PROJECT_TYPE is none, no need to change +project_files: + # Where is PROJECT CONFIGURATION file location relative to project root path + FILE_PROJECT_CONF: '' + # Where is JOBS CONFIGURATION file location relative to project root path + FILE_JOBS_CONF: '' + # Default job scripts type in the project. type: STRING, default: bash, supported: 'bash', 'python' or 'r' + JOB_SCRIPTS_TYPE: '' +rerun: + # Is a rerun or not? [Default: Do set FALSE]. 
BOOLEAN: TRUE, FALSE + RERUN: FALSE + # If RERUN: TRUE then supply the list of chunks to rerun + # LIST: [ 19601101 [ fc0 [1 2 3 4] fc1 [1] ] 19651101 [ fc0 [16-30] ] ] + CHUNKLIST: '' diff --git a/autosubmit/conf_gpfs/jobs.yml b/autosubmit/conf_gpfs/jobs.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f9ce28148dd78dc3fcc97c268236475c47b3004 --- /dev/null +++ b/autosubmit/conf_gpfs/jobs.yml @@ -0,0 +1,41 @@ +JOBS: + transfer_recipes: + FILE: autosubmit/auto-transfer_recipes.sh + RUNNING: once + WALLCLOCK: 00:10 + PLATFORM: transfer + NOTIFY_ON: + PROCESSORS: 1 + verification: + FILE: autosubmit/auto-verification.sh + RUNNING: chunk + WALLCLOCK: + NOTIFY_ON: + PLATFORM: + PROCESSORS: + # SPLITS: # n_atomic_recipes, number of atomic recipes + multimodel: + FILE: autosubmit/auto-multimodel.sh + RUNNING: once + WALLCLOCK: + NOTIFY_ON: + PLATFORM: + PROCESSORS: + DEPENDENCIES: + verification: + SPLITS_FROM: + SPLITS: # n_atomic_recipes/n_models = n_multimodels + scorecards: + FILE: autosubmit/auto-scorecards.sh + WALLCLOCK: 00:10 + PLATFORM: + NOTIFY_ON: + PROCESSORS: 1 + DEPENDENCIES: + transfer_results: + FILE: autosubmit/auto-transfer_results.sh + RUNNING: once + WALLCLOCK: 00:10 + PLATFORM: transfer + NOTIFY_ON: + PROCESSORS: 1 diff --git a/autosubmit/conf_gpfs/platforms.yml b/autosubmit/conf_gpfs/platforms.yml new file mode 100644 index 0000000000000000000000000000000000000000..03f4b9401d687792ca8b0554c7dd94cd95865294 --- /dev/null +++ b/autosubmit/conf_gpfs/platforms.yml @@ -0,0 +1,33 @@ +## TODO: Change platform +Platforms: + nord3v2: + TYPE: slurm + HOST: nord4.bsc.es + USER: + PROJECT: bsc32 ## TO BE CHANGED + SCRATCH_DIR: /gpfs/scratch/ ## TO BE CHANGED + PROCESSORS_PER_NODE: 16 + SERIAL_QUEUE: debug + QUEUE: bsc_es + mn5: + TYPE: slurm + HOST: glogin1.bsc.es + PROJECT: bsc32 + USER: + QUEUE: gp_bsces + SCRATCH_DIR: /gpfs/scratch/ + TEMP_DIR: '' + amd: + TYPE: slurm + HOST: amdlogin1.bsc.es + PROJECT: bsc32 + USER: + SCRATCH_DIR: /gpfs/scratch/ + TEMP_DIR: '' + transfer: + TYPE: slurm + HOST: transfer2.bsc.es + PROJECT: bsc32 + SCRATCH_DIR: /gpfs/scratch/ + USER: + diff --git a/autosubmit/conf_gpfs/proj.yml b/autosubmit/conf_gpfs/proj.yml new file mode 100644 index 0000000000000000000000000000000000000000..679cf63b1ced38fd833d28ea9acfa145a1e9bc4f --- /dev/null +++ b/autosubmit/conf_gpfs/proj.yml @@ -0,0 +1,4 @@ +common: + MODULES: "MODULES" + OUTDIR: + SCRIPT: diff --git a/conf/archive_decadal.yml b/conf/archive_decadal.yml index c656d0de422d1808246112885239c9148f8c6dae..f3004af25acd202a1bcde4d2a68c668324221725 100644 --- a/conf/archive_decadal.yml +++ b/conf/archive_decadal.yml @@ -1,5 +1,4 @@ -gpfs: - src_sys: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/" + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" System: # ---- EC-Earth3-i4: @@ -34,7 +33,7 @@ gpfs: member: r1i4p1f1,r2i4p1f1,r3i4p1f1,r4i4p1f1,r5i4p1f1,r6i4p1f1,r7i4p1f1,r8i4p1f1,r9i4p1f1,r10i4p1f1 initial_month: 11 sdate_add: 0 - reference_grid: "/gpfs/scratch/bsc32/MN4/bsc32/bsc32693/data_amd/exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/CMIP6/dcppA-hindcast/EC-Earth3-i4/DCPP/EC-Earth-Consortium/EC-Earth3-i4/dcppA-hindcast/r1i4p1f1/Amon/tas/gr/v20210910/tas_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i4p1f1_gr_196011-196110.nc" esarchive: src_sys: "/esarchive/" diff --git 
a/conf/archive_reference.yml b/conf/archive_reference.yml index e1dc58fbb6fac20bc1ea14bf1c22e7a3f8627de0..a6fb4850448d12773989da953e5700608baa2aaf 100644 --- a/conf/archive_reference.yml +++ b/conf/archive_reference.yml @@ -5,6 +5,10 @@ gpfs: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "psl":"weekly_mean/psl_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} monthly_mean: {"tas":"monthly_mean/tas_f1h-r1440x721cds/", "psl":"monthly_mean/psl_f1h-r1440x721cds/", "prlr":"monthly_mean/prlr_f1h-r1440x721cds/", @@ -20,8 +24,12 @@ esarchive: name: "ERA5" institution: "European Centre for Medium-Range Weather Forecasts" src: "recon/ecmwf/era5/" - weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", - "prlr":"weekly_mean/prlr_f1h-r1440x721cds/"} + weekly_mean: {"tas":"weekly_mean/tas_f1h-r1440x721cds/", + "prlr":"weekly_mean/prlr_f1h-r1440x721cds/", + "tasmax":"weekly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"weekly_mean/tasmin_f24h-r1440x721cds/", + "rsds":"weekly_mean/rsds_f1h-r1440x721cds/", + "sfcWind":"weekly_mean/sfcWind_f1h-r1440x721cds/"} daily_mean: {"tas":"daily_mean/tas_f1h-r1440x721cds/", "rsds":"daily_mean/rsds_f1h-r1440x721cds/", "prlr":"daily_mean/prlr_f1h-r1440x721cds/", @@ -44,8 +52,8 @@ esarchive: "g850":"monthly_mean/g850_f1h-r1440x721cds/", "hurs":"monthly_mean/hurs_f1h-r1440x721cds/", "sfcWind":"monthly_mean/sfcWind_f1h-r1440x721cds/", - "tasmax":"monthly_mean/tasmax_f1h-r1440x721cds/", - "tasmin":"monthly_mean/tasmin_f1h-r1440x721cds/", + "tasmax":"monthly_mean/tasmax_f24h-r1440x721cds/", + "tasmin":"monthly_mean/tasmin_f24h-r1440x721cds/", "ta300":"montly_mean/ta300_f1h-r1440x721cds/", "ta500":"monthly_mean/ta500_f1h-r1440x721cds/", "ta850":"monthly_mean/ta850_f1h-r1440x721cds/", diff --git a/conf/archive_seasonal.yml b/conf/archive_seasonal.yml index 552faee382576febc24c41e8d222614dfb3741df..21a74a22895640d53d6006d21302f4b93d9101ea 100644 --- a/conf/archive_seasonal.yml +++ b/conf/archive_seasonal.yml @@ -25,7 +25,7 @@ gpfs: institution: "European Centre for Medium-Range Weather Forecasts" src: "exp/ecmwf/system51c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", - "prlr":"monthly_mean/prlr_f24h/", + "prlr":"monthly_mean/prlr_s0-24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", "psl":"monthly_mean/psl_f6h/"} nmember: @@ -80,7 +80,7 @@ gpfs: NCEP-CFSv2: name: "NCEP CFSv2" institution: "NOAA NCEP" #? 
- src: "exp/ncep/cfs-v2/" + src: "exp/ncep/system2c3s/" monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_f24h/", "sfcWind":"monthly_mean/sfcWind_f6h/", @@ -121,6 +121,18 @@ gpfs: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-GEM5.2-NEMO: + name: "ECCC GEM5.2-NEMO" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/eccc/eccc5/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", + "prlr":"monthly_mean/prlr_s0-24h/"} + nmember: + fcst: 20 + hcst: 20 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc5.txt" Reference: ERA5: name: "ERA5" @@ -239,6 +251,19 @@ esarchive: calendar: "proleptic_gregorian" time_stamp_lag: "+1" reference_grid: "conf/grid_description/griddes_system2c3s.txt" + JMA-MRI-CPS3: + name: "JMA MRI-CPS3" + institution: "European Centre for Medium-Range Weather Forecasts" + src: "exp/jma/system3c3s/" + monthly_mean: {"tas":"monthly_mean/tas_f6h/", "prlr":"monthly_mean/prlr_s0-24h/", + "tasmax":"monthly_mean/tasmax_f6h/", "tasmin":"monthly_mean/tasmin_f6h/"} + nmember: + fcst: 155 + hcst: 10 + calendar: "proleptic_gregorian" + time_stamp_lag: "+1" + reference_grid: "conf/grid_description/griddes_eccc1.txt" + ECCC-CanCM4i: name: "ECCC CanCM4i" institution: "European Centre for Medium-Range Weather Forecasts" diff --git a/conf/archive_subseasonal.yml b/conf/archive_subseasonal.yml index 39d3a73429d63f1d4546216e8298b4d2017d1d17..a3ecd39a330f4b33efa2a56d938e71815ea7b81c 100644 --- a/conf/archive_subseasonal.yml +++ b/conf/archive_subseasonal.yml @@ -1,3 +1,23 @@ +gpfs: + src_sys: "/gpfs/projects/bsc32/esarchive_cache/" + System: + NCEP-CFSv2: + name: "NCEP CFSv2" + institution: "NOAA NCEP" #? + src: "exp/ncep/cfs-v2/" + weekly_mean: {"tas":"weekly_mean/s2s/tas_f24h/", + "prlr":"weekly_mean/s2s/prlr_f24h/", + "tasmax":"weekly_mean/s2s/tasmax_f24h/", + "tasmin":"weekly_mean/s2s/tasmin_f24h/", + "sfcWind":"weekly_mean/s2s/sfcWind_f24h/", + "rsds":"weekly_mean/s2s/rsds_f24h/"} + daily_mean: {"tas":"daily_mean/s2s/tas_f6h"} + nmember: + fcst: 48 + hcst: 12 + calendar: "proleptic_gregorian" + time_stamp_lag: "0" # Do we need it for subseasonal? + reference_grid: "/gpfs/projects/bsc32/esarchive_cache/exp/ncep/cfs-v2/weekly_mean/s2s/tas_f24h/tas_20050109.nc" # is it the same as seasonal? 
esarchive: src_sys: "/esarchive/" System: diff --git a/conf/autosubmit.yml b/conf/autosubmit.yml index 25872a0ef510b95ebf0f8dbe911bf5a544d3d5ff..99b29f3c8022de791317ce78d2312143ba916c5b 100644 --- a/conf/autosubmit.yml +++ b/conf/autosubmit.yml @@ -5,6 +5,7 @@ esarchive: conf_format: yaml experiment_dir: /esarchive/autosubmit/ userID: bsc032 + tmp_dir: mars: platform: NORD3 ## TO BE CHANGED module_version: autosubmit/4.0.0b-foss-2015a-Python-3.7.3 ## TO BE CHANGED @@ -12,3 +13,12 @@ mars: conf_format: yaml experiment_dir: /esarchive/autosubmit/ ## TO BE CHANGED userID: bsc32 ## TO BE CHANGED + tmp_dir: +gpfs: + platform: mn5 + module_version: autosubmit/4.0.98-foss-2015a-Python-3.7.3 + auto_version: 4.0.98 + conf_format: yaml + experiment_dir: /esarchive/autosubmit/ ## TO BE CHANGED + userID: bsc032 + tmp_dir: /gpfs/scratch/bsc32/$user$/tmp/ diff --git a/conf/grid_description/griddes_eccc5.txt b/conf/grid_description/griddes_eccc5.txt new file mode 100644 index 0000000000000000000000000000000000000000..782a1b4d356b4f9d4c421a5f84d666ced8013519 --- /dev/null +++ b/conf/grid_description/griddes_eccc5.txt @@ -0,0 +1,22 @@ +# +# gridID 1 +# +gridtype = generic +gridsize = 1 +# +# gridID 2 +# +gridtype = lonlat +gridsize = 64800 +xsize = 360 +ysize = 180 +xname = lon +xlongname = "longitude" +xunits = "degrees_east" +yname = lat +ylongname = "latitude" +yunits = "degrees_north" +xfirst = 0.5 +xinc = 1 +yfirst = 89.5 +yinc = -1 diff --git a/full_NAO.R b/full_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..b0db00df35f5b844b729e14aa82c67369525f617 --- /dev/null +++ b/full_NAO.R @@ -0,0 +1,29 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO_single.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) + + +source("modules/Crossval/Crossval_NAO.R") +res <- Crossval_NAO(recipe = recipe, data = data) + +source("plot_NAO.R") +plot_NAO(recipe = recipe, nao = res, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +# No visualisation available for region aggregated + diff --git a/full_ecvs_anomalies.R b/full_ecvs_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..a4c46693b501aa95d1cdae2e0339f3ada08f7eb9 --- /dev/null +++ b/full_ecvs_anomalies.R @@ -0,0 +1,52 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_singl_ano_seas.yml" +#recipe <- read_atomic_recipe(recipe_file) +recipe <- prepare_outputs(recipe_file) +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +data_summary(data$fcst, recipe) + + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe, data = data) + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + data_summary(data$fcst, recipe) 
+ } +source("modules/Crossval/Crossval_anomalies.R") +res <- Crossval_anomalies(recipe = recipe, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +# Required to plot a forecast: +data$fcst <- res$fcst +tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) +nlats <- as.numeric(dim(data$hcst$data)['latitude']) +nlons <- as.numeric(dim(data$hcst$data)['longitude']) +ntimes <- as.numeric(dim(data$hcst$data)['time']) + +# TODO: consider more than terciles: +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) + + +Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, significance = TRUE, probabilities = tmp_probs) + +## Check logo size is appropriate for your maps: +source("tools/add_logo.R") +add_logo(recipe, "tools/BSC_logo_95.jpg") #"rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_ecvs_calibration.R b/full_ecvs_calibration.R new file mode 100644 index 0000000000000000000000000000000000000000..5409d41fc8a49a5a0387a4bffdd9c401afb93d10 --- /dev/null +++ b/full_ecvs_calibration.R @@ -0,0 +1,61 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +source("modules/Aggregation/Aggregation.R") +args = commandArgs(trailingOnly = TRUE) +recipe_file <- args[1] +#recipe_file <- "recipe_tas_singl_cal_seas.yml" +#recipe_file <- "recipe_subseasonal_ecvs.yml" + +recipe <- read_atomic_recipe(recipe_file) +#recipe <- prepare_outputs(recipe_file) + +# Load datasets +data <- Loading(recipe) +data <- Units(recipe, data) +data_summary(data$hcst, recipe) +data_summary(data$obs, recipe) +if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) +} + if (recipe$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe, data = data) + data_summary(data$hcst, recipe) + data_summary(data$obs, recipe) + if (!is.null(data$fcst)) { + data_summary(data$fcst, recipe) + } + } + +source("modules/Crossval/Crossval_Calibration.R") +res <- Crossval_Calibration(recipe = recipe, data = data) + +source("modules/Crossval/Crossval_metrics.R") +skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + +if (!is.null(data$fcst)) { + # Required to plot a forecast: + data$fcst <- res$fcst + tmp_probs <- list(probs_fcst = res$probs$fcst[[1]]) + nlats <- as.numeric(dim(data$hcst$data)['latitude']) + nlons <- as.numeric(dim(data$hcst$data)['longitude']) + ntimes <- as.numeric(dim(data$hcst$data)['time']) + + # TODO: consider more than terciles + dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes, + latitude = nlats, longitude = nlons) + + + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, probabilities = tmp_probs, + logo = "tools/BSC_logo_95.jpg") +} else { + Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics, + significance = TRUE, logo = "tools/BSC_logo_95.jpg") +} + + + diff --git a/full_ecvs_multimodel_anomalies.R b/full_ecvs_multimodel_anomalies.R new file mode 100644 index 0000000000000000000000000000000000000000..e92a143cd92f2bc4d9693f78eebd58d6d77a077f --- /dev/null +++ b/full_ecvs_multimodel_anomalies.R @@ -0,0 +1,71 @@ +library(pryr) +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R")
+source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe_file <- "recipe_tas.yml" +recipe_file <- "recipe_ecvs_ano_mul_seas.yml" +# recipe_file <- "recipe_tas_decadal.yml" +# Will it work with a single job? +#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +models <- unlist(original_recipe$Analysis$Datasets$System) + + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_anomalies.R") +source("modules/Aggregation/Aggregation.R") +# sys <- models[1] +for (sys in models) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe_aux, data = data) + } + # verification individual models + product <- Crossval_anomalies(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + Visualization(recipe = recipe_aux, data = data, + skill_metrics = skill_model, significance = TRUE) + ## end ver + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) +} +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') + +source("modules/Crossval/Crossval_multimodel_anomalies.R") +res <- Crossval_multimodel_anomalies(recipe = original_recipe, data = data) + +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' +Visualization(recipe = recipe_aux, data = data_aux, skill_metrics = skill_metrics, significance = TRUE) + + +## Check logo size is appropiated for your maps: +source("tools/add_logo.R") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_ecvs_multimodel_calibrated.R b/full_ecvs_multimodel_calibrated.R new file mode 100644 index 0000000000000000000000000000000000000000..21d2f9d3a52f916cd4d156ea239212ed62500107 --- /dev/null +++ b/full_ecvs_multimodel_calibrated.R @@ -0,0 +1,88 @@ +library(pryr) +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +#recipe_file <- "recipe_tas.yml" +# recipe_file <- "recipe_tas_decadal.yml" +recipe_file <- "recipe_ecvs_cal_mul_seas.yml" +# Will it work with a single job? 
+#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +# Load datasets +models <- unlist(original_recipe$Analysis$Datasets$System) + + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_Calibration.R") +source("modules/Aggregation/Aggregation.R") +# sys <- models[1] +for (sys in models) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # recipe_aux$Analysis$Datasets$System$member <- read_yaml("conf/archive_decadal.yml")$esarchive$System[[sys]]$member + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + if (recipe_aux$Analysis$Workflow$Time_aggregation$execute) { + data <- Aggregation(recipe = recipe_aux, data = data) + } + # verification of individual models + product <- Crossval_Calibration(recipe = recipe_aux, data = data) + skill_model <- Crossval_metrics(recipe = recipe_aux, data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + + tmp_probs <- list(probs_fcst = product$probs$fcst[[1]]) + lats <- length(data$hcst$coords$latitude) + lons <- length(data$hcst$coords$longitude) + timesteps <- as.numeric(dim(data$hcst$data)['time']) + dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps, + #latitude = 20, longitude = 20) + latitude = lats, longitude = lons) + + Visualization(recipe = recipe_aux, data = data, probabilities = tmp_probs, + skill_metrics = skill_model, significance = TRUE) + ## end ver + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + gc() + print(mem_used()) +} +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') + +source("modules/Crossval/Crossval_multimodel_Calibration.R") +res <- Crossval_multimodel_Calibration(recipe = original_recipe, data = data) + +source("modules/Crossval/Crossval_multimodel_metrics.R") +skill_metrics <- Crossval_multimodel_metrics(recipe = original_recipe, + data = res, Fair = FALSE) + +recipe_aux$Analysis$Datasets$System$name <- 'Multimodel' +tmp_probs <- list(probs_fcst = res$probs$probs_fcst[[1]][[1]]) + + +dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = timesteps, + #latitude = 20, longitude = 20) + latitude = lats, longitude = lons) +info(recipe_aux$Run$logger, dim(tmp_probs$probs_fcst)) + +Visualization(recipe = recipe_aux, data = data_aux, probabilities = tmp_probs, skill_metrics = skill_metrics, significance = TRUE) + + +## Check logo size is appropriate for your maps: +source("tools/add_logo.R") +add_logo(recipe_aux, "rsz_rsz_bsc_logo.png") + +# scorecards computation: + + diff --git a/full_multimodel_NAO.R b/full_multimodel_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..e53cd1993e4dc62fd6fa65b7d43eaafe499bc533 --- /dev/null +++ b/full_multimodel_NAO.R @@ -0,0 +1,49 @@ + +source("modules/Loading/Loading.R") +source("modules/Saving/Saving.R") +source("modules/Units/Units.R") +source("modules/Visualization/Visualization.R") +#args = commandArgs(trailingOnly = TRUE) +#recipe_file <- args[1] +recipe_file <- "recipe_NAO.yml" +#recipe <- read_atomic_recipe(recipe_file) +original_recipe <- prepare_outputs(recipe_file, disable_checks = TRUE) +models <-
unlist(original_recipe$Analysis$Datasets$System) + +recipe_aux <- original_recipe +datos <- list() +datos$hcst <- list() +datos$fcst <- list() +NAO_ind <- list() +source("modules/Crossval/Crossval_metrics.R") +source("modules/Crossval/Crossval_NAO.R") +for (sys in models[1:2]) { + recipe_aux$Analysis$Datasets$System <- NULL + recipe_aux$Analysis$Datasets$System$name <- as.vector(sys) + # Load datasets + data <- Loading(recipe = recipe_aux) + data <- Units(recipe = recipe_aux, data = data) + # Verification individual models + product <- Crossval_NAO(recipe = recipe_aux, data = data) + source("plot_NAO.R") + plot_NAO(recipe = recipe_aux, nao = product, data = data) + + skill_model <- Crossval_metrics(recipe = recipe_aux, + data_crossval = product, + fair = FALSE, nmemb = NULL, nmemb_ref = NULL) + datos$hcst <- append(datos$hcst, list(data$hcst)) + datos$fcst <- append(datos$fcst, list(data$fcst)) + names(datos$hcst)[length(datos$hcst)] <- gsub('\\.','', sys) + names(datos$fcst)[length(datos$fcst)] <- gsub('\\.','', sys) + NAO_ind <- append(NAO_ind, list(product)) + names(NAO_ind)[length(NAO_ind)] <- gsub('\\.','', sys) +} +# No visualisation available for region aggregated +data_aux <- data +datos$obs <- data$obs +data <- datos +rm(list = 'datos') +source("modules/Crossval/Crossval_multimodel_NAO.R") +res <- Crossval_multimodel_NAO(recipe = original_recipe, data = data) + + diff --git a/launch_SUNSET.sh b/launch_SUNSET.sh index 6149a9639604942f58897363dd0c854f010b79a2..deb4f1124688c9b030ac7bf4e138d4cfc6b6c1b1 100644 --- a/launch_SUNSET.sh +++ b/launch_SUNSET.sh @@ -127,6 +127,13 @@ if [[ $run_method == "sbatch" ]]; then logdir=${outdir}/logs/slurm/ mkdir -p $logdir echo "Slurm job logs will be stored in $logdir" + + # Is machine MN5? + if [[ $BSC_MACHINE == "mn5" ]]; then + platform_params="-A bsc32 -q gp_bsces" + else + platform_params="" + fi # Launch one job per atomic recipe cd $codedir @@ -141,7 +148,7 @@ if [[ $run_method == "sbatch" ]]; then outfile=${logdir}/run-${job_name}.out errfile=${logdir}/run-${job_name}.err # Send batch job and capture job ID - job_ID=$(sbatch --parsable --job-name="SUNSET_verification" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + job_ID=$(sbatch --parsable --job-name="SUNSET_verification" $platform_params --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array verification_job_list+=($job_ID) echo "Submitted batch job $job_ID" @@ -156,7 +163,7 @@ if [[ $run_method == "sbatch" ]]; then outfile=${logdir}/run-multimodel-${job_name}.out errfile=${logdir}/run-multimodel-${job_name}.err # Send batch job and capture job ID - job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --kill-on-invalid-dep=yes --job-name="SUNSET_multimodel" --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) + job_ID=$(sbatch --parsable --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]}") --kill-on-invalid-dep=yes --job-name="SUNSET_multimodel" $platform_params --output=$outfile --error=$errfile --time=$wallclock --cpus-per-task=$cpus $custom_directives conf/slurm_templates/run_parallel_workflow.sh ${script} ${atomic_recipe}) # Add job ID to array 
multimodel_job_list+=($job_ID) echo "Submitted batch job $job_ID" @@ -171,7 +178,7 @@ if [[ $run_method == "sbatch" ]]; then echo "Submitting scorecards jobs..." outfile=${logdir}/run-scorecards.out errfile=${logdir}/run-scorecards.err - sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]} ${multimodel_job_list[*]}") --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} + sbatch --dependency=afterok:$(IFS=,; echo "${verification_job_list[*]} ${multimodel_job_list[*]}") $platform_params --output=$outfile --error=$errfile --time=01:00:00 conf/slurm_templates/run_scorecards.sh ${recipe} ${outdir} fi fi diff --git a/modules/Aggregation/R/agg_ini_end.R b/modules/Aggregation/R/agg_ini_end.R index bb3a410ac2221b38ab5cb50dfd0557cf723da939..120f3b8db29152eea4b92867f5f16e317273d76e 100644 --- a/modules/Aggregation/R/agg_ini_end.R +++ b/modules/Aggregation/R/agg_ini_end.R @@ -14,7 +14,7 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { plotting_attr <- list(names(indices)) } original_dims <- names(dim(x[[1]]$data)) - if (method == 'average') { + if (method == 'average') { x[[1]]$data <- Apply(x[[1]]$data, target_dim = 'time', function(y, ind) { @@ -70,3 +70,4 @@ agg_ini_end <- function(x, ini, end, indices = NULL, method, na.rm ,ncores) { x[[1]]$attrs$Dates <- tmp_dates return(x) } + diff --git a/modules/Crossval/Crossval_Calibration.R b/modules/Crossval/Crossval_Calibration.R index 109e355afe8551c136bc591ae7c579fe575a7202..c7df1c1e36f75528417c688ffd6de64391645360 100644 --- a/modules/Crossval/Crossval_Calibration.R +++ b/modules/Crossval/Crossval_Calibration.R @@ -2,7 +2,7 @@ source("modules/Crossval/R/tmp/GetProbs.R") Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { - + cal_method <- recipe$Analysis$Workflow$Calibration$method cross.method <- recipe$Analysis$cross.method # TODO move check @@ -417,6 +417,7 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { } info(recipe$Run$logger, "#### Calibrated and Probabilities Done #####") + if (recipe$Analysis$Workflow$Calibration$save != FALSE) { info(recipe$Run$logger, "##### START SAVING CALIBRATED #####") # Save forecast @@ -436,7 +437,6 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { probs_obs <- list() all_names <- NULL - ## TODO: Add .drop_dims() here for (ps in 1:length(categories)) { for (perc in 1:(length(categories[[ps]]) + 1)) { if (perc == 1) { @@ -447,22 +447,22 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { name_elem <- paste0("from_", categories[[ps]][perc-1]*100, "_to_", categories[[ps]][perc]*100) } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_obs) + drop = 'selected'))) if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_fcst) + drop = 'selected'))) } all_names <- c(all_names, name_elem) } @@ -503,4 +503,3 @@ Crossval_Calibration <- function(recipe, data, correct_negative = FALSE) { } - diff --git 
a/modules/Crossval/Crossval_NAO.R b/modules/Crossval/Crossval_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..d4882f3ec7da242450aa95bc97caccb521883f24 --- /dev/null +++ b/modules/Crossval/Crossval_NAO.R @@ -0,0 +1,375 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Crossval/R/tmp/NAO.R") +source("modules/Crossval/R/tmp/ProjectField.R") +source("modules/Crossval/R/tmp/EOF.R") +source("modules/Crossval/R/tmp/Utils.R") + +Crossval_NAO <- function(recipe, data) { + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if (is.null(obsproj)) { + obsproj <- TRUE + } + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst$data)['syear'] + orig_dims <- names(dim(data$hcst$data)) + # spatial dims + nlats <- dim(data$hcst$data)['latitude'] + nlons <- dim(data$hcst$data)['longitude'] + agg = 'region' + + # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570 + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + ## output objects + nao_hcst_ev_res <- NULL + nao_obs_ev_res <- NULL + nao_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_nao_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_nao_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # subset years: Subset works at BSC not at Athos + ## training indices + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + hcst_tr <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_obs_tr <- MeanDims(obs_tr, 'syear') + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = 
data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores)$exp_cor + # Standardisation: + # Need the nao_hcst (for the train.dexes) to standardise the eval.dexes? + nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev), + target_dims = c('syear','ensemble'), + fun = function(x, y) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(y, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear', + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, 1, function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores, + output_dims = 'syear')$output1 + nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'), + fun = function(x) { + sd <- sqrt(var(as.vector(x), na.rm = TRUE)) + means <- mean(as.vector(x), na.rm = TRUE) + res <- apply(x, c(1,2), function(z) {(z-means)/sd})}, + ncores = recipe$Analysis$ncores)$output1 + + # compute category limits + lims_nao_hcst_tr <- Apply(nao_hcst_tr, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + lims_nao_obs_tr <- Apply(nao_obs_tr, target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'cat'}), + prob_lims = categories, + ncores = ncores) + # store results + nao_hcst_ev_res <- abind(nao_hcst_ev_res, nao_hcst_ev, + along = length(dim(nao_hcst_ev)) + 1) + nao_obs_ev_res <- abind(nao_obs_ev_res, nao_obs_ev, + along = length(dim(nao_obs_ev)) + 1) + nao_obs_tr_res <- abind(nao_obs_tr_res, nao_obs_tr, + along = length(dim(nao_obs_tr)) + 1) + for(ps in 1:length(categories)) { + lims_nao_hcst_tr_res[[ps]] <- abind(lims_nao_hcst_tr_res[[ps]], + lims_nao_hcst_tr[[ps]], + along = length(dim(lims_nao_hcst_tr[[ps]])) + 1) + lims_nao_obs_tr_res[[ps]] <- abind(lims_nao_obs_tr_res[[ps]], + lims_nao_obs_tr[[ps]], + along = length(dim(lims_nao_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + names(dim(nao_hcst_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_ev_res)) <- c('unneeded', 'ensemble', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + names(dim(nao_obs_tr_res)) <- c('ensemble', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + # To make crps_clim work, the reference forecast needs to have the same dims as obs: + nao_hcst_ev_res <- Subset(nao_hcst_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_ev_res <- Subset(nao_obs_ev_res, along = 'unneeded', + indices = 1, drop = 'selected') + nao_obs_tr_res <- Subset(nao_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_nao_hcst_tr_res[[ps]])) <- c('cat', 'dat', 'var', 'sday', + 'sweek', 'time',
'syear') + names(dim(lims_nao_obs_tr_res[[ps]])) <- c('cat', 'unneeded', 'dat', 'var', + 'sday', 'sweek', 'time', 'syear') + lims_nao_obs_tr_res[[ps]] <- Subset(lims_nao_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + nao_fcst <- NULL + if (!is.null(data$fcst)) { + clim_hcst <- Apply(data$hcst$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + clim_obs <- Apply(data$obs$data, + target_dims = c('syear', 'ensemble'), + mean, + na.rm = na.rm, + ncores = ncores)$output1 + ano_fcst <- Ano(data = data$fcst$data, clim = clim_hcst) + ano_hcst <- Ano(data = data$hcst$data, clim = clim_hcst) + ano_obs <- Ano(data= data$obs$data, clim = clim_obs) + nao_fcst <- NAO(exp = ano_hcst, obs = ano_obs, exp_cor = ano_fcst, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(nao_fcst$exp, target_dims = c('syear', 'ensemble'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'cat'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + hcst_probs_ev[[ps]] <- GetProbs(nao_hcst_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_hcst_tr_res[[ps]], + ncores = ncores) + obs_probs_ev[[ps]] <- GetProbs(nao_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_nao_obs_tr_res[[ps]], + ncores = ncores) + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- GetProbs(nao_fcst$exp_cor, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'cat', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], + ncores = ncores) + } + } + # Convert to s2dv_cubes + nao_hcst <- data$hcst + nao_hcst$data <- nao_hcst_ev_res + nao_obs <- data$obs + nao_obs$data <- nao_obs_ev_res + + info(recipe$Run$logger, + "#### NAO and Probabilities Done #####") + if (recipe$Analysis$Workflow$Indices$NAO$save != 'none') { + info(recipe$Run$logger, "##### START SAVING NAO #####") + recipe$Run$output_dir <- paste0(recipe$Run$output_dir, + "/outputs/Anomalies/") + # Save forecast + if ((recipe$Analysis$Workflow$Indices$NAO$save %in% + c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { + save_forecast(recipe = recipe, data_cube = nao_fcst, type = 'fcst') + } + # Save hindcast + if (recipe$Analysis$Workflow$Indices$save %in% + c('all', 'exp_only')) { + save_forecast(recipe = recipe, data_cube = nao_hcst, type = 'hcst') + } + # Save observation + if (recipe$Analysis$Workflow$Indices$NAO$save == 'all') { + save_observations(recipe = recipe, data_cube = nao_obs) + } + } + # Save probability bins + probs_hcst <- list() + probs_fcst <- list() + probs_obs <- list() + all_names <- NULL + # Make categories rounded number to use as names: + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + for (ps in 1:length(categories)) { + 
for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_hcst) + probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_obs) + if (!is.null(data$fcst)) { + probs_fcst <- append(list(Subset(fcst_probs[[ps]], + along = 'cat', indices = perc, drop = 'all')), + probs_fcst) + } + all_names <- c(all_names, name_elem) + } + } + names(probs_hcst) <- all_names + if (!('var' %in% names(dim(probs_hcst[[1]])))) { + probs_hcst <- lapply(probs_hcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + names(probs_obs) <- all_names + if (!('var' %in% names(dim(probs_obs[[1]])))) { + probs_obs <- lapply(probs_obs, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + + if (!is.null(data$fcst)) { + names(probs_fcst) <- all_names + if (!('var' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(var = 1, dim(x)) + return(x)}) + } + if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + probs_fcst <- lapply(probs_fcst, function(x) { + dim(x) <- c(syear = 1, dim(x)) + return(x)}) + } + } + if (recipe$Analysis$Workflow$Probabilities$save %in% + c('all', 'bins_only')) { + save_probabilities(recipe = recipe, probs = probs_hcst, + data_cube = data$hcst, agg = agg, + type = "hcst") + save_probabilities(recipe = recipe, probs = probs_obs, + data_cube = data$hcst, agg = agg, + type = "obs") + # TODO Forecast + if (!is.null(probs_fcst)) { + save_probabilities(recipe = recipe, probs = probs_fcst, + data_cube = data$fcst, agg = agg, + type = "fcst") + } + } + return(list(hcst = nao_hcst, obs = nao_obs, fcst = nao_fcst, + # Mean Bias for the NAO doesn't make sense, so + # not returning fullvals + #hcst.full_val = data$hcst, obs.full_val = data$obs, + cat_lims = list(hcst_tr = lims_nao_hcst_tr_res, + obs_tr = lims_nao_obs_tr_res), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev), + ref_obs_tr = nao_obs_tr_res)) +} + + +## The result contains the inputs for Skill_full_crossval. 
+## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_anomalies.R b/modules/Crossval/Crossval_anomalies.R index 04bd8ed7ceed6d86dee54f24b96d46e806e60187..43b8e6e1bee23f0a66e6bc990ca7d19bbb4aca0b 100644 --- a/modules/Crossval/Crossval_anomalies.R +++ b/modules/Crossval/Crossval_anomalies.R @@ -1,4 +1,3 @@ - # Full-cross-val workflow ## This code should be valid for individual months and temporal averages source("modules/Crossval/R/tmp/GetProbs.R") @@ -257,22 +256,22 @@ Crossval_anomalies <- function(recipe, data) { name_elem <- paste0("from_", categories[[ps]][perc-1], "_to_", categories[[ps]][perc]) } - probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + probs_hcst <- append(probs_hcst, + list(Subset(hcst_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_hcst) - probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + drop = 'selected'))) + probs_obs <- append(probs_obs, + list(Subset(obs_probs_ev[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_obs) + drop = 'selected'))) if (!is.null(data$fcst)) { - probs_fcst <- append(list(Subset(fcst_probs[[ps]], + probs_fcst <- append(probs_fcst, + list(Subset(fcst_probs[[ps]], along = 'bin', indices = perc, - drop = 'selected')), - probs_fcst) + drop = 'selected'))) } all_names <- c(all_names, name_elem) } @@ -351,4 +350,3 @@ Crossval_anomalies <- function(recipe, data) { ## to be used as reference forecast in the CRPSS and CRPS_clim ## it is computed from the training indices - diff --git a/modules/Crossval/Crossval_multimodel_Calibration.R b/modules/Crossval/Crossval_multimodel_Calibration.R new file mode 100644 index 0000000000000000000000000000000000000000..229abd0a99458f5ddfb15783736ec8225cc7c245 --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_Calibration.R @@ -0,0 +1,389 @@ +# Full-cross-val workflow +## This code should be valid for individual months and temporal averages +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_Calibration <- function(recipe, data) { + cal_method <- recipe$Analysis$Workflow$Calibration$method + cross.method <- recipe$Analysis$cross.method + # TODO move check + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + 
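+  ## Illustrative sketch (not part of this workflow): the recipe lists
+  ## percentiles as fraction strings (e.g. "1/3"), and the lapply/sapply +
+  ## eval(parse()) idiom a few lines above turns each string into a numeric
+  ## probability. A minimal standalone example, with hypothetical recipe
+  ## values and non-clashing names, kept commented so it never runs here:
+  # percentiles_demo <- list(c("1/3", "2/3"), c("1/10", "9/10"))
+  # categories_demo <- lapply(percentiles_demo, function(x) {
+  #   sapply(x, function(y) eval(parse(text = y)))})
+  # str(categories_demo)
+  # # List of 2: num [1:2] 0.333 0.667 and num [1:2] 0.1 0.9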
orig_dims <- names(dim(data$hcst[[1]]$data))
+  # spatial dims
+  if ('latitude' %in% names(dim(data$hcst[[1]]$data))) {
+    nlats <- dim(data$hcst[[1]]$data)['latitude']
+    nlons <- dim(data$hcst[[1]]$data)['longitude']
+    agg = 'global'
+  } else if ('region' %in% names(dim(data$hcst[[1]]$data))) {
+    agg = 'region'
+    nregions <- dim(data$hcst[[1]]$data)['region']
+  }
+  # output_dims for the loop, based on the original dimensions
+  ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
+  ##     'latitude', 'longitude', 'unneeded', 'syear'
+  ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')],
+                    names(sdate_dim))
+  orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded'
+  orig_dims[orig_dims %in% 'syear'] <- 'ensemble'
+  tr_dim_names <- c(orig_dims,
+                    names(sdate_dim))
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
+  ## output objects
+  obs_ev_res <- NULL
+  cal_hcst_ev_res <- lapply(data$hcst, function(x) {NULL})
+  cal_hcst_tr_res <- lapply(data$hcst, function(x) {NULL})
+  obs_tr_res <- NULL
+  # one element per set of probabilities requested in the recipe:
+  lims_cal_hcst_tr_res <- lapply(categories, function(X) {NULL})
+  lims_obs_tr_res <- lapply(categories, function(X) {NULL})
+
+  fcst_probs <- lapply(categories, function(x){NULL})
+  hcst_probs_ev <- lapply(categories, function(x){NULL})
+  obs_probs_ev <- lapply(categories, function(x){NULL})
+  hcst_res <- list()
+
+  for (t in 1:length(cross)) {
+    info(recipe$Run$logger, paste("crossval:", t))
+    # Observations
+    obs_tr <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$train.dexes)
+    obs_ev <- Subset(data$obs$data, along = 'syear',
+                     indices = cross[[t]]$eval.dexes, drop = 'selected')
+    # Store cross validation loops:
+    obs_ev_res <- abind(obs_ev_res, obs_ev,
+                        along = length(dim(obs_ev)) + 1)
+    obs_tr_res <- abind(obs_tr_res, obs_tr,
+                        along = length(dim(obs_tr)) + 1)
+
+    # Calibrate individual models
+    cal_hcst_tr <- list()
+    for (sys in 1:length(data$hcst)) {
+      hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$train.dexes)
+      ## evaluation indices
+      hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear',
+                        indices = cross[[t]]$eval.dexes)
+      # calibrate:
+      cal_hcst_tr <- append(cal_hcst_tr,
+                            list(
+                              Calibration(exp = hcst_tr,
+                                          obs = obs_tr,
+                                          cal.method = cal_method,
+                                          memb_dim = 'ensemble', sdate_dim = 'syear',
+                                          eval.method = 'in-sample',
+                                          na.fill = FALSE, na.rm = TRUE,
+                                          apply_to = NULL,
+                                          alpha = NULL, ncores = ncores)))
+      cal_hcst_ev <- Calibration(exp = hcst_tr,
+                                 obs = obs_tr,
+                                 exp_cor = hcst_ev,
+                                 cal.method = cal_method,
+                                 memb_dim = 'ensemble', sdate_dim = 'syear',
+                                 eval.method = 'in-sample',
+                                 na.fill = FALSE, na.rm = TRUE,
+                                 apply_to = NULL,
+                                 alpha = NULL, ncores = ncores)
+
+      cal_hcst_ev_res[[sys]] <- abind(cal_hcst_ev_res[[sys]],
+                                      cal_hcst_ev,
+                                      along = length(dim(cal_hcst_ev)) + 1)
+      # cal_hcst_tr_res[[sys]] <- abind(cal_hcst_tr_res[[sys]],
+      #                                 cal_hcst_tr,
+      #                                 along = length(dim(cal_hcst_tr)) + 1)
+    }
+    # compute category limits
+    lims_cal_hcst_tr <- Apply(cal_hcst_tr,
+                              target_dims = c('syear', 'ensemble'),
+                              fun = function(..., prob_lims) {
+                                res <- abind(..., along = 2)
+                                lapply(prob_lims, function(ps) {
+                                  quantile(as.vector(res),
+                                           ps, na.rm = na.rm)})},
+                              output_dims = lapply(categories, function(x) {'bin'}),
+                              prob_lims = categories,
+                              ncores = ncores)
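+    ## Illustrative sketch (not part of this workflow): the Apply() call
+    ## above receives one array per model, binds them along the member
+    ## dimension, and takes quantiles of the pooled multi-model ensemble.
+    ## The core operation on hypothetical toy arrays (names are invented),
+    ## kept commented so it never runs here:
+    # m1 <- array(rnorm(20 * 5), dim = c(syear = 20, ensemble = 5))
+    # m2 <- array(rnorm(20 * 10), dim = c(syear = 20, ensemble = 10))
+    # pooled <- abind::abind(m1, m2, along = 2)  # 20 syears x 15 members
+    # terciles <- quantile(as.vector(pooled), c(1/3, 2/3), na.rm = TRUE)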
lims_obs_tr <- Apply(obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'bin'}), + prob_lims = categories, + ncores = ncores) + + #store results + for (ps in 1:length(categories)) { + lims_cal_hcst_tr_res[[ps]] <- abind(lims_cal_hcst_tr_res[[ps]], + lims_cal_hcst_tr[[ps]], + along = length(dim(lims_cal_hcst_tr[[ps]])) + 1) + lims_obs_tr_res[[ps]] <- abind(lims_obs_tr_res[[ps]], + lims_obs_tr[[ps]], + along = length(dim(lims_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Calibration Cross-validation loop ended #####") + gc() + # Add dim names: + cal_hcst_ev_res <- lapply(cal_hcst_ev_res, function(x) { + dim(x) <- dim(x)[-1] + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(obs_ev_res)) <- ev_dim_names + names(dim(obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + obs_tr_res <- Subset(obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_cal_hcst_tr_res[[ps]])) <- c('bin', + orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_obs_tr_res[[ps]])) <- c('bin', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_obs_tr_res[[ps]] <- Subset(lims_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast calibration: + cal_fcst <- lims_fcst <- NULL + if (!is.null(data$fcst)) { + for (sys in 1:length(data$hcst)) { + cal_fcst <- append(cal_fcst, + list(Calibration(exp = data$hcst[[sys]]$data, + obs = data$obs$data, + exp_cor = data$fcst[[sys]]$data, + cal.method = cal_method, + memb_dim = 'ensemble', sdate_dim = 'syear', + na.fill = FALSE, na.rm = TRUE, + apply_to = NULL, + alpha = NULL, ncores = ncores))) + } + names(cal_fcst) <- names(data$hcst) + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(cal_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'bin'), + lapply(cal_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_cal_hcst_tr[[ps]]), + cal_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 + obs_probs_ev[[ps]] <- GetProbs(obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_obs_tr_res[[ps]], + ncores = ncores) +fcst_probs <- NULL + if (!is.null(data$fcst)) { + fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), + cal_fcst), + target_dims = target_dims_list, + fun = function(lims, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores) + } + } + return(list(hcst = cal_hcst_ev_res, obs = obs_ev_res, fcst = cal_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_cal_hcst_tr_res, + obs_tr = lims_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_probs = hcst_probs_ev, + obs_probs = obs_probs_ev, probs_fcst = fcst_probs), + ref_obs_tr = obs_tr_res)) +} +##### TO SAVE edit the lines below: + # Convert to s2dv_cubes the resulting anomalies +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res + +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } + # Save hindcast +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } + # Save probability bins +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL + # Make categories rounded number to use as names: +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) + + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% 
names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") + # TODO Forecast + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } + # Save ensemble mean for multimodel option: + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_multimodel_NAO.R b/modules/Crossval/Crossval_multimodel_NAO.R new file mode 100644 index 0000000000000000000000000000000000000000..b82aea16183852d0ec8138e72d00ed1d52d731cb --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_NAO.R @@ -0,0 +1,139 @@ +source("modules/Crossval/R/tmp/GetProbs.R") + +Crossval_multimodel_NAO <- function(recipe, data) { + cross.method <- recipe$Analysis$cross.method + # TODO move check + obsproj <- recipe$Analysis$Workflow$Indices$NAO$obsproj + if (is.null(obsproj)) { + obsproj <- TRUE + } + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + ncores <- 
recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + ## data dimensions + sdate_dim <- dim(data$hcst[[1]]$data)['syear'] + orig_dims <- names(dim(data$hcst[[1]]$data)) + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) # k = ? + + # Define output dims and names: + nao_hcst_tr_aux <- list() + nao_hcst_ev_aux <- list() + + nao_obs_tr_res <- list() + nao_hcst_ev_res <- list() + # Cross-val loop starts + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + nao_hcst_tr_res <- list() + nao_hcst_ev_res <- list() + + # Observations + obs_tr <- Subset(data $obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # NAO for individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes) + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + ano_hcst_tr <- s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores) + ano_hcst_ev <- s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores) + # compute NAO: + nao <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, exp_cor = ano_hcst_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores) + + nao_obs_ev <- NAO(exp = ano_hcst_tr, obs = ano_obs_tr, + exp_cor = ano_obs_ev, + ftime_avg = NULL, time_dim = 'syear', + memb_dim = 'ensemble', + space_dim = c('latitude', 'longitude'), + ftime_dim = 'time', obsproj = obsproj, + lat = data$obs$coords$latitude, + lon = data$obs$coords$longitude, + ncores = recipe$Analysis$ncores)$exp_cor + #Standarisation: + # Need the nao_hcst (for the train.dexes) to standarize the eval.dexes? 
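+      ## Illustrative note (not part of this workflow): the four Apply()
+      ## calls below all implement the same rule: the mean and standard
+      ## deviation come from the training-period index (nao$exp or nao$obs)
+      ## and are used to scale the evaluation-year index, so the left-out
+      ## year never informs its own standardisation. The scalar form is:
+      # z_eval <- (nao_eval - mean(nao_train)) / sd(nao_train)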
+      nao_hcst_ev <- Apply(list(nao$exp, nao$exp_cor),
+                           target_dims = c('syear', 'ensemble'),
+                           fun = function(x, y) {
+                             sd <- sqrt(var(as.vector(x), na.rm = TRUE))
+                             means <- mean(as.vector(x), na.rm = TRUE)
+                             res <- apply(y, c(1,2), function(z) {(z-means)/sd})},
+                           ncores = recipe$Analysis$ncores)$output1
+      nao_obs_ev <- Apply(list(nao$obs, nao_obs_ev),
+                          target_dims = c('syear','ensemble'),
+                          fun = function(x, y) {
+                            sd <- sqrt(var(as.vector(x), na.rm = TRUE))
+                            means <- mean(as.vector(x), na.rm = TRUE)
+                            res <- apply(y, c(1,2), function(z) {(z-means)/sd})},
+                          ncores = recipe$Analysis$ncores)$output1
+      nao_obs_tr <- Apply(list(nao$obs), target_dims = 'syear',
+                          fun = function(x) {
+                            sd <- sqrt(var(as.vector(x), na.rm = TRUE))
+                            means <- mean(as.vector(x), na.rm = TRUE)
+                            res <- apply(x, 1, function(z) {(z-means)/sd})},
+                          ncores = recipe$Analysis$ncores,
+                          output_dims = 'syear')$output1
+      nao_hcst_tr <- Apply(list(nao$exp), target_dims = c('syear', 'ensemble'),
+                           fun = function(x) {
+                             sd <- sqrt(var(as.vector(x), na.rm = TRUE))
+                             means <- mean(as.vector(x), na.rm = TRUE)
+                             res <- apply(x, c(1,2), function(z) {(z-means)/sd})},
+                           ncores = recipe$Analysis$ncores)$output1
+      # store results:
+      nao_hcst_tr_res <- append(nao_hcst_tr_res, list(nao_hcst_tr))
+      names(nao_hcst_tr_res)[length(nao_hcst_tr_res)] <- names(data$hcst)[[sys]]
+      nao_hcst_ev_res <- append(nao_hcst_ev_res, list(nao_hcst_ev))
+      names(nao_hcst_ev_res)[length(nao_hcst_ev_res)] <- names(data$hcst)[[sys]]
+    }
+    if (t == 1) {
+      nao_hcst_ev_aux <- nao_hcst_ev_res
+      # Initialise the training aux list in the first fold as well;
+      # otherwise abind() below starts from NULL and the first
+      # cross-validation step is silently dropped from the samples.
+      nao_hcst_tr_aux <- nao_hcst_tr_res
+    } else {
+      for (sys in 1:length(data$hcst)) {
+        nao_hcst_ev_aux[[sys]] <- abind(nao_hcst_ev_aux[[sys]],
+                                        nao_hcst_ev_res[[sys]],
+                                        along = length(dim(nao_hcst_ev_res[[sys]])) + 1)
+        nao_hcst_tr_aux[[sys]] <- abind(nao_hcst_tr_aux[[sys]],
+                                        nao_hcst_tr_res[[sys]],
+                                        along = length(dim(nao_hcst_tr_res[[sys]])) + 1)
+      }
+    }
+  }
+  nao_hcst_ev_aux <- lapply(1:length(nao_hcst_ev_aux), function(x) {
+    names(dim(nao_hcst_ev_aux[[x]])) <- c(names(dim(nao_hcst_ev_res[[x]])), 'sample')
+    return(nao_hcst_ev_aux[[x]])})
+  nao_hcst_tr_aux <- lapply(1:length(nao_hcst_tr_aux), function(x) {
+    names(dim(nao_hcst_tr_aux[[x]])) <- c(names(dim(nao_hcst_tr_res[[x]])), 'sample')
+    return(nao_hcst_tr_aux[[x]])})
+
+  # Observed NAO should be the same for all models.
+  nao_obs_ev_res <- append(nao_obs_ev_res, list(nao_obs_ev))
+  names(nao_obs_ev_res)[length(nao_obs_ev_res)] <- names(data$hcst)[[1]]
+
diff --git a/modules/Crossval/Crossval_multimodel_anomalies.R b/modules/Crossval/Crossval_multimodel_anomalies.R
new file mode 100644
index 0000000000000000000000000000000000000000..fd7fd0fe089760bf6648c22d1ea260b3bd4b781e
--- /dev/null
+++ b/modules/Crossval/Crossval_multimodel_anomalies.R
@@ -0,0 +1,369 @@
+# Full-cross-val workflow
+## This code should be valid for individual months and temporal averages
+source("modules/Crossval/R/tmp/GetProbs.R")
+
+Crossval_multimodel_anomalies <- function(recipe, data) {
+  cross.method <- recipe$Analysis$cross.method
+  # TODO move check
+  if (is.null(cross.method)) {
+    cross.method <- 'leave-one-out'
+  }
+  categories <- recipe$Analysis$Workflow$Probabilities$percentiles
+  categories <- lapply(categories, function (x) {
+                   sapply(x, function(y) {
+                     eval(parse(text = y))})})
+  ncores <- recipe$Analysis$ncores
+  na.rm <- recipe$Analysis$remove_NAs
+  ## data dimensions
+  sdate_dim <- dim(data$hcst[[1]]$data)['syear']
+  orig_dims <- names(dim(data$hcst[[1]]$data))
+  # spatial dims
+  if ('latitude' %in% names(dim(data$hcst[[1]]$data))) {
+    nlats <- dim(data$hcst[[1]]$data)['latitude']
+    nlons <- dim(data$hcst[[1]]$data)['longitude']
+    agg = 'global'
+  } else if ('region' %in% names(dim(data$hcst[[1]]$data))) {
+    agg = 'region'
+    nregions <- dim(data$hcst[[1]]$data)['region']
+  }
+  # output_dims for the loop, based on the original dimensions
+  ## ex: 'dat', 'var', 'sday', 'sweek', 'ensemble', 'time',
+  ##     'latitude', 'longitude', 'unneeded', 'syear'
+  ev_dim_names <- c(orig_dims[-which(orig_dims %in% 'syear')],
+                    names(sdate_dim))
+  orig_dims[orig_dims %in% 'ensemble'] <- 'unneeded'
+  orig_dims[orig_dims %in% 'syear'] <- 'ensemble'
+  tr_dim_names <- c(orig_dims,
+                    names(sdate_dim))
+  # TODO fix it to use new version https://earth.bsc.es/gitlab/external/cstools/-/blob/dev-cross-indices/R/CST_Calibration.R#L570
+  cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method,
+                                            amt.points = sdate_dim,
+                                            amt.points_cor = NULL) # k = ?
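+  ## Illustrative sketch (not part of this workflow): judging from how it is
+  ## used below, 'cross' is a list with one element per fold, each carrying
+  ## $train.dexes and $eval.dexes. A hand-rolled equivalent for the
+  ## 'leave-one-out' case (an assumption about the internal CSTools helper,
+  ## with an invented name), kept commented so it never runs here:
+  # n <- as.numeric(sdate_dim)
+  # cross_sketch <- lapply(1:n, function(t) {
+  #   list(eval.dexes = t, train.dexes = setdiff(1:n, t))})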
+ ## output objects + ano_obs_ev_res <- NULL + ano_hcst_ev_res <- lapply(data$hcst, function(x) {NULL}) + ano_obs_tr_res <- NULL + # as long as probs requested in recipe: + lims_ano_hcst_tr_res <- lapply(categories, function(X) {NULL}) + lims_ano_obs_tr_res <- lapply(categories, function(X) {NULL}) + + fcst_probs <- lapply(categories, function(x){NULL}) + hcst_probs_ev <- lapply(categories, function(x){NULL}) + obs_probs_ev <- lapply(categories, function(x){NULL}) + hcst_res <- list() + ano_hcst_tr <- ano_hcst_ev <- ano_fcst <- list() + + for (t in 1:length(cross)) { + info(recipe$Run$logger, paste("crossval:", t)) + + # Observations + obs_tr <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$train.dexes) + obs_ev <- Subset(data$obs$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + clim_obs_tr <- MeanDims(obs_tr, 'syear') + ano_obs_tr <- s2dv::Ano(obs_tr, clim_obs_tr, + ncores = ncores) + ano_obs_ev <- s2dv::Ano(obs_ev, clim_obs_tr, + ncores = ncores) + # Store cross validation loops: + ano_obs_ev_res <- abind(ano_obs_ev_res, ano_obs_ev, + along = length(dim(ano_obs_ev)) + 1) + ano_obs_tr_res <- abind(ano_obs_tr_res, ano_obs_tr, + along = length(dim(ano_obs_tr)) + 1) + + # Anomalies of individual models + for (sys in 1:length(data$hcst)) { + hcst_tr <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$train.dexes) + ## evaluation indices + hcst_ev <- Subset(data$hcst[[sys]]$data, along = 'syear', + indices = cross[[t]]$eval.dexes, drop = 'selected') + # compute climatology: + clim_hcst_tr <- MeanDims(hcst_tr, c('syear', 'ensemble')) + # compute anomalies: + ano_hcst_tr <- append(ano_hcst_tr, + list(s2dv::Ano(hcst_tr, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev <- append(ano_hcst_ev, + list(s2dv::Ano(hcst_ev, clim_hcst_tr, + ncores = ncores))) + ano_hcst_ev_res[[sys]] <- abind(ano_hcst_ev_res[[sys]], + ano_hcst_ev[[sys]], + along = length(dim(ano_hcst_ev[[sys]])) + 1) + } + # compute category limits + lims_ano_hcst_tr <- Apply(ano_hcst_tr, + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), + ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + lims_ano_obs_tr <- Apply(ano_obs_tr, + target_dims = c('syear'), + fun = function(x, prob_lims) { + lapply(prob_lims, function(ps) { + quantile(as.vector(x), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x){'bin'}), + prob_lims = categories, + ncores = ncores) + + #store results + for(ps in 1:length(categories)) { + lims_ano_hcst_tr_res[[ps]] <- abind(lims_ano_hcst_tr_res[[ps]], lims_ano_hcst_tr[[ps]], + along = length(dim(lims_ano_hcst_tr[[ps]])) + 1) + lims_ano_obs_tr_res[[ps]] <- abind(lims_ano_obs_tr_res[[ps]], lims_ano_obs_tr[[ps]], + along = length(dim(lims_ano_obs_tr[[ps]])) + 1) + } + } + info(recipe$Run$logger, + "#### Anomalies Cross-validation loop ended #####") + gc() + # Add dim names: + ano_hcst_ev_res <- lapply(ano_hcst_ev_res, function(x) { + names(dim(x)) <- ev_dim_names + return(x)}) + names(dim(ano_obs_ev_res)) <- ev_dim_names + names(dim(ano_obs_tr_res)) <- tr_dim_names + # To make crps_clim to work the reference forecast need to have same dims as obs: + ano_obs_tr_res <- Subset(ano_obs_tr_res, along = 'unneeded', + indices = 1, drop = 'selected') + for(ps in 1:length(categories)) { + names(dim(lims_ano_hcst_tr_res[[ps]])) <- c('bin', + 
orig_dims[-which(orig_dims %in% c('ensemble', 'unneeded'))], 'syear') + names(dim(lims_ano_obs_tr_res[[ps]])) <- c('bin', + tr_dim_names[-which(tr_dim_names %in% c('ensemble'))]) + lims_ano_obs_tr_res[[ps]] <- Subset(lims_ano_obs_tr_res[[ps]], + along = 'unneeded', indices = 1, drop = 'selected') + } + # Forecast anomalies: + ano_fcst <- lims_fcst <- NULL + if (!is.null(data$fcst)) { + ano_fcst <- Apply(ano_hcst_ev_res, + target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + clim <- mean(as.vector(res), na.rm = na.rm) + res <- res - clim}, + ncores = ncores, + output_dims = c('syear', 'ensemble'))$output1 + # Terciles limits using the whole hindcast period: + lims_fcst <- Apply(ano_hcst_ev_res, target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + lapply(prob_lims, function(ps) { + quantile(as.vector(res), ps, na.rm = na.rm)})}, + output_dims = lapply(categories, function(x) {'bin'}), + prob_lims = categories, + ncores = ncores) + } + + # Compute Probabilities + for (ps in 1:length(categories)) { + # create a list of unknown length of systems and limits: + target_dims_list <- append(list(lims = 'bin'), + lapply(ano_hcst_ev_res, function(x) { + c('syear', 'ensemble')})) + hcst_probs_ev[[ps]] <- Apply(append(list(lims = lims_ano_hcst_tr[[ps]]), + ano_hcst_ev_res), + target_dims = target_dims_list, + function(lims, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims)}, + ncores = ncores)$output1 + obs_probs_ev[[ps]] <- GetProbs(ano_obs_ev_res, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_ano_obs_tr_res[[ps]], + ncores = ncores) +fcst_probs <- NULL + if (!is.null(data$fcst)) { +# fcst_probs[[ps]] <- Apply(append(list(lims = lims_fcst[[ps]]), +# ano_fcst), +# target_dims = list('cat', +# c('syear', 'ensemble')), +# function(lims, fcst) { + fcst_probs[[ps]] <- GetProbs(ano_fcst, time_dim = 'syear', + prob_thresholds = NULL, + bin_dim_abs = 'bin', + indices_for_quantiles = NULL, + memb_dim = 'ensemble', + abs_thresholds = lims_fcst[[ps]], +#}, + ncores = ncores) + } + } + return(list(hcst = ano_hcst_ev_res, obs = ano_obs_ev_res, fcst = ano_fcst, + hcst.full_val = data$hcst, obs.full_val = data$obs, + #hcst_EM = hcst_EM, fcst_EM = fcst_EM, + cat_lims = list(hcst_tr = lims_ano_hcst_tr_res, + obs_tr = lims_ano_obs_tr_res, + fcst = lims_fcst), + probs = list(hcst_ev = hcst_probs_ev, + obs_ev = obs_probs_ev, fcst_probs), + ref_obs_tr = ano_obs_tr_res)) +} +##### TO SAVE edit the lines below: + # Convert to s2dv_cubes the resulting anomalies +# ano_hcst <- data$hcst +# ano_hcst$data <- ano_hcst_ev_res +# ano_obs <- data$obs +# ano_obs$data <- ano_obs_ev_res + +# info(recipe$Run$logger, +# "#### Anomalies and Probabilities Done #####") +# if (recipe$Analysis$Workflow$Anomalies$save != 'none') { +# info(recipe$Run$logger, "##### START SAVING ANOMALIES #####") +# recipe$Run$output_dir <- paste0(recipe$Run$output_dir, +# "/outputs/Anomalies/") + # Save forecast +# if ((recipe$Analysis$Workflow$Anomalies$save %in% +# c('all', 'exp_only', 'fcst_only')) && !is.null(data$fcst)) { +# save_forecast(recipe = recipe, data_cube = data$fcst, type = 'fcst') +# } + # Save hindcast +# if (recipe$Analysis$Workflow$Anomalies$save %in% +# 
c('all', 'exp_only')) { +# save_forecast(recipe = recipe, data_cube = ano_hcst, type = 'hcst') +# } +# # Save observation +# if (recipe$Analysis$Workflow$Anomalies$save == 'all') { +# save_observations(recipe = recipe, data_cube = ano_obs) +# } +# } + # Save probability bins +# probs_hcst <- list() +# probs_fcst <- list() +# probs_obs <- list() +# all_names <- NULL + # Make categories rounded number to use as names: +# categories <- recipe$Analysis$Workflow$Probabilities$percentiles +# categories <- lapply(categories, function (x) { +# sapply(x, function(y) { +# round(eval(parse(text = y)),2)})}) + + # for (ps in 1:length(categories)) { + # for (perc in 1:(length(categories[[ps]]) + 1)) { + # if (perc == 1) { + # name_elem <- paste0("below_", categories[[ps]][perc]) + # } else if (perc == length(categories[[ps]]) + 1) { + # name_elem <- paste0("above_", categories[[ps]][perc-1]) + # } else { + # name_elem <- paste0("from_", categories[[ps]][perc-1], + # "_to_", categories[[ps]][perc]) + # } + # probs_hcst <- append(list(Subset(hcst_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_hcst) + # probs_obs <- append(list(Subset(obs_probs_ev[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_obs) + # if (!is.null(data$fcst)) { + # probs_fcst <- append(list(Subset(fcst_probs[[ps]], + # along = 'cat', indices = perc, drop = 'all')), + # probs_fcst) + # } + # all_names <- c(all_names, name_elem) + # } + # } + # names(probs_hcst) <- all_names + # if (!('var' %in% names(dim(probs_hcst[[1]])))) { + # probs_hcst <- lapply(probs_hcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # names(probs_obs) <- all_names + # if (!('var' %in% names(dim(probs_obs[[1]])))) { + # probs_obs <- lapply(probs_obs, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + + # if (!is.null(data$fcst)) { + # names(probs_fcst) <- all_names + # if (!('var' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(var = 1, dim(x)) + # return(x)}) + # } + # if (!('syear' %in% names(dim(probs_fcst[[1]])))) { + # probs_fcst <- lapply(probs_fcst, function(x) { + # dim(x) <- c(syear = 1, dim(x)) + # return(x)}) + # } + # } + # if (recipe$Analysis$Workflow$Probabilities$save %in% + # c('all', 'bins_only')) { + # save_probabilities(recipe = recipe, probs = probs_hcst, + # data_cube = data$hcst, agg = agg, + # type = "hcst") + # save_probabilities(recipe = recipe, probs = probs_obs, + # data_cube = data$hcst, agg = agg, + # type = "obs") + # TODO Forecast + # if (!is.null(probs_fcst)) { + # save_probabilities(recipe = recipe, probs = probs_fcst, + # data_cube = data$fcst, agg = agg, + # type = "fcst") + # } + # } + # Save ensemble mean for multimodel option: + # hcst_EM <- MeanDims(ano_hcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(hcst_EM = + # Subset(hcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$hcst, agg = agg, + # module = "statistics") + #fcst_EM <- NULL + #if (!is.null(data$fcst)) { + # fcst_EM <- MeanDims(data$fcst$data, 'ensemble', drop = T) + # save_metrics(recipe = recipe, + # metrics = list(fcst_EM = + # Subset(fcst_EM, along = 'dat', indices = 1, drop = 'selected')), + # data_cube = data$fcst, agg = agg, + # module = "statistics") + #} + #return(list(hcst = ano_hcst, obs = ano_obs, fcst = data$fcst, + # hcst.full_val = data$hcst, obs.full_val = data$obs, + # hcst_EM = hcst_EM, fcst_EM = fcst_EM, + # cat_lims = list(hcst_tr = 
lims_ano_hcst_tr_res, + # obs_tr = lims_ano_obs_tr_res), + # probs = list(hcst_ev = hcst_probs_ev, + # obs_ev = obs_probs_ev), + # ref_obs_tr = ano_obs_tr_res)) +#} + + +## The result contains the inputs for Skill_full_crossval. +## this is a list with the required elements: + ## probs is a list with + ## probs$hcst_ev and probs$obs_ev + ## probs$hcst_ev will have as many elements in the $Probabilities$percentiles + ## each element will be an array with 'cat' dimension + ## the same for probs$obs_ev + ## hcst is a s2dv_cube for the post-processed hindcast for the evalutaion indices + ## in this case cross validated anomalies + ## obs is a s2dv_cube for the post-processed obs + ## in this case cross validated anomalies + ## fcst is a s2dv_cube for the post-processed fcst + ## in this case cross anomalies with the full hindcast period + ## this object is not required for skill assessment + ## hcst.full_val and obs.full_val are the original data to compute mean bias + ## cat_lims used to compute the probabilities + ## this object is not required for skill assessment + ## ref_obs_tr is an array with the cross-validate observed anomalies + ## to be used as reference forecast in the CRPSS and CRPS_clim + ## it is computed from the training indices + diff --git a/modules/Crossval/Crossval_multimodel_metrics.R b/modules/Crossval/Crossval_multimodel_metrics.R new file mode 100644 index 0000000000000000000000000000000000000000..2adfb3610adb5cf5ece055ba06c50ff29573649d --- /dev/null +++ b/modules/Crossval/Crossval_multimodel_metrics.R @@ -0,0 +1,273 @@ +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduce memory and it can be used to compute several metrics + ## in the mean the ensemble dim is length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils +Crossval_multimodel_metrics <- function(recipe, + data = NULL, + Fair = FALSE) { + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # Prepare indices for crossval + sdate_dim <- dim(data$hcst[[1]])['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + alpha <- recipe$Analysis$alpha + if (is.null(alpha)) { + alpha <- 0.05 + } + + requested_metrics <- tolower(strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]]) + skill_metrics <- list() + if (!is.null(data)) { + # conver $data elements to list to use multiApply: + datos <- append(list(obs = data$obs), data$hcst) + ## CRPS metrics only make sense in pool method: + if (any(c('crps', 'crpss') %in% requested_metrics)) { + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + mean(s2dv:::.CRPS(exp = ref, obs = obs, + dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble'))}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # This are used to compute correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] *100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'bin', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(data$probs$obs[[ps]]), + target_dims = c('bin', 'syear'), + RPS_clim, bin_dim_abs = 'bin', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To use it when visualization works for more rps + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = data$probs$hcst[[ps]], + obs = data$probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'bin', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # un use param when providing probs + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + skill_metrics <- lapply(skill_metrics, function(x) {drop(x)}) + skill_metrics <- lapply(skill_metrics, function(x){ + InsertDim(x, pos = 1, len = 1, name = 'var')}) + return(skill_metrics) +} diff --git a/modules/Crossval/R/CRPS_clim.R b/modules/Crossval/R/CRPS_clim.R index 80e3172e4a40aa0e063403379ebd7e9b01fcb82a..0e6bef6572673741bb6df3fbe77f92ace2e515af 100644 --- a/modules/Crossval/R/CRPS_clim.R +++ b/modules/Crossval/R/CRPS_clim.R @@ -33,4 +33,3 @@ CRPS_clim <- function(obs, memb_dim ='ensemble', return_mean = TRUE, clim.cross. 
} } - diff --git a/modules/Crossval/R/RPS_clim.R b/modules/Crossval/R/RPS_clim.R index 058f1be3082742f056b7ee5f672f2e3a33cb5055..6deab3ec9cb7ba05c811f2957630a3a2f49ab7fc 100644 --- a/modules/Crossval/R/RPS_clim.R +++ b/modules/Crossval/R/RPS_clim.R @@ -37,4 +37,3 @@ RPS_clim <- function(obs, indices_for_clim = NULL, return(rps_ref) } } - diff --git a/modules/Crossval/R/tmp/Bias.R b/modules/Crossval/R/tmp/Bias.R index 988290e9e8388f79ee634ae32ea51885ce3e59e5..b9292cae24e5c425f2b5654e49b8d9b2358d916b 100644 --- a/modules/Crossval/R/tmp/Bias.R +++ b/modules/Crossval/R/tmp/Bias.R @@ -214,4 +214,3 @@ Bias <- function(exp, obs, time_dim = 'sdate', memb_dim = NULL, dat_dim = NULL, return(bias) } } - diff --git a/modules/Crossval/R/tmp/CST_MergeDims.R b/modules/Crossval/R/tmp/CST_MergeDims.R index 7df2f7707ad3845ead85ff7e2f676d162d304d69..bb06bf608051d06ecabce9a9cd3163c03128c6ad 100644 --- a/modules/Crossval/R/tmp/CST_MergeDims.R +++ b/modules/Crossval/R/tmp/CST_MergeDims.R @@ -1,4 +1,3 @@ - #'Function to Merge Dimensions #' #'@author Nuria Perez-Zanon, \email{nuria.perez@bsc.es} @@ -161,4 +160,3 @@ MergeDims <- function(data, merge_dims = c('time', 'monthly'), } return(data) } - diff --git a/modules/Crossval/R/tmp/Corr.R b/modules/Crossval/R/tmp/Corr.R index aaa3e1eadabad40895fc85d9cd2df8f24d117c4b..744ff10996d9261e8e8ef8eded34c5b442537ae2 100644 --- a/modules/Crossval/R/tmp/Corr.R +++ b/modules/Crossval/R/tmp/Corr.R @@ -482,4 +482,3 @@ Corr <- function(exp, obs, time_dim = 'sdate', dat_dim = NULL, return(res) } - diff --git a/modules/Crossval/R/tmp/EOF.R b/modules/Crossval/R/tmp/EOF.R new file mode 100644 index 0000000000000000000000000000000000000000..87795b66ec37c62de76c283f88cdebb535292e12 --- /dev/null +++ b/modules/Crossval/R/tmp/EOF.R @@ -0,0 +1,293 @@ +#'Area-weighted empirical orthogonal function analysis using SVD +#' +#'Perform an area-weighted EOF analysis using single value decomposition (SVD) +#'based on a covariance matrix or a correlation matrix if parameter 'corr' is +#'set to TRUE. +#' +#'@param ano A numerical array of anomalies with named dimensions to calculate +#' EOF. The dimensions must have at least 'time_dim' and 'space_dim'. NAs +#' could exist but it should be consistent along time_dim. That is, if one grid +#' point has NAs, all the time steps at this point should be NAs. +#'@param lat A vector of the latitudes of 'ano'. +#'@param lon A vector of the longitudes of 'ano'. +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param neofs A positive integer of the modes to be kept. The default value is +#' 15. If time length or the product of the length of space_dim is smaller than +#' neofs, neofs will be changed to the minimum of the three values. +#'@param corr A logical value indicating whether to base on a correlation (TRUE) +#' or on a covariance matrix (FALSE). The default value is FALSE. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return +#'A list containing: +#'\item{EOFs}{ +#' An array of EOF patterns normalized to 1 (unitless) with dimensions +#' (number of modes, rest of the dimensions of 'ano' except 'time_dim'). 
+#' Multiplying \code{EOFs} by \code{PCs} gives the original reconstructed
+#' field.
+#'}
+#'\item{PCs}{
+#' An array of principal components with the units of the original field to
+#' the power of 2, with dimensions (time_dim, number of modes, rest of the
+#' dimensions of 'ano' except 'space_dim').
+#' 'PCs' already contains the percentage of explained variance, so to
+#' reconstruct the original field it is only needed to multiply 'EOFs'
+#' by 'PCs'.
+#'}
+#'\item{var}{
+#' An array of the percentage (%) of the total variance explained by each
+#' mode. The dimensions are (number of modes, rest of the dimensions of
+#' 'ano' except 'time_dim' and 'space_dim').
+#'}
+#'\item{mask}{
+#' An array of the mask with dimensions (space_dim, rest of the dimensions of
+#' 'ano' except 'time_dim'). It is made from 'ano': 1 for the positions where
+#' 'ano' has a value and NA for the positions where 'ano' is NA. It is used to
+#' replace NAs with 0s for the EOF calculation and to mask the result with
+#' NAs again after the calculation.
+#'}
+#'\item{wght}{
+#' An array of the area weighting with dimensions 'space_dim'. It is calculated
+#' from the cosine of 'lat' and used to compute the fraction of variance
+#' explained by each EOF.
+#'}
+#'\item{tot_var}{
+#' A number or a numeric array of the total variance explained by all the modes.
+#' The dimensions are the same as 'ano' except 'time_dim' and 'space_dim'.
+#'}
+#'
+#'@seealso ProjectField, NAO, PlotBoxWhisker
+#'@examples
+#'# This example computes the EOFs along forecast horizons and plots the one
+#'# that explains the greatest amount of variability. The example data has low
+#'# resolution so the result may not be very informative, but it shows how to
+#'# use this function.
+#'\dontshow{
+#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101')
+#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'),
+#'                                     c('observation'), startDates,
+#'                                     leadtimemin = 1,
+#'                                     leadtimemax = 4,
+#'                                     output = 'lonlat',
+#'                                     latmin = 27, latmax = 48,
+#'                                     lonmin = -12, lonmax = 40)
+#'}
+#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs)
+#'tmp <- MeanDims(ano$exp, c('dataset', 'member'))
+#'ano <- tmp[1, , ,]
+#'names(dim(ano)) <- names(dim(tmp))[-2]
+#'eof <- EOF(ano, sampleData$lat, sampleData$lon)
+#'\dontrun{
+#'PlotEquiMap(eof$EOFs[1, , ], sampleData$lon, sampleData$lat)
+#'}
+#'
+#'@import multiApply
+#'@importFrom stats sd
+#'@export
+EOF <- function(ano, lat, lon, time_dim = 'sdate', space_dim = c('lat', 'lon'),
+                neofs = 15, corr = FALSE, ncores = NULL) {
+
+  # Check inputs
+  ## ano
+  if (is.null(ano)) {
+    stop("Parameter 'ano' cannot be NULL.")
+  }
+  if (!is.numeric(ano)) {
+    stop("Parameter 'ano' must be a numeric array.")
+  }
+  if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) {
+    stop("Parameter 'ano' must have dimension names.")
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!time_dim %in% names(dim(ano))) {
+    stop("Parameter 'time_dim' is not found in 'ano' dimension.")
+  }
+  ## space_dim
+  if (!is.character(space_dim) | length(space_dim) != 2) {
+    stop("Parameter 'space_dim' must be a character vector of 2.")
+  }
+  if (!all(space_dim %in% names(dim(ano)))) {
+    stop("Parameter 'space_dim' is not found in 'ano' dimension.")
+  }
+  ## lat
+  if (!is.numeric(lat) | length(lat) != dim(ano)[space_dim[1]]) {
+    stop("Parameter 'lat' must be a numeric vector with the same ",
+         "length as the latitude dimension of 'ano'.")
+  }
+  if (any(lat > 90 | lat < -90)) {
+    stop("Parameter 'lat' must contain values within the range [-90, 90].")
+  }
+  ## lon
+  if (!is.numeric(lon) | length(lon) != dim(ano)[space_dim[2]]) {
+    stop("Parameter 'lon' must be a numeric vector with the same ",
+         "length as the longitude dimension of 'ano'.")
+  }
+  if (any(lon > 360 | lon < -360)) {
+    .warning("Some 'lon' is out of the range [-360, 360].")
+  }
+  ## neofs
+  if (!is.numeric(neofs) | neofs %% 1 != 0 | neofs <= 0 | length(neofs) > 1) {
+    stop("Parameter 'neofs' must be a positive integer.")
+  }
+  ## corr
+  if (!is.logical(corr) | length(corr) > 1) {
+    stop("Parameter 'corr' must be one logical value.")
+  }
+  ## ncores
+  if (!is.null(ncores)) {
+    if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 |
+        length(ncores) > 1) {
+      stop("Parameter 'ncores' must be a positive integer.")
+    }
+  }
+
+  ###############################
+  # Calculate EOF
+
+#  # Replace mask of NAs with 0s for EOF analysis.
+#  ano[!is.finite(ano)] <- 0
+
+  # Area weighting. Weights for EOF; needed to compute the
+  # fraction of variance explained by each EOF.
+  space_ind <- sapply(space_dim, function(a) which(names(dim(ano)) == a))
+  wght <- array(cos(lat * pi / 180), dim = dim(ano)[space_ind])
+
+  # We want the covariance matrix to be weighted by the grid cell area,
+  # so the anomaly field is weighted by its square root, since the
+  # covariance matrix equals transpose(ano) times ano.
+  wght <- sqrt(wght)
+
+  # neofs is bounded
+  if (neofs != min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)) {
+    neofs <- min(dim(ano)[time_dim], prod(dim(ano)[space_dim]), neofs)
+    .warning(paste0("Parameter 'neofs' is changed to ", neofs, ", the minimum among ",
+                    "the length of time_dim, the product of the lengths of space_dim, ",
+                    "and neofs."))
+  }
+
+  res <- Apply(ano,
+               target_dims = c(time_dim, space_dim),
+               output_dims = list(EOFs = c('mode', space_dim),
+                                  PCs = c(time_dim, 'mode'),
+                                  var = 'mode',
+                                  tot_var = NULL,
+                                  mask = space_dim),
+               fun = .EOF,
+               corr = corr, neofs = neofs,
+               wght = wght,
+               ncores = ncores)
+
+  return(c(res, wght = list(wght)))
+
+}
+
+.EOF <- function(ano, neofs = 15, corr = FALSE, wght = wght) {
+  # ano: [time, lat, lon]
+
+  # Dimensions
+  nt <- dim(ano)[1]
+  ny <- dim(ano)[2]
+  nx <- dim(ano)[3]
+
+  # Check that the time steps at each grid point are NA-consistent:
+  # a grid point should have either all NAs or no NA along the time dim.
+  if (anyNA(ano)) {
+    ano_latlon <- array(ano, dim = c(nt, ny * nx))  # [time, lat*lon]
+    na_ind <- which(is.na(ano_latlon), arr.ind = T)
+    if (dim(na_ind)[1] != nt * length(unique(na_ind[, 2]))) {
+      stop("Some grid points have NAs that are not consistent across the time ",
+           "dimension. If a grid point is NA, it should be NA at all time steps.")
+    }
+  }
+
+  # Build the mask
+  mask <- ano[1, , ]
+  mask[!is.finite(mask)] <- NA
+  mask[is.finite(mask)] <- 1
+  dim(mask) <- c(ny, nx)
+
+  # Replace mask of NAs with 0s for EOF analysis.
+  ano[!is.finite(ano)] <- 0
+
+  ano <- ano * InsertDim(wght, 1, nt)
+
+  # If 'corr' is TRUE, use the correlation matrix instead of the
+  # covariance matrix.
+  if (corr) {
+    stdv <- apply(ano, c(2, 3), sd, na.rm = T)
+    ano <- ano / InsertDim(stdv, 1, nt)
+  }
+
+  # Time/space matrix for SVD
+  dim(ano) <- c(nt, ny * nx)
+  dim.dat <- dim(ano)
+
+  # 'transpose' means the array needs to be transposed before calling
+  # La.svd for computational efficiency, because the spatial dimension
+  # is larger than the time dimension. This goes with transposing the
+  # outputs of La.svd as well.
+  if (dim.dat[2] > dim.dat[1]) {
+    transpose <- TRUE
+  } else {
+    transpose <- FALSE
+  }
+  if (transpose) {
+    pca <- La.svd(t(ano))
+  } else {
+    pca <- La.svd(ano)
+  }
+
+  # La.svd conventions: decomposition X = U D t(V).
+  # La.svd$u returns U, La.svd$d returns the diagonal values of D, and
+  # La.svd$v returns t(V). The usual convention is PC = U and EOF = V.
+  # If La.svd is called on ano (transpose = FALSE case): EOFs = $v, PCs = $u.
+  # If La.svd is called on t(ano) (transpose = TRUE case): EOFs = t($u),
+  # PCs = t($v).
+  if (transpose) {
+    pca.EOFs <- t(pca$u)
+    pca.PCs <- t(pca$v)
+  } else {
+    pca.EOFs <- pca$v
+    pca.PCs <- pca$u
+  }
+
+  # Keep only the first neofs modes
+  PC <- pca.PCs[, 1:neofs]
+  EOF <- pca.EOFs[1:neofs, ]
+  dim(EOF) <- c(neofs, ny, nx)
+
+  # Avoid dropping the 'mode' dimension when neofs = 1.
+  if (neofs == 1) {
+    PC <- InsertDim(PC, 2, 1, name = 'new')
+  }
+
+  # Computation of the % of variance associated with each mode
+  W <- pca$d[1:neofs]
+  tot.var <- sum(pca$d^2)
+  var.eof <- 100 * pca$d[1:neofs]^2 / tot.var
+
+  for (e in 1:neofs) {
+    # Set all masked grid points to NA in the EOFs.
+    # Divide patterns by area weights so that EOF * PC gives unweighted
+    # (original) data.
+    EOF[e, , ] <- EOF[e, , ] * mask / wght
+    # PC is multiplied by the explained variance,
+    # so that the reconstruction is only EOF * PC.
+    PC[, e] <- PC[, e] * W[e]
+  }
+
+  if (neofs == 1) {
+    var.eof <- as.array(var.eof)
+  }
+
+  return(invisible(list(EOFs = EOF, PCs = PC, var = var.eof, tot_var = tot.var, mask = mask)))
+}
+
diff --git a/modules/Crossval/R/tmp/Eno.R b/modules/Crossval/R/tmp/Eno.R
index 97027e426c134a2cb6d09ce01a0b5f2494a0cde1..cb927602221e10f6d3c0853bf0935aad93834099 100644
--- a/modules/Crossval/R/tmp/Eno.R
+++ b/modules/Crossval/R/tmp/Eno.R
@@ -101,4 +101,3 @@ Eno <- function(data, time_dim = 'sdate', na.action = na.pass, ncores = NULL) {
   return(eno)
 }
-
diff --git a/modules/Crossval/R/tmp/GetProbs.R b/modules/Crossval/R/tmp/GetProbs.R
index 3bd4f3a75307ce010fc352e99784075e3e755fff..fb2cda0c9cc839c7f88e44ec6f4d0cb4a6944019 100644
--- a/modules/Crossval/R/tmp/GetProbs.R
+++ b/modules/Crossval/R/tmp/GetProbs.R
@@ -349,4 +349,3 @@ GetProbs <- function(data, time_dim = 'sdate', memb_dim = 'member',
   return(list(data = data_vector[sorter], cumulative_weights = cumulative_weights))
 }
-
diff --git a/modules/Crossval/R/tmp/NAO.R b/modules/Crossval/R/tmp/NAO.R
new file mode 100644
index 0000000000000000000000000000000000000000..255e2a9fd587b1d1bad90374b2aae7d803299ba6
--- /dev/null
+++ b/modules/Crossval/R/tmp/NAO.R
@@ -0,0 +1,574 @@
+#'Compute the North Atlantic Oscillation (NAO) Index
+#'
+#'Compute the North Atlantic Oscillation (NAO) index based on the leading EOF
+#'of the sea level pressure (SLP) anomalies over the North Atlantic region
+#'(20N-80N, 80W-40E). The PCs are obtained by projecting the forecast and
+#'observed anomalies onto the observed EOF pattern or the forecast
+#'anomalies onto the EOF pattern of the other years of the forecast.
+#'By default (ftime_avg = 2:4), NAO() computes the NAO index for 1-month
+#'lead seasonal forecasts that can be plotted with PlotBoxWhisker(). It returns
+#'cross-validated PCs of the NAO index for hindcast (exp) and observations
+#'(obs) based on the leading EOF pattern, or, if forecast (exp_cor) is provided,
+#'the NAO index for forecast and the corresponding data (exp and obs).
+#'
+#'@param exp A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' hindcast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only the NAO of observational data needs to be computed, this parameter
+#' can be left as NULL. The default value is NULL.
+#'@param obs A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' observed anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimensions 'time_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only the NAO of experimental data needs to be computed, this parameter
+#' can be left as NULL. The default value is NULL.
+#'@param exp_cor A named numeric array of North Atlantic SLP (20N-80N, 80W-40E)
+#' forecast anomalies from \code{Ano()} or \code{Ano_CrossValid()} with
+#' dimension 'time_dim' of length 1 (as in the case of an operational
+#' forecast), 'memb_dim', 'ftime_dim', and 'space_dim' at least.
+#' If only the NAO of the reference period needs to be computed, this
+#' parameter can be left as NULL. The default value is NULL.
+#'@param lat A vector of the latitudes of 'exp' and 'obs'.
+#'@param lon A vector of the longitudes of 'exp' and 'obs'.
+#'@param time_dim A character string indicating the name of the time dimension
+#' of 'exp' and 'obs'. The default value is 'sdate'.
+#'@param memb_dim A character string indicating the name of the member
+#' dimension of 'exp' (and 'obs', optional). If 'obs' has memb_dim, its length
+#' must be 1. The default value is 'member'.
+#'@param space_dim A vector of two character strings. The first is the dimension
+#' name of latitude of 'exp' and 'obs' and the second is the dimension name of
+#' longitude of 'exp' and 'obs'. The default value is c('lat', 'lon').
+#'@param ftime_dim A character string indicating the name of the forecast time
+#' dimension of 'exp' and 'obs'. The default value is 'ftime'.
+#'@param ftime_avg A numeric vector of the forecast time steps to average
+#' across the target period. If no average is needed, set it to NULL. The
+#' default value is 2:4, i.e., from the 2nd to the 4th forecast time steps.
+#'@param obsproj A logical value indicating whether to compute the NAO index by
+#' projecting the forecast anomalies onto the leading EOF of the observational
+#' reference (TRUE, default) or to compute the NAO by first computing the
+#' leading EOF of the forecast anomalies (in cross-validation mode, i.e.,
+#' leaving the evaluated year out), then projecting the forecast anomalies
+#' onto this EOF (FALSE). If 'exp_cor' is provided, 'obs' will be used when
+#' obsproj is TRUE and 'exp' will be used when obsproj is FALSE, and no
+#' cross-validation is applied.
+#'@param ncores An integer indicating the number of cores to use for parallel
+#' computation. The default value is NULL.
+#'
+#'@return
+#'A list which contains some of the following items depending on the data inputs:
+#'\item{exp}{
+#' A numeric array of the hindcast NAO index in verification format with the same
+#' dimensions as 'exp' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#' }
+#'\item{obs}{
+#' A numeric array of the observation NAO index in verification format with the same
+#' dimensions as 'obs' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#'}
+#'\item{exp_cor}{
+#' A numeric array of the forecast NAO index in verification format with the same
+#' dimensions as 'exp_cor' except space_dim and ftime_dim. If ftime_avg is NULL,
+#' ftime_dim remains.
+#' }
+#'
+#'@references
+#'Doblas-Reyes, F.J., Pavan, V. and Stephenson, D. (2003). The skill of
+#' multi-model seasonal forecasts of the wintertime North Atlantic
+#' Oscillation. Climate Dynamics, 21, 501-514.
+#' DOI: 10.1007/s00382-003-0350-4
+#'
+#'@examples
+#'# Make up synthetic data
+#'set.seed(1)
+#'exp <- array(rnorm(1620), dim = c(member = 2, sdate = 3, ftime = 5, lat = 6, lon = 9))
+#'set.seed(2)
+#'obs <- array(rnorm(1620), dim = c(member = 1, sdate = 3, ftime = 5, lat = 6, lon = 9))
+#'lat <- seq(20, 80, length.out = 6)
+#'lon <- seq(-80, 40, length.out = 9)
+#'nao <- NAO(exp = exp, obs = obs, lat = lat, lon = lon)
+#'
+#'exp_cor <- array(rnorm(540), dim = c(member = 2, sdate = 1, ftime = 5, lat = 6, lon = 9))
+#'nao <- NAO(exp = exp, obs = obs, exp_cor = exp_cor, lat = lat, lon = lon, obsproj = TRUE)
+#'# plot the NAO index
+#' \dontrun{
+#'nao$exp <- Reorder(nao$exp, c(2, 1))
+#'nao$obs <- Reorder(nao$obs, c(2, 1))
+#'PlotBoxWhisker(nao$exp, nao$obs, "NAO index, DJF", "NAO index (PC1) TOS",
+#'               monini = 12, yearini = 1985, freq = 1, "Exp. A", "Obs. X")
+#' }
+#'
+#'@import multiApply
+#'@importFrom ClimProjDiags Subset
+#'@export
+NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, lon, time_dim = 'sdate',
+                memb_dim = 'member', space_dim = c('lat', 'lon'),
+                ftime_dim = 'ftime', ftime_avg = 2:4,
+                obsproj = TRUE, ncores = NULL) {
+  # Check inputs
+  ## exp, obs, and exp_cor (1)
+  if (is.null(obs) & is.null(exp)) {
+    stop("Parameter 'exp' and 'obs' cannot both be NULL.")
+  }
+  if (!is.null(exp)) {
+    if (!is.numeric(exp)) {
+      stop("Parameter 'exp' must be a numeric array.")
+    }
+    if (is.null(dim(exp))) {
+      stop("Parameter 'exp' must have at least dimensions ",
+           "time_dim, memb_dim, space_dim, and ftime_dim.")
+    }
+    if (any(is.null(names(dim(exp)))) | any(nchar(names(dim(exp))) == 0)) {
+      stop("Parameter 'exp' must have dimension names.")
+    }
+  }
+  if (!is.null(obs)) {
+    if (!is.numeric(obs)) {
+      stop("Parameter 'obs' must be a numeric array.")
+    }
+    if (is.null(dim(obs))) {
+      stop("Parameter 'obs' must have at least dimensions ",
+           "time_dim, space_dim, and ftime_dim.")
+    }
+    if (any(is.null(names(dim(obs)))) | any(nchar(names(dim(obs))) == 0)) {
+      stop("Parameter 'obs' must have dimension names.")
+    }
+  }
+  if (!is.null(exp_cor)) {
+    if (!is.numeric(exp_cor)) {
+      stop("Parameter 'exp_cor' must be a numeric array.")
+    }
+    if (is.null(dim(exp_cor))) {
+      stop("Parameter 'exp_cor' must have at least dimensions ",
+           "time_dim, memb_dim, space_dim, and ftime_dim.")
+    }
+    if (any(is.null(names(dim(exp_cor)))) | any(nchar(names(dim(exp_cor))) == 0)) {
+      stop("Parameter 'exp_cor' must have dimension names.")
+    }
+    if (is.null(exp) || is.null(obs)) {
+      stop("Parameters 'exp' and 'obs' are required when 'exp_cor' is provided.")
+    }
+  }
+  ## time_dim
+  if (!is.character(time_dim) | length(time_dim) > 1) {
+    stop("Parameter 'time_dim' must be a character string.")
+  }
+  if (!is.null(exp)) {
+    if (!time_dim %in% names(dim(exp))) {
+      stop("Parameter 'time_dim' is not found in 'exp' dimension.")
+    }
+  }
+  if (!is.null(obs)) {
+    if (!time_dim %in% names(dim(obs))) {
+      stop("Parameter 'time_dim' is not found in 'obs' dimension.")
+    }
+  }
+  if (!is.null(exp_cor)) {
+    if (!time_dim %in% names(dim(exp_cor))) {
+      stop("Parameter 'time_dim' is not found in 'exp_cor' dimension.")
+    }
+    if (dim(exp_cor)[time_dim] > 1) {
+      stop("Parameter 'exp_cor' is expected to have length 1 in ",
+           time_dim, " dimension.")
+    }
+  }
+
+  ## memb_dim
+  if (!is.character(memb_dim) | 
length(memb_dim) > 1) { + stop("Parameter 'memb_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!memb_dim %in% names(dim(exp))) { + stop("Parameter 'memb_dim' is not found in 'exp' dimension.") + } + } + add_member_back <- FALSE + if (!is.null(obs)) { + if (memb_dim %in% names(dim(obs))) { + if (dim(obs)[memb_dim] != 1) { + stop("The length of parameter 'memb_dim' in 'obs' must be 1.") + } else { + add_member_back <- TRUE + obs <- ClimProjDiags::Subset(obs, memb_dim, 1, drop = 'selected') + } + } + } + if (!is.null(exp_cor)) { + if (!memb_dim %in% names(dim(exp_cor))) { + stop("Parameter 'memb_dim' is not found in 'exp_cor' dimension.") + } + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!is.null(exp)) { + if (!all(space_dim %in% names(dim(exp)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!all(space_dim %in% names(dim(obs)))) { + stop("Parameter 'space_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (any(!space_dim %in% names(dim(exp_cor)))) { + stop("Parameter 'space_dim' is not found in 'exp_cor' dimensions.") + } + } + ## ftime_dim + if (!is.character(ftime_dim) | length(ftime_dim) > 1) { + stop("Parameter 'ftime_dim' must be a character string.") + } + if (!is.null(exp)) { + if (!ftime_dim %in% names(dim(exp))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(obs)) { + if (!ftime_dim %in% names(dim(obs))) { + stop("Parameter 'ftime_dim' is not found in 'exp' or 'obs' dimension.") + } + } + if (!is.null(exp_cor)) { + if (!ftime_dim %in% names(dim(exp_cor))) { + stop("Parameter 'ftime_dim' is not found in 'exp_cor' dimensions.") + } + } + ## exp and obs (2) + #TODO: Add checks for exp_cor + if (!is.null(exp) & !is.null(obs)) { + name_exp <- sort(names(dim(exp))) + name_obs <- sort(names(dim(obs))) + name_exp <- name_exp[-which(name_exp == memb_dim)] + throw_error <- FALSE + if (length(name_exp) != length(name_obs)) { + throw_error <- TRUE + } else if (any(name_exp != name_obs)) { + throw_error <- TRUE + } else if (!all(dim(exp)[name_exp] == dim(obs)[name_obs])) { + throw_error <- TRUE + } + if (throw_error) { + stop("Parameter 'exp' and 'obs' must have the same names and lengths ", + "of all the dimensions except 'memb_dim'.") + } + } + ## ftime_avg + if (!is.null(ftime_avg)) { + if (!is.vector(ftime_avg) | !is.numeric(ftime_avg)) { + stop("Parameter 'ftime_avg' must be an integer vector.") + } + if (!is.null(exp)) { + if (max(ftime_avg) > dim(exp)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(obs)) { + if (max(ftime_avg) > dim(obs)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + if (!is.null(exp_cor)) { + if (max(ftime_avg) > dim(exp_cor)[ftime_dim] | min(ftime_avg) < 1) { + stop("Parameter 'ftime_avg' must be within the range of ftime_dim length.") + } + } + } + ## sdate >= 2 + if (!is.null(exp)) { + if (dim(exp)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } else { + if (dim(obs)[time_dim] < 2) { + stop("The length of time_dim must be at least 2.") + } + } + ## lat and lon + if (!is.null(exp)) { + if (!is.numeric(lat) | length(lat) != dim(exp)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector 
with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(obs)) { + if (!is.numeric(lat) | length(lat) != dim(obs)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp' and 'obs'.") + } + if (!is.numeric(lon) | length(lon) != dim(obs)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp' and 'obs'.") + } + } + if (!is.null(exp_cor)) { + if (!is.numeric(lat) | length(lat) != dim(exp_cor)[space_dim[1]]) { + stop("Parameter 'lat' must be a numeric vector with the same ", + "length as the latitude dimension of 'exp_cor'.") + } + if (!is.numeric(lon) | length(lon) != dim(exp_cor)[space_dim[2]]) { + stop("Parameter 'lon' must be a numeric vector with the same ", + "length as the longitude dimension of 'exp_cor'.") + } + } + stop_needed <- FALSE + if (max(lat) > 80 | min(lat) < 20) { + stop_needed <- TRUE + } + #NOTE: different from s2dverification + # lon is not used in the calculation actually. EOF only uses lat to do the + # weight. So we just need to ensure the data is in this region, regardless + # the order. + if (any(lon < 0)) { #[-180, 180] + if (!(min(lon) > -90 & min(lon) < -70 & max(lon) < 50 & max(lon) > 30)) { + stop_needed <- TRUE + } + } else { #[0, 360] + if (any(lon >= 50 & lon <= 270)) { + stop_needed <- TRUE + } else { + lon_E <- lon[which(lon < 50)] + lon_W <- lon[-which(lon < 50)] + if (max(lon_E) < 30 | min(lon_W) > 290) { + stop_needed <- TRUE + } + } + } + if (stop_needed) { + stop("The typical domain used to compute the NAO is 20N-80N, ", + "80W-40E. 'lat' or 'lon' is out of range.") + } + ## obsproj + if (!is.logical(obsproj) | length(obsproj) > 1) { + stop("Parameter 'obsproj' must be either TRUE or FALSE.") + } + if (obsproj) { + if (is.null(obs)) { + stop("Parameter 'obsproj' set to TRUE but no 'obs' provided.") + } + if (is.null(exp) & is.null(exp_cor)) { + .warning("parameter 'obsproj' set to TRUE but no 'exp' nor 'exp_cor' provided.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores == 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + + # Average ftime + if (!is.null(ftime_avg)) { + if (!is.null(exp)) { + exp_sub <- ClimProjDiags::Subset(exp, ftime_dim, ftime_avg, drop = FALSE) + exp <- MeanDims(exp_sub, ftime_dim, na.rm = TRUE) + ## Cross-validated PCs. Fabian. This should be extended to + ## nmod and nlt by simple loops. 
Virginie + } + if (!is.null(obs)) { + obs_sub <- ClimProjDiags::Subset(obs, ftime_dim, ftime_avg, drop = FALSE) + obs <- MeanDims(obs_sub, ftime_dim, na.rm = TRUE) + } + if (!is.null(exp_cor)) { + exp_cor_sub <- ClimProjDiags::Subset(exp_cor, ftime_dim, ftime_avg, drop = FALSE) + exp_cor <- MeanDims(exp_cor_sub, ftime_dim, na.rm = TRUE) + } + } + + # wght + wght <- array(sqrt(cos(lat * pi / 180)), dim = c(length(lat), length(lon))) + if (is.null(exp_cor)) { + if (!is.null(exp) & !is.null(obs)) { + res <- Apply(list(exp, obs), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } else if (!is.null(exp)) { + res <- Apply(list(exp = exp), + target_dims = list(exp = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, obs = NULL, + obsproj = obsproj, add_member_back = FALSE, + ncores = ncores) + } else if (!is.null(obs)) { + if (add_member_back) { + output_dims <- list(obs = c(time_dim, memb_dim)) + } else { + output_dims <- list(obs = time_dim) + } + res <- Apply(list(obs = obs), + target_dims = list(obs = c(time_dim, space_dim)), + output_dims = output_dims, + fun = .NAO, + lat = lat, wght = wght, exp = NULL, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + } else { # exp_cor provided + res <- Apply(list(exp = exp, obs = obs, exp_cor = exp_cor), + target_dims = list(exp = c(memb_dim, time_dim, space_dim), + obs = c(time_dim, space_dim), + exp_cor = c(memb_dim, time_dim, space_dim)), + fun = .NAO, + lat = lat, wght = wght, + obsproj = obsproj, add_member_back = add_member_back, + ncores = ncores) + } + + return(res) +} + +.NAO <- function(exp = NULL, obs = NULL, exp_cor = NULL, lat, wght, obsproj = TRUE, + add_member_back = FALSE) { + # exp: [memb_exp, sdate, lat, lon] + # obs: [sdate, lat, lon] + # exp_cor: [memb, sdate = 1, lat, lon] + # wght: [lat, lon] + + if (!is.null(exp)) { + ntime <- dim(exp)[2] + nlat <- dim(exp)[3] + nlon <- dim(exp)[4] + nmemb_exp <- dim(exp)[1] + } else { + ntime <- dim(obs)[1] + nlat <- dim(obs)[2] + nlon <- dim(obs)[3] + } + if (!is.null(exp_cor)) { + ntime_exp_cor <- dim(exp_cor)[2] # should be 1 + nmemb_exp_cor <- dim(exp_cor)[1] + } + + if (!is.null(obs)) nao_obs <- array(NA, dim = ntime) + if (!is.null(exp)) nao_exp <- array(NA, dim = c(ntime, nmemb_exp)) + if (!is.null(exp_cor)) { + nao_exp_cor <- array(NA, dim = c(ntime_exp_cor, nmemb_exp_cor)) + #NOTE: The dimensions are flipped to fill in data correctly. Need to flip it back later. + } + + if (is.null(exp_cor)) { + + for (tt in 1:ntime) { # cross-validation + + if (!is.null(obs)) { + ## Calculate observation EOF. Excluding one forecast start year. + obs_sub <- obs[(1:ntime)[-tt], , , drop = FALSE] + EOF_obs <- .EOF(obs_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode = 1, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies. + PF <- .ProjectField(obs, eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + ## Keep PCs of excluded forecast start year. Fabian. 
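+        ## Note: .ProjectField() returns one PC value per start date, but
+        ## only the value for the held-out year 'tt' is stored below, so
+        ## each NAO value comes from an EOF computed without that year.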
+ nao_obs[tt] <- PF[tt] + } + + if (!is.null(exp)) { + if (!obsproj) { + exp_sub <- exp[, (1:ntime)[-tt], , , drop = FALSE] + # Combine 'memb' and 'sdate' to calculate EOF + dim(exp_sub) <- c(nmemb_exp * (ntime - 1), nlat, nlon) + EOF_exp <- .EOF(exp_sub, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + + ## Correct polarity of pattern + ##NOTE: different from s2dverification, which doesn't use mean(). +# if (0 < EOF_exp[1, which.min(abs(lat - 65)), ]) { + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + + ### Lines below could be simplified further by computing + ### ProjectField() only on the year of interest... (though this is + ### not vital). Lauriane + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_exp[1, , ], wght = wght) # [sdate, memb] + nao_exp[tt, imemb] <- PF[tt] + } + } else { + ## Project forecast anomalies on obs EOF + for (imemb in 1:nmemb_exp) { + PF <- .ProjectField(exp[imemb, , , ], eof_mode = EOF_obs[1, , ], wght = wght) # [sdate] + nao_exp[tt, imemb] <- PF[tt] + } + } + } + + } # for loop sdate + + } else { # exp_cor provided + + ## Calculate observation EOF. Without cross-validation + EOF_obs <- .EOF(obs, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + # EOF_obs: [mode, lat, lon] + if (0 < mean(EOF_obs[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_obs <- EOF_obs * (-1) + } + ## Project observed anomalies + PF <- .ProjectField(obs, eof_mode = EOF_obs, wght = wght) # [mode = 1, sdate] + nao_obs[] <- PF[1, ] + + if (!obsproj) { + # Calculate EOF_exp + tmp <- array(exp, dim = c(nmemb_exp * ntime, nlat, nlon)) + EOF_exp <- .EOF(tmp, neofs = 1, wght = wght)$EOFs # [mode = 1, lat, lon] + ## Correct polarity of pattern + if (0 < mean(EOF_exp[1, which.min(abs(lat - 65)), ], na.rm = T)) { + EOF_exp <- EOF_exp * (-1) + } + eof_mode_input <- EOF_exp[1, , ] + } else { + eof_mode_input <- EOF_obs[1, , ] + } + + # Calculate NAO_exp + for (imemb in 1:dim(exp)[1]) { + exp_sub <- ClimProjDiags::Subset(exp, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp[ , imemb] <- PF + } + + # Calculate NAO_exp_cor + for (imemb in 1:dim(exp_cor)[1]) { + exp_sub <- ClimProjDiags::Subset(exp_cor, along = 1, indices = imemb, + drop = 'selected') + PF <- .ProjectField(exp_sub, eof_mode = eof_mode_input, wght = wght) # [sdate] + nao_exp_cor[, imemb] <- PF + } + + } + # add_member_back + if (add_member_back) { + memb_dim_name <- ifelse(!is.null(names(dim(exp))[1]), names(dim(exp))[1], 'member') + nao_obs <- InsertDim(nao_obs, 2, 1, name = memb_dim_name) + } + + # Return results + if (is.null(exp_cor)) { + res <- NULL + if (!is.null(exp)) { + res <- c(res, list(exp = nao_exp)) + } + if (!is.null(obs)) { + res <- c(res, list(obs = nao_obs)) + } + return(res) + + } else { + return(list(exp = nao_exp, obs = nao_obs, exp_cor = nao_exp_cor)) + } +} + diff --git a/modules/Crossval/R/tmp/ProjectField.R b/modules/Crossval/R/tmp/ProjectField.R new file mode 100644 index 0000000000000000000000000000000000000000..efa35dc35313e6934f354e41e39cd979a4293b33 --- /dev/null +++ b/modules/Crossval/R/tmp/ProjectField.R @@ -0,0 +1,272 @@ +#'Project anomalies onto modes of variability +#' +#'Project anomalies onto modes of variability to get the temporal evolution of +#'the EOF mode selected. 
It returns principal components (PCs) by area-weighted +#'projection onto EOF pattern (from \code{EOF()}) or REOF pattern (from +#'\code{REOF()} or \code{EuroAtlanticTC()}). The calculation removes NA and +#'returns NA if the whole spatial pattern is NA. +#' +#'@param ano A numerical array of anomalies with named dimensions. The +#' dimensions must have at least 'time_dim' and 'space_dim'. It can be +#' generated by Ano(). +#'@param eof A list that contains at least 'EOFs' or 'REOFs' and 'wght', which +#' are both arrays. 'EOFs' or 'REOFs' must have dimensions 'mode' and +#' 'space_dim' at least. 'wght' has dimensions space_dim. It can be generated +#' by EOF() or REOF(). +#'@param time_dim A character string indicating the name of the time dimension +#' of 'ano'. The default value is 'sdate'. +#'@param space_dim A vector of two character strings. The first is the dimension +#' name of latitude of 'ano' and the second is the dimension name of longitude +#' of 'ano'. The default value is c('lat', 'lon'). +#'@param mode An integer of the variability mode number in the EOF to be +#' projected on. The default value is NULL, which means all the modes of 'eof' +#' is calculated. +#'@param ncores An integer indicating the number of cores to use for parallel +#' computation. The default value is NULL. +#' +#'@return A numerical array of the principal components in the verification +#' format. The dimensions are the same as 'ano' except 'space_dim'. +#' +#'@seealso EOF, NAO, PlotBoxWhisker +#'@examples +#'\dontshow{ +#'startDates <- c('19851101', '19901101', '19951101', '20001101', '20051101') +#'sampleData <- s2dv:::.LoadSampleData('tos', c('experiment'), +#' c('observation'), startDates, +#' leadtimemin = 1, +#' leadtimemax = 4, +#' output = 'lonlat', +#' latmin = 27, latmax = 48, +#' lonmin = -12, lonmax = 40) +#'} +#'ano <- Ano_CrossValid(sampleData$mod, sampleData$obs) +#'eof_exp <- EOF(ano$exp, sampleData$lat, sampleData$lon) +#'eof_obs <- EOF(ano$obs, sampleData$lat, sampleData$lon) +#'mode1_exp <- ProjectField(ano$exp, eof_exp, mode = 1) +#'mode1_obs <- ProjectField(ano$obs, eof_obs, mode = 1) +#' +#'\dontrun{ +#' # Plot the forecast and the observation of the first mode for the last year +#' # of forecast +#' sdate_dim_length <- dim(mode1_obs)['sdate'] +#' plot(mode1_obs[sdate_dim_length, 1, 1, ], type = "l", ylim = c(-1, 1), +#' lwd = 2) +#' for (i in 1:dim(mode1_exp)['member']) { +#' par(new = TRUE) +#' plot(mode1_exp[sdate_dim_length, 1, i, ], type = "l", col = rainbow(10)[i], +#' ylim = c(-15000, 15000)) +#' } +#'} +#' +#'@import multiApply +#'@export +ProjectField <- function(ano, eof, time_dim = 'sdate', space_dim = c('lat', 'lon'), + mode = NULL, ncores = NULL) { + + # Check inputs + ## ano (1) + if (is.null(ano)) { + stop("Parameter 'ano' cannot be NULL.") + } + if (!is.numeric(ano)) { + stop("Parameter 'ano' must be a numeric array.") + } + if (any(is.null(names(dim(ano)))) | any(nchar(names(dim(ano))) == 0)) { + stop("Parameter 'ano' must have dimension names.") + } + ## eof (1) + if (is.null(eof)) { + stop("Parameter 'eof' cannot be NULL.") + } + if (!is.list(eof)) { + stop("Parameter 'eof' must be a list generated by EOF() or REOF().") + } + if ('EOFs' %in% names(eof)) { + EOFs <- "EOFs" + } else if ('REOFs' %in% names(eof)) { + EOFs <- "REOFs" + } else if ('patterns' %in% names(eof)) { + EOFs <- "patterns" + } else { + stop("Parameter 'eof' must be a list that contains 'EOFs', 'REOFs', ", + "or 'patterns'. 
It can be generated by EOF(), REOF(), or EuroAtlanticTC().") + } + if (!'wght' %in% names(eof)) { + stop("Parameter 'eof' must be a list that contains 'wght'. ", + "It can be generated by EOF() or REOF().") + } + if (!is.numeric(eof[[EOFs]]) || !is.array(eof[[EOFs]])) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be a numeric array.") + } + if (!is.numeric(eof$wght) || !is.array(eof$wght)) { + stop("The component 'wght' of parameter 'eof' must be a numeric array.") + } + ## time_dim + if (!is.character(time_dim) | length(time_dim) > 1) { + stop("Parameter 'time_dim' must be a character string.") + } + if (!time_dim %in% names(dim(ano))) { + stop("Parameter 'time_dim' is not found in 'ano' dimension.") + } + ## space_dim + if (!is.character(space_dim) | length(space_dim) != 2) { + stop("Parameter 'space_dim' must be a character vector of 2.") + } + if (!all(space_dim %in% names(dim(ano)))) { + stop("Parameter 'space_dim' is not found in 'ano' dimension.") + } + ## ano (2) + if (!all(space_dim %in% names(dim(ano))) | !time_dim %in% names(dim(ano))) { + stop("Parameter 'ano' must be an array with dimensions named as ", + "parameter 'space_dim' and 'time_dim'.") + } + ## eof (2) + if (!all(space_dim %in% names(dim(eof[[EOFs]]))) | + !'mode' %in% names(dim(eof[[EOFs]]))) { + stop("The component 'EOFs' or 'REOFs' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim' and 'mode'.") + } + if (length(dim(eof$wght)) != 2 | !all(names(dim(eof$wght)) %in% space_dim)) { + stop("The component 'wght' of parameter 'eof' must be an array ", + "with dimensions named as parameter 'space_dim'.") + } + ## mode + if (!is.null(mode)) { + if (!is.numeric(mode) | mode %% 1 != 0 | mode < 0 | length(mode) > 1) { + stop("Parameter 'mode' must be NULL or a positive integer.") + } + if (mode > dim(eof[[EOFs]])['mode']) { + stop("Parameter 'mode' is greater than the number of available ", + "modes in 'eof'.") + } + } + ## ncores + if (!is.null(ncores)) { + if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | + length(ncores) > 1) { + stop("Parameter 'ncores' must be a positive integer.") + } + } + +#------------------------------------------------------- + + # Keep the chosen mode + if (!is.null(mode)) { + eof_mode <- ClimProjDiags::Subset(eof[[EOFs]], 'mode', mode, drop = 'selected') + } else { + eof_mode <- eof[[EOFs]] + } + + if ('mode' %in% names(dim(eof_mode))) { + dimnames_without_mode <- names(dim(eof_mode))[-which(names(dim(eof_mode)) == 'mode')] + } else { + dimnames_without_mode <- names(dim(eof_mode)) + } + + if (all(dimnames_without_mode %in% space_dim)) { # eof_mode: [lat, lon] or [mode, lat, lon] + if ('mode' %in% names(dim(eof_mode))) { + eof_mode_target <- c('mode', space_dim) + output_dims <- c('mode', time_dim) + } else { + eof_mode_target <- space_dim + output_dims <- time_dim + } + res <- Apply(list(ano, eof_mode), + target_dims = list(c(time_dim, space_dim), + eof_mode_target), + output_dims = output_dims, + wght = eof$wght, + fun = .ProjectField, + ncores = ncores)$output1 + + } else { + + if (!all(dimnames_without_mode %in% names(dim(ano)))) { + stop("The array 'EOF' in parameter 'eof' has dimension not in parameter ", + "'ano'. 
Check if 'ano' and 'eof' are compatible.")
+    }
+
+    common_dim_ano <- dim(ano)[which(names(dim(ano)) %in% dimnames_without_mode)]
+    if (any(common_dim_ano[match(dimnames_without_mode, names(common_dim_ano))] !=
+            dim(eof_mode)[dimnames_without_mode])) {
+      stop("Parameter 'ano' and the 'EOFs' array in parameter 'eof' have common ",
+           "dimensions with different lengths. Check if 'ano' and 'eof' are compatible.")
+    }
+
+    # Enlarge eof/ano if needed. The margin_dims of Apply() must be consistent
+    # between ano and eof.
+    additional_dims <- dim(ano)[-which(names(dim(ano)) %in% names(dim(eof_mode)))]
+    additional_dims <- additional_dims[-which(names(additional_dims) == time_dim)]
+    if (length(additional_dims) != 0) {
+      for (i in seq_along(additional_dims)) {
+        eof_mode <- InsertDim(eof_mode, posdim = (length(dim(eof_mode)) + 1),
+                              lendim = additional_dims[i], name = names(additional_dims)[i])
+      }
+    }
+    if ('mode' %in% names(dim(eof_mode))) {
+      eof_mode_target <- c('mode', space_dim)
+      output_dims <- c('mode', time_dim)
+    } else {
+      eof_mode_target <- space_dim
+      output_dims <- time_dim
+    }
+    res <- Apply(list(ano, eof_mode),
+                 target_dims = list(c(time_dim, space_dim),
+                                    eof_mode_target),
+                 output_dims = output_dims,
+                 wght = eof$wght,
+                 fun = .ProjectField,
+                 ncores = ncores)$output1
+  }
+
+  return(res)
+}
+
+
+.ProjectField <- function(ano, eof_mode, wght) {
+  # ano: [sdate, lat, lon]
+  # eof_mode: [lat, lon] or [mode, lat, lon]
+  # wght: [lat, lon]
+
+  ntime <- dim(ano)[1]
+  if (length(dim(eof_mode)) == 2) {  # mode != NULL
+    # Initialization of pc.ver.
+    pc.ver <- array(NA, dim = ntime)  # [sdate]
+
+    # Weight
+    e.1 <- eof_mode * wght
+    ano <- ano * InsertDim(wght, 1, ntime)
+    #ano <- aaply(ano, 1, '*', wght)  # much heavier
+
+    na <- rowMeans(ano, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    #na <- apply(ano, 1, mean, na.rm = TRUE)  # much heavier
+    tmp <- ano * InsertDim(e.1, 1, ntime)  # [sdate, lat, lon]
+    rm(ano)
+    #pc.ver <- apply(tmp, 1, sum, na.rm = TRUE)  # much heavier
+    pc.ver <- rowSums(tmp, na.rm = TRUE)
+    pc.ver[which(is.na(na))] <- NA
+
+  } else {  # mode = NULL
+    # Weight
+    e.1 <- eof_mode * InsertDim(wght, 1, dim(eof_mode)[1])
+    dim(e.1) <- c(dim(eof_mode)[1], prod(dim(eof_mode)[2:3]))  # [mode, lat*lon]
+    ano <- ano * InsertDim(wght, 1, ntime)
+    dim(ano) <- c(ntime, prod(dim(ano)[2:3]))  # [sdate, lat*lon]
+
+    na <- rowMeans(ano, na.rm = TRUE)  # if [lat, lon] all NA, it's NA
+    na <- aperm(array(na, dim = c(ntime, dim(e.1)[1])), c(2, 1))
+
+    # Matrix multiplication of e.1 [mode, lat*lon] by t(ano) [lat*lon, sdate].
+    # Result: [mode, sdate]
+    pc.ver <- e.1 %*% t(ano)
+    pc.ver[which(is.na(na))] <- NA
+
+#    # Change dimensions back to fit the original input
+#    dim(projection) <- c(moredims, mode = unname(neofs))
+#    return(projection)
+  }
+
+  return(pc.ver)
+}
+
+
diff --git a/modules/Crossval/R/tmp/RPS.R b/modules/Crossval/R/tmp/RPS.R
index 05b6932543570af144ff45e2cd0e68b3a2a54877..0ed599ac4c145e605bbe39b76feb2ebfdf204d29 100644
--- a/modules/Crossval/R/tmp/RPS.R
+++ b/modules/Crossval/R/tmp/RPS.R
@@ -406,4 +406,3 @@ RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', cat_dim = NUL
   return(rps)
 }
-
diff --git a/modules/Crossval/R/tmp/RPSS.R b/modules/Crossval/R/tmp/RPSS.R
index dd474424fd9f9f93b826d3352cd2c0d71a91b0cd..fc9931ad8610f400f3cc59e902c9ee5bb3256508 100644
--- a/modules/Crossval/R/tmp/RPSS.R
+++ b/modules/Crossval/R/tmp/RPSS.R
@@ -636,4 +636,3 @@ RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member',
   return(list(rpss = rpss, sign = sign))
 }
-
diff --git 
a/modules/Crossval/R/tmp/RandomWalkTest.R b/modules/Crossval/R/tmp/RandomWalkTest.R index 36a85d283f089c84deea2cd3c4b94819495e352d..16d89f6d8b34bf824188677dd4f1728823725eca 100644 --- a/modules/Crossval/R/tmp/RandomWalkTest.R +++ b/modules/Crossval/R/tmp/RandomWalkTest.R @@ -182,4 +182,3 @@ RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', return(output) } - diff --git a/modules/Crossval/R/tmp/SprErr.R b/modules/Crossval/R/tmp/SprErr.R index fbe86adc1d085577f5bdc8a66a044c83c483e983..33642eab1c48740736623c168cb0edc167778b2d 100644 --- a/modules/Crossval/R/tmp/SprErr.R +++ b/modules/Crossval/R/tmp/SprErr.R @@ -225,4 +225,3 @@ SprErr <- function(exp, obs, dat_dim = NULL, memb_dim = 'member', return(res) } - diff --git a/modules/Crossval/R/tmp/Utils.R b/modules/Crossval/R/tmp/Utils.R new file mode 100644 index 0000000000000000000000000000000000000000..cd7a1e10db1500f69426057ba60d36d2a7bb4cab --- /dev/null +++ b/modules/Crossval/R/tmp/Utils.R @@ -0,0 +1,1885 @@ +#'@importFrom abind abind +#'@import plyr ncdf4 +#'@importFrom grDevices png jpeg pdf svg bmp tiff +#'@importFrom easyVerification convert2prob + +## Function to tell if a regexpr() match is a complete match to a specified name +.IsFullMatch <- function(x, name) { + x > 0 && attributes(x)$match.length == nchar(name) +} + +.ConfigReplaceVariablesInString <- function(string, replace_values, + allow_undefined_key_vars = FALSE) { + # This function replaces all the occurrences of a variable in a string by + # their corresponding string stored in the replace_values. + if (length(strsplit(string, "\\$")[[1]]) > 1) { + parts <- strsplit(string, "\\$")[[1]] + output <- "" + i <- 0 + for (part in parts) { + if (i %% 2 == 0) { + output <- paste0(output, part) + } else { + if (part %in% names(replace_values)) { + output <- paste0(output, + .ConfigReplaceVariablesInString(replace_values[[part]], + replace_values, + allow_undefined_key_vars)) + } else if (allow_undefined_key_vars) { + output <- paste0(output, "$", part, "$") + } else { + stop('Error: The variable $', part, + '$ was not defined in the configuration file.', sep = '') + } + } + i <- i + 1 + } + output + } else { + string + } +} + +.KnownLonNames <- function() { + known_lon_names <- c('lon', 'longitude', 'x', 'i', 'nav_lon') + return(known_lon_names) +} + +.KnownLatNames <- function() { + known_lat_names <- c('lat', 'latitude', 'y', 'j', 'nav_lat') + return(known_lat_names) +} + +.t2nlatlon <- function(t) { + ## As seen in cdo's griddes.c: ntr2nlat() + nlats <- (t * 3 + 1) / 2 + if ((nlats > 0) && (nlats - trunc(nlats) >= 0.5)) { + nlats <- ceiling(nlats) + } else { + nlats <- round(nlats) + } + if (nlats %% 2 > 0) { + nlats <- nlats + 1 + } + ## As seen in cdo's griddes.c: compNlon(), and as specified in ECMWF + nlons <- 2 * nlats + keep_going <- TRUE + while (keep_going) { + n <- nlons + if (n %% 8 == 0) n <- trunc(n / 8) + while (n %% 6 == 0) n <- trunc(n / 6) + while (n %% 5 == 0) n <- trunc(n / 5) + while (n %% 4 == 0) n <- trunc(n / 4) + while (n %% 3 == 0) n <- trunc(n / 3) + if (n %% 2 == 0) n <- trunc(n / 2) + if (n <= 8) { + keep_going <- FALSE + } else { + nlons <- nlons + 2 + if (nlons > 9999) { + stop("Error: pick another gaussian grid truncation. 
", + "It doesn't fulfill the standards to apply FFT.") + } + } + } + c(nlats, nlons) +} + +.nlat2t <- function(nlats) { + trunc((nlats * 2 - 1) / 3) +} + +.LoadDataFile <- function(work_piece, explore_dims = FALSE, silent = FALSE) { + # The purpose, working modes, inputs and outputs of this function are + # explained in ?LoadDataFile + #suppressPackageStartupMessages({library(ncdf4)}) + #suppressPackageStartupMessages({library(bigmemory)}) + #suppressPackageStartupMessages({library(plyr)}) + # Auxiliar function to convert array indices to lineal indices + arrayIndex2VectorIndex <- function(indices, dims) { + if (length(indices) > length(dims)) { + stop("Error: indices do not match dimensions in arrayIndex2VectorIndex.") + } + position <- 1 + dims <- rev(dims) + indices <- rev(indices) + for (i in seq_along(indices)) { + position <- position + (indices[i] - 1) * prod(dims[-(1:i)]) + } + position + } + + found_file <- NULL + dims <- NULL + grid_name <- units <- var_long_name <- NULL + is_2d_var <- array_across_gw <- NULL + data_across_gw <- NULL + + filename <- work_piece[['filename']] + namevar <- work_piece[['namevar']] + output <- work_piece[['output']] + # The names of all data files in the directory of the repository that match + # the pattern are obtained. + if (any(grep("^http", filename))) { + is_url <- TRUE + files <- filename + ## TODO: Check that the user is not using shell globbing exps. + } else { + is_url <- FALSE + files <- Sys.glob(filename) + } + + # If we don't find any, we leave the flag 'found_file' with a NULL value. + if (length(files) > 0) { + # The first file that matches the pattern is chosen and read. + filename <- head(files, 1) + filein <- filename + found_file <- filename + mask <- work_piece[['mask']] + + if (!silent && explore_dims) { + .message(paste("Exploring dimensions...", filename)) + ##} else { + ## cat(paste("* Reading & processing data...", filename, '\n')) + ##} + } + + # We will fill in 'expected_dims' with the names of the expected dimensions of + # the data array we'll retrieve from the file. + expected_dims <- NULL + remap_needed <- FALSE + # But first we open the file and work out whether the requested variable is 2d + fnc <- nc_open(filein) + if (!(namevar %in% names(fnc$var))) { + stop("Error: The variable", namevar, "is not defined in the file", filename) + } + var_long_name <- fnc$var[[namevar]]$longname + units <- fnc$var[[namevar]]$units + file_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + # The following two 'ifs' are to allow for 'lon'/'lat' by default, instead of + # 'longitude'/'latitude'. 
+ if (!(work_piece[['dimnames']][['lon']] %in% file_dimnames) && + (work_piece[['dimnames']][['lon']] == 'longitude') && + ('lon' %in% file_dimnames)) { + work_piece[['dimnames']][['lon']] <- 'lon' + } + if (!(work_piece[['dimnames']][['lat']] %in% file_dimnames) && + (work_piece[['dimnames']][['lat']] == 'latitude') && + ('lat' %in% file_dimnames)) { + work_piece[['dimnames']][['lat']] <- 'lat' + } + if (is.null(work_piece[['is_2d_var']])) { + is_2d_var <- all(c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) %in% + unlist(lapply(fnc$var[[namevar]][['dim']], + '[[', 'name'))) + } else { + is_2d_var <- work_piece[['is_2d_var']] + } + if ((is_2d_var || work_piece[['is_file_per_dataset']])) { + if (Sys.which("cdo")[[1]] == "") { + stop("Error: CDO libraries not available") + } + + cdo_version <- + strsplit(suppressWarnings( + system2("cdo", args = '-V', stderr = TRUE))[[1]], ' ')[[1]][5] + + cdo_version <- + as.numeric_version(unlist(strsplit(cdo_version, "[A-Za-z]", fixed = FALSE))[[1]]) + + } + # If the variable to load is 2-d, we need to determine whether: + # - interpolation is needed + # - subsetting is requested + if (is_2d_var) { + ## We read the longitudes and latitudes from the file. + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + first_lon_in_original_file <- lon[1] + # If a common grid is requested or we are exploring the file dimensions + # we need to read the grid type and size of the file to finally work out the + # CDO grid name. + if (!is.null(work_piece[['grid']]) || explore_dims) { + # Here we read the grid type and its number of longitudes and latitudes + file_info <- system(paste('cdo -s griddes', filein, '2> /dev/null'), intern = TRUE) + grids_positions <- grep('# gridID', file_info) + if (length(grids_positions) < 1) { + stop("The grid should be defined in the files.") + } + grids_first_lines <- grids_positions + 2 + grids_last_lines <- c((grids_positions - 2)[-1], length(file_info)) + grids_info <- as.list(seq_along(grids_positions)) + grids_info <- lapply(grids_info, + function (x) file_info[grids_first_lines[x]:grids_last_lines[x]]) + grids_info <- lapply(grids_info, function (x) gsub(" *", " ", x)) + grids_info <- lapply(grids_info, function (x) gsub("^ | $", "", x)) + grids_info <- lapply(grids_info, function (x) unlist(strsplit(x, " | = "))) + grids_types <- unlist(lapply(grids_info, function (x) x[grep('gridtype', x) + 1])) + grids_matches <- unlist(lapply(grids_info, function (x) { + nlons <- if (any(grep('xsize', x))) { + as.numeric(x[grep('xsize', x) + 1]) + } else { + NA + } + nlats <- if (any(grep('ysize', x))) { + as.numeric(x[grep('ysize', x) + 1]) + } else { + NA + } + result <- FALSE + if (!anyNA(c(nlons, nlats))) { + if ((nlons == length(lon)) && + (nlats == length(lat))) { + result <- TRUE + } + } + result + })) + grids_matches <- grids_matches[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_info <- grids_info[which(grids_types %in% c('gaussian', 'lonlat'))] + grids_types <- grids_types[which(grids_types %in% c('gaussian', 'lonlat'))] + if (length(grids_matches) == 0) { + stop("Error: Only 'gaussian' and 'lonlat' grids supported. 
See e.g: cdo sinfo ", filename) + } + if (sum(grids_matches) > 1) { + if ((all(grids_types[which(grids_matches)] == 'gaussian') || + all(grids_types[which(grids_matches)] == 'lonlat')) && + all(unlist(lapply(grids_info[which(grids_matches)], identical, + grids_info[which(grids_matches)][[1]])))) { + grid_type <- grids_types[which(grids_matches)][1] + } else { + stop("Error: Load() can't disambiguate: ", + "More than one lonlat/gaussian grids with the same size as ", + "the requested variable defined in ", filename) + } + } else if (sum(grids_matches) == 1) { + grid_type <- grids_types[which(grids_matches)] + } else { + stop("Unexpected error.") + } + grid_lons <- length(lon) + grid_lats <- length(lat) + # Convert to CDO grid name as seen in cdo's griddes.c: nlat2ntr() + if (grid_type == 'lonlat') { + grid_name <- paste0('r', grid_lons, 'x', grid_lats) + } else { + grid_name <- paste0('t', .nlat2t(grid_lats), 'grid') + } + if (is.null(work_piece[['grid']])) { + .warning(paste0("Detect the grid type to be '", grid_name, "'. ", + "If it is not expected, assign parameter 'grid' to avoid wrong result.")) + } + } + # If a common grid is requested, we will also calculate its size which we will use + # later on. + if (!is.null(work_piece[['grid']])) { + # Now we calculate the common grid type and its lons and lats + if (any(grep('^t\\d{1,+}grid$', work_piece[['grid']]))) { + common_grid_type <- 'gaussian' + common_grid_res <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + nlonlat <- .t2nlatlon(common_grid_res) + common_grid_lats <- nlonlat[1] + common_grid_lons <- nlonlat[2] + } else if (any(grep('^r\\d{1,+}x\\d{1,+}$', work_piece[['grid']]))) { + common_grid_type <- 'lonlat' + common_grid_lons <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][2]) + common_grid_lats <- as.numeric(strsplit(work_piece[['grid']], '[^0-9]{1,+}')[[1]][3]) + } else { + stop("Error: Only supported grid types in parameter 'grid' are tgrid and rx") + } + } else { + ## If no 'grid' is specified, there is no common grid. + ## But these variables are filled in for consistency in the code. 
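+        # These lengths are used further down, e.g. to derive the longitude
+        # spacing 360 / common_grid_lons when deciding whether a requested
+        # longitude subset crosses Greenwich.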
+ common_grid_lons <- length(lon) + common_grid_lats <- length(lat) + } + first_common_grid_lon <- 0 + last_common_grid_lon <- 360 - 360 / common_grid_lons + ## This is not true for gaussian grids or for some regular grids, but + ## is a safe estimation + first_common_grid_lat <- -90 + last_common_grid_lat <- 90 + # And finally determine whether interpolation is needed or not + remove_shift <- FALSE + if (!is.null(work_piece[['grid']])) { + if ((grid_lons != common_grid_lons) || + (grid_lats != common_grid_lats) || + (grid_type != common_grid_type) || + (lon[1] != first_common_grid_lon)) { + if (grid_lons == common_grid_lons && grid_lats == common_grid_lats && + grid_type == common_grid_type && lon[1] != first_common_grid_lon) { + remove_shift <- TRUE + } + remap_needed <- TRUE + common_grid_name <- work_piece[['grid']] + } + } else if ((lon[1] != first_common_grid_lon) && explore_dims && + !work_piece[['single_dataset']]) { + remap_needed <- TRUE + common_grid_name <- grid_name + remove_shift <- TRUE + } + if (remap_needed && (work_piece[['remap']] == 'con') && + (cdo_version >= as.numeric_version('1.7.0'))) { + work_piece[['remap']] <- 'ycon' + } + if (remove_shift && !explore_dims) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + .warning(paste0("The dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' doesn't start at longitude 0 and will be re-interpolated in order ", + "to align its longitudes with the standard CDO grids definable with ", + "the names 'tgrid' or 'rx', which are by definition ", + "starting at the longitude 0.\n")) + if (!is.null(mask)) { + .warning(paste0("A mask was provided for the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "'. This dataset has been re-interpolated to align its longitudes to ", + "start at 0. You must re-interpolate the corresponding mask to align ", + "its longitudes to start at 0 as well, if you haven't done so yet. ", + "Running cdo remapcon,", common_grid_name, + " original_mask_file.nc new_mask_file.nc will fix it.\n")) + } + } + if (remap_needed && (grid_lons < common_grid_lons || grid_lats < common_grid_lats)) { + if (!is.null(work_piece[['progress_amount']])) { + cat("\n") + } + if (!explore_dims) { + .warning(paste0("The dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is originally on ", + "a grid coarser than the common grid and it has been ", + "extrapolated. Check the results carefully. 
It is ", + "recommended to specify as common grid the coarsest grid ", + "among all requested datasets via the parameter 'grid'.\n")) + } + } + # Now calculate if the user requests for a lonlat subset or for the + # entire field + lonmin <- work_piece[['lon_limits']][1] + lonmax <- work_piece[['lon_limits']][2] + latmin <- work_piece[['lat_limits']][1] + latmax <- work_piece[['lat_limits']][2] + lon_subsetting_requested <- FALSE + lonlat_subsetting_requested <- FALSE + if (lonmin <= lonmax) { + if ((lonmin > first_common_grid_lon) || (lonmax < last_common_grid_lon)) { + lon_subsetting_requested <- TRUE + } + } else { + if ((lonmin - lonmax) > 360 / common_grid_lons) { + lon_subsetting_requested <- TRUE + } else { + gap_width <- floor(lonmin / (360 / common_grid_lons)) - + floor(lonmax / (360 / common_grid_lons)) + if (gap_width > 0) { + if (!(gap_width == 1 && (lonmin %% (360 / common_grid_lons) == 0) && + (lonmax %% (360 / common_grid_lons) == 0))) { + lon_subsetting_requested <- TRUE + } + } + } + } + if ((latmin > first_common_grid_lat) || (latmax < last_common_grid_lat) + || (lon_subsetting_requested)) { + lonlat_subsetting_requested <- TRUE + } + # Now that we know if subsetting was requested, we can say if final data + # will go across greenwich + if (lonmax < lonmin) { + data_across_gw <- TRUE + } else { + data_across_gw <- !lon_subsetting_requested + } + + # When remap is needed but no subsetting, the file is copied locally + # so that cdo works faster, and then interpolated. + # Otherwise the file is kept as is and the subset will have to be + # interpolated still. + if (!lonlat_subsetting_requested && remap_needed) { + nc_close(fnc) + filecopy <- tempfile(pattern = "load", fileext = ".nc") + file.copy(filein, filecopy) + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + # "-L" is to serialize I/O accesses. It prevents potential segmentation fault in the + # underlying hdf5 library. + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " -selname,", namevar, " ", filecopy, " ", filein, + " 2>/dev/null")) + file.remove(filecopy) + work_piece[['dimnames']][['lon']] <- 'lon' + work_piece[['dimnames']][['lat']] <- 'lat' + fnc <- nc_open(filein) + lon <- ncvar_get(fnc, work_piece[['dimnames']][['lon']]) + lat <- ncvar_get(fnc, work_piece[['dimnames']][['lat']]) + } + + # Read and check also the mask + if (!is.null(mask)) { + ###mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + if (is.list(mask)) { + if (!file.exists(mask[['path']])) { + stop("Error: Couldn't find the mask file", mask[['path']]) + } + mask_file <- mask[['path']] + ###file.copy(work_piece[['mask']][['path']], mask_file) + fnc_mask <- nc_open(mask_file) + vars_in_mask <- sapply(fnc_mask$var, '[[', 'name') + if ('nc_var_name' %in% names(mask)) { + if (!(mask[['nc_var_name']] %in% + vars_in_mask)) { + stop("Error: couldn't find variable", mask[['nc_var_name']], + "in the mask file", mask[['path']]) + } + } else { + if (length(vars_in_mask) != 1) { + stop("Error: one and only one non-coordinate variable should be ", + "defined in the mask file", + mask[['path']], + "if the component 'nc_var_name' is not specified. 
", + "Currently found: ", + toString(vars_in_mask), ".") + } else { + mask[['nc_var_name']] <- vars_in_mask + } + } + if (sum(fnc_mask$var[[mask[['nc_var_name']]]]$size > 1) != 2) { + stop("Error: the variable '", + mask[['nc_var_name']], + "' must be defined only over the dimensions '", + work_piece[['dimnames']][['lon']], "' and '", + work_piece[['dimnames']][['lat']], + "' in the mask file ", + mask[['path']]) + } + mask <- ncvar_get(fnc_mask, mask[['nc_var_name']], collapse_degen = TRUE) + nc_close(fnc_mask) + ### mask_lon <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lon']]) + ### mask_lat <- ncvar_get(fnc_mask, work_piece[['dimnames']][['lat']]) + ###} else { + ### dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + ### dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ### ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + ### fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ### ncvar_put(fnc_mask, ncdf_var, work_piece[['mask']]) + ### nc_close(fnc_mask) + ### fnc_mask <- nc_open(mask_file) + ### work_piece[['mask']] <- list(path = mask_file, nc_var_name = 'LSM') + ### mask_lon <- lon + ### mask_lat <- lat + ###} + ###} + ### Now ready to check that the mask is right + ##if (!(lonlat_subsetting_requested && remap_needed)) { + ### if ((dim(mask)[2] != length(lon)) || (dim(mask)[1] != length(lat))) { + ### stop(paste("Error: the mask of the dataset with index ", + ### tail(work_piece[['indices']], 1), " in '", + ### work_piece[['dataset_type']], "' is wrong. ", + ### "It must be on the common grid if the selected output type is 'lonlat', ", + ### "'lon' or 'lat', or 'areave' and 'grid' has been specified. It must be on ", + ### "the grid of the corresponding dataset if the selected output type is ", + ### "'areave' and no 'grid' has been specified. For more information ", + ### "check ?Load and see help on parameters 'grid', 'maskmod' and ", + ### "'maskobs'.", sep = "")) + ### } + ###if (!(identical(mask_lon, lon) && identical(mask_lat, lat))) { + ### stop(paste0("Error: the longitudes and latitudes in the masks must be ", + ### "identical to the ones in the corresponding data files if output = 'areave' ", + ### " or, if the selected output is 'lon', 'lat' or 'lonlat', the longitudes in ", + ### "the mask file must start by 0 and the latitudes must be ordered from ", + ### "highest to lowest. See\n ", + ### work_piece[['mask']][['path']], " and ", filein)) + ###} + } + } + + lon_indices <- seq_along(lon) + if (!(lonlat_subsetting_requested && remap_needed)) { + lon[which(lon < 0)] <- lon[which(lon < 0)] + 360 + } + if (lonmax >= lonmin) { + lon_indices <- lon_indices[which(((lon %% 360) >= lonmin) & ((lon %% 360) <= lonmax))] + } else if (!remap_needed) { + lon_indices <- lon_indices[which(((lon %% 360) <= lonmax) | ((lon %% 360) >= lonmin))] + } + lat_indices <- which(lat >= latmin & lat <= latmax) + ## In most of the cases the latitudes are ordered from -90 to 90. + ## We will reorder them to be in the order from 90 to -90, so mostly + ## always the latitudes are reordered. + ## TODO: This could be avoided in future. 
+ if (lat[1] < lat[length(lat)]) { + lat_indices <- lat_indices[rev(seq_along(lat_indices))] + } + if (!is.null(mask) && !(lonlat_subsetting_requested && remap_needed)) { + if ((dim(mask)[1] != length(lon)) || (dim(mask)[2] != length(lat))) { + stop("Error: the mask of the dataset with index ", tail(work_piece[['indices']], 1), + " in '", work_piece[['dataset_type']], "' is wrong. It must be on the ", + "common grid if the selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must be on the grid of ", + "the corresponding dataset if the selected output type is 'areave' and ", + "no 'grid' has been specified. For more information check ?Load and see ", + "help on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask <- mask[lon_indices, lat_indices] + } + ## If the user requests subsetting, we must extend the lon and lat limits if possible + ## so that the interpolation after is done properly + maximum_extra_points <- work_piece[['remapcells']] + if (lonlat_subsetting_requested && remap_needed) { + if ((maximum_extra_points > (head(lon_indices, 1) - 1)) || + (maximum_extra_points > (length(lon) - tail(lon_indices, 1)))) { + ## if the requested number of points goes beyond the left or right + ## sides of the map, we need to take the entire map so that the + ## interpolation works properly + lon_indices <- seq_along(lon) + } else { + extra_points <- min(maximum_extra_points, head(lon_indices, 1) - 1) + if (extra_points > 0) { + lon_indices <- + c((head(lon_indices, 1) - extra_points):(head(lon_indices, 1) - 1), lon_indices) + } + extra_points <- min(maximum_extra_points, length(lon) - tail(lon_indices, 1)) + if (extra_points > 0) { + lon_indices <- c(lon_indices, + (tail(lon_indices, 1) + 1):(tail(lon_indices, 1) + extra_points)) + } + } + min_lat_ind <- min(lat_indices) + max_lat_ind <- max(lat_indices) + extra_points <- min(maximum_extra_points, min_lat_ind - 1) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c(lat_indices, (min_lat_ind - 1):(min_lat_ind - extra_points)) + } else { + lat_indices <- c((min_lat_ind - extra_points):(min_lat_ind - 1), lat_indices) + } + } + extra_points <- min(maximum_extra_points, length(lat) - max_lat_ind) + if (extra_points > 0) { + if (lat[1] < tail(lat, 1)) { + lat_indices <- c((max_lat_ind + extra_points):(max_lat_ind + 1), lat_indices) + } else { + lat_indices <- c(lat_indices, (max_lat_ind + 1):(max_lat_ind + extra_points)) + } + } + } + lon <- lon[lon_indices] + lat <- lat[lat_indices] + expected_dims <- c(work_piece[['dimnames']][['lon']], + work_piece[['dimnames']][['lat']]) + } else { + lon <- 0 + lat <- 0 + } + # We keep on filling the expected dimensions + var_dimnames <- unlist(lapply(fnc$var[[namevar]][['dim']], '[[', 'name')) + nmemb <- nltime <- NULL + ## Sometimes CDO renames 'members' dimension to 'lev' + old_members_dimname <- NULL + if (('lev' %in% var_dimnames) && !(work_piece[['dimnames']][['member']] %in% var_dimnames)) { + old_members_dimname <- work_piece[['dimnames']][['member']] + work_piece[['dimnames']][['member']] <- 'lev' + } + if (work_piece[['dimnames']][['member']] %in% var_dimnames) { + nmemb <- fnc$var[[namevar]][['dim']][[match(work_piece[['dimnames']][['member']], + var_dimnames)]]$len + expected_dims <- c(expected_dims, work_piece[['dimnames']][['member']]) + } else { + nmemb <- 1 + } + if (length(expected_dims) > 0) { + dim_matches <- match(expected_dims, var_dimnames) + if (anyNA(dim_matches)) { + if (!is.null(old_members_dimname)) { + 
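# Restore the user-provided member dimension name so the error
+          # message below reports the expected name rather than 'lev'.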
+          expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+      }
+      stop("Error: the expected dimension(s) ",
+           toString(expected_dims[which(is.na(dim_matches))]),
+           " were not found in ", filename)
+    }
+    time_dimname <- var_dimnames[-dim_matches]
+  } else {
+    time_dimname <- var_dimnames
+  }
+  if (length(time_dimname) > 0) {
+    if (length(time_dimname) == 1) {
+      nltime <- fnc$var[[namevar]][['dim']][[match(time_dimname, var_dimnames)]]$len
+      expected_dims <- c(expected_dims, time_dimname)
+      dim_matches <- match(expected_dims, var_dimnames)
+    } else {
+      if (!is.null(old_members_dimname)) {
+        expected_dims[which(expected_dims == 'lev')] <- old_members_dimname
+      }
+      stop("Error: the variable ", namevar,
+           " is defined over more dimensions than expected (",
+           toString(c(expected_dims, 'time')),
+           "). It could also be that the members, longitude or latitude ",
+           "dimensions are named incorrectly. In that case, either rename ",
+           "the dimensions in the file or adjust Load() to recognize the actual ",
+           "name with the parameter 'dimnames'. See file ", filename)
+    }
+  } else {
+    nltime <- 1
+  }
+
+  # Now we must retrieve the data from the file, but only the requested
+  # indices. So we build up the indices to retrieve.
+  # Longitudes and latitudes have been retrieved already.
+  if (explore_dims) {
+    # If we're exploring the file we only want one time step from one member,
+    # to regrid it and work out the number of longitudes and latitudes.
+    # We don't need more.
+    members <- 1
+    ltimes_list <- list(1)
+  } else {
+    # The data is arranged in the array 'tmp' with the dimensions in a
+    # common order:
+    #   1) Longitudes
+    #   2) Latitudes
+    #   3) Members (even if it is not a file-per-member experiment)
+    #   4) Lead-times
+    if (work_piece[['is_file_per_dataset']]) {
+      time_indices <- 1:nltime
+      mons <- strsplit(system(paste('cdo showmon ', filein,
+                                    ' 2>/dev/null'), intern = TRUE), split = ' ')
+      years <- strsplit(system(paste('cdo showyear ', filein,
+                                     ' 2>/dev/null'), intern = TRUE), split = ' ')
+      mons <- as.numeric(mons[[1]][which(mons[[1]] != "")])
+      years <- as.numeric(years[[1]][which(years[[1]] != "")])
+      time_indices <- ts(time_indices, start = c(years[1], mons[1]),
+                         end = c(years[length(years)], mons[length(mons)]),
+                         frequency = 12)
+      ltimes_list <- list()
+      for (sdate in work_piece[['startdates']]) {
+        selected_time_indices <- window(time_indices, start = c(as.numeric(
+                                        substr(sdate, 1, 4)), as.numeric(substr(sdate, 5, 6))),
+                                        end = c(3000, 12), frequency = 12, extend = TRUE)
+        selected_time_indices <- selected_time_indices[work_piece[['leadtimes']]]
+        ltimes_list <- c(ltimes_list, list(selected_time_indices))
+      }
+    } else {
+      ltimes <- work_piece[['leadtimes']]
+      #if (work_piece[['dataset_type']] == 'exp') {
+      ltimes_list <- list(ltimes[which(ltimes <= nltime)])
+      #}
+    }
+    ## TODO: Emit this kind of warning when reading matrices:
+    # if (nmember < nmemb) {
+    #   cat("Warning:
+    members <- 1:work_piece[['nmember']]
+    members <- members[which(members <= nmemb)]
+  }
+
+  # Now, for each list of leadtimes to load (usually only one list with all
+  # the leadtimes), we'll join the indices and retrieve the data
+  found_disordered_dims <- FALSE
+  for (ltimes in ltimes_list) {
+    if (is_2d_var) {
+      start <- c(min(lon_indices), min(lat_indices))
+      end <- c(max(lon_indices), max(lat_indices))
+      if (lonlat_subsetting_requested && remap_needed) {
+        subset_indices <- list(min(lon_indices):max(lon_indices) - min(lon_indices) + 1,
+                               lat_indices - min(lat_indices) + 1)
+        dim_longitudes <- 
ncdim_def(work_piece[['dimnames']][['lon']], "degrees_east", lon) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], "degrees_north", lat) + ncdf_dims <- list(dim_longitudes, dim_latitudes) + } else { + subset_indices <- list(lon_indices - min(lon_indices) + 1, + lat_indices - min(lat_indices) + 1) + ncdf_dims <- list() + } + final_dims <- c(length(subset_indices[[1]]), length(subset_indices[[2]]), 1, 1) + } else { + start <- end <- NULL + subset_indices <- list() + ncdf_dims <- list() + final_dims <- c(1, 1, 1, 1) + } + + if (work_piece[['dimnames']][['member']] %in% expected_dims) { + start <- c(start, head(members, 1)) + end <- c(end, tail(members, 1)) + subset_indices <- c(subset_indices, list(members - head(members, 1) + 1)) + dim_members <- ncdim_def(work_piece[['dimnames']][['member']], "", members) + ncdf_dims <- c(ncdf_dims, list(dim_members)) + final_dims[3] <- length(members) + } + if (time_dimname %in% expected_dims) { + if (!all(is.na(ltimes))) { + start <- c(start, head(ltimes[which(!is.na(ltimes))], 1)) + end <- c(end, tail(ltimes[which(!is.na(ltimes))], 1)) + subset_indices <- c(subset_indices, + list(ltimes - head(ltimes[which(!is.na(ltimes))], 1) + 1)) + } else { + start <- c(start, NA) + end <- c(end, NA) + subset_indices <- c(subset_indices, list(ltimes)) + } + dim_time <- ncdim_def(time_dimname, "", seq_along(ltimes), unlim = TRUE) + ncdf_dims <- c(ncdf_dims, list(dim_time)) + final_dims[4] <- length(ltimes) + } + count <- end - start + 1 + start <- start[dim_matches] + count <- count[dim_matches] + subset_indices <- subset_indices[dim_matches] + # Now that we have the indices to retrieve, we retrieve the data + if (prod(final_dims) > 0) { + tmp <- take(ncvar_get(fnc, namevar, start, count, + collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + # The data is regridded if it corresponds to an atmospheric variable. When + # the chosen output type is 'areave' the data is not regridded to not + # waste computing time unless the user specified a common grid. 
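+      # As an illustration with hypothetical values (remap = 'bil', common
+      # grid 'r256x128', a 10E-40E / 30N-70N request), the command composed
+      # further below is roughly:
+      #   cdo -L -s -sellonlatbox,10,40,30,70 -remapbil,r256x128 in.nc out.nc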
+ if (is_2d_var) { + ###if (!is.null(work_piece[['mask']]) && !(lonlat_subsetting_requested && remap_needed)) { + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### start[dim_matches[1:2]], count[dim_matches[1:2]], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + if (lonlat_subsetting_requested && remap_needed) { + filein <- tempfile(pattern = "loadRegridded", fileext = ".nc") + filein2 <- tempfile(pattern = "loadRegridded2", fileext = ".nc") + ncdf_var <- ncvar_def(namevar, "", ncdf_dims[dim_matches], + fnc$var[[namevar]]$missval, + prec = if (fnc$var[[namevar]]$prec == 'int') { + 'integer' + } else { + fnc$var[[namevar]]$prec + }) + scale_factor <- ifelse(fnc$var[[namevar]]$hasScaleFact, fnc$var[[namevar]]$scaleFact, 1) + add_offset <- ifelse(fnc$var[[namevar]]$hasAddOffset, fnc$var[[namevar]]$addOffset, 0) + if (fnc$var[[namevar]]$hasScaleFact || fnc$var[[namevar]]$hasAddOffset) { + tmp <- (tmp - add_offset) / scale_factor + } + #nc_close(fnc) + fnc2 <- nc_create(filein2, list(ncdf_var)) + ncvar_put(fnc2, ncdf_var, tmp) + if (add_offset != 0) { + ncatt_put(fnc2, ncdf_var, 'add_offset', add_offset) + } + if (scale_factor != 1) { + ncatt_put(fnc2, ncdf_var, 'scale_factor', scale_factor) + } + nc_close(fnc2) + system(paste0("cdo -L -s -sellonlatbox,", if (lonmin > lonmax) { + "0,360," + } else { + paste0(lonmin, ",", lonmax, ",") + }, latmin, ",", latmax, + " -remap", work_piece[['remap']], ",", common_grid_name, + " ", filein2, " ", filein, " 2>/dev/null")) + file.remove(filein2) + fnc2 <- nc_open(filein) + sub_lon <- ncvar_get(fnc2, 'lon') + sub_lat <- ncvar_get(fnc2, 'lat') + ## We read the longitudes and latitudes from the file. + ## In principle cdo should put in order the longitudes + ## and slice them properly unless data is across greenwich + sub_lon[which(sub_lon < 0)] <- sub_lon[which(sub_lon < 0)] + 360 + sub_lon_indices <- seq_along(sub_lon) + if (lonmax < lonmin) { + sub_lon_indices <- sub_lon_indices[which((sub_lon <= lonmax) | (sub_lon >= lonmin))] + } + sub_lat_indices <- seq_along(sub_lat) + ## In principle cdo should put in order the latitudes + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + sub_lat_indices <- rev(seq_along(sub_lat)) + } + final_dims[c(1, 2)] <- c(length(sub_lon_indices), length(sub_lat_indices)) + subset_indices[[dim_matches[1]]] <- sub_lon_indices + subset_indices[[dim_matches[2]]] <- sub_lat_indices + + tmp <- take(ncvar_get(fnc2, namevar, collapse_degen = FALSE), + seq_along(subset_indices), subset_indices) + + if (!is.null(mask)) { + ## We create a very simple 2d netcdf file that is then interpolated to the common + ## grid to know what are the lons and lats of our slice of data + mask_file <- tempfile(pattern = 'loadMask', fileext = '.nc') + mask_file_remap <- tempfile(pattern = 'loadMask', fileext = '.nc') + dim_longitudes <- ncdim_def(work_piece[['dimnames']][['lon']], + "degrees_east", c(0, 360)) + dim_latitudes <- ncdim_def(work_piece[['dimnames']][['lat']], + "degrees_north", c(-90, 90)) + ncdf_var <- ncvar_def('LSM', "", list(dim_longitudes, dim_latitudes), NA, 'double') + fnc_mask <- nc_create(mask_file, list(ncdf_var)) + ncvar_put(fnc_mask, ncdf_var, array(rep(0, 4), dim = c(2, 2))) + nc_close(fnc_mask) + system(paste0("cdo -L -s remap", work_piece[['remap']], ",", + common_grid_name, + " ", mask_file, " ", mask_file_remap, " 2>/dev/null")) + fnc_mask <- nc_open(mask_file_remap) + mask_lons <- ncvar_get(fnc_mask, 'lon') + mask_lats <- ncvar_get(fnc_mask, 'lat') + nc_close(fnc_mask) 
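+          # The 2 x 2 dummy field written above spans the whole globe, so the
+          # 'lon'/'lat' read back from the remapped file are exactly the
+          # common-grid axes; the zero values themselves are never used.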
+ file.remove(mask_file, mask_file_remap) + if ((dim(mask)[1] != common_grid_lons) || (dim(mask)[2] != common_grid_lats)) { + stop("Error: the mask of the dataset with index ", + tail(work_piece[['indices']], 1), " in '", + work_piece[['dataset_type']], + "' is wrong. It must be on the common grid if the ", + "selected output type is 'lonlat', 'lon' or 'lat', ", + "or 'areave' and 'grid' has been specified. It must ", + "be on the grid of the corresponding dataset if the ", + "selected output type is 'areave' and no 'grid' has been ", + "specified. For more information check ?Load and see help ", + "on parameters 'grid', 'maskmod' and 'maskobs'.") + } + mask_lons[which(mask_lons < 0)] <- mask_lons[which(mask_lons < 0)] + 360 + if (lonmax >= lonmin) { + mask_lon_indices <- which((mask_lons >= lonmin) & (mask_lons <= lonmax)) + } else { + mask_lon_indices <- which((mask_lons >= lonmin) | (mask_lons <= lonmax)) + } + mask_lat_indices <- which((mask_lats >= latmin) & (mask_lats <= latmax)) + if (sub_lat[1] < sub_lat[length(sub_lat)]) { + mask_lat_indices <- mask_lat_indices[rev(seq_along(mask_lat_indices))] + } + mask <- mask[mask_lon_indices, mask_lat_indices] + } + sub_lon <- sub_lon[sub_lon_indices] + sub_lat <- sub_lat[sub_lat_indices] + ### nc_close(fnc_mask) + ### system(paste0("cdo -s -sellonlatbox,", if (lonmin > lonmax) { + ### "0,360," + ### } else { + ### paste0(lonmin, ",", lonmax, ",") + ### }, latmin, ",", latmax, + ### " -remap", work_piece[['remap']], ",", common_grid_name, + ###This is wrong: same files + ### " ", mask_file, " ", mask_file, " 2>/dev/null", sep = "")) + ### fnc_mask <- nc_open(mask_file) + ### mask <- take(ncvar_get(fnc_mask, work_piece[['mask']][['nc_var_name']], + ### collapse_degen = FALSE), 1:2, subset_indices[dim_matches[1:2]]) + ###} + } + } + if (is.unsorted(dim_matches)) { + if (!found_disordered_dims && + rev(work_piece[['indices']])[2] == 1 && + rev(work_piece[['indices']])[3] == 1) { + found_disordered_dims <- TRUE + .warning(paste0("The dimensions for the variable ", namevar, + " in the files of the experiment with index ", + tail(work_piece[['indices']], 1), + " are not in the optimal order for loading with Load(). ", + "The optimal order would be '", + toString(expected_dims), + "'. One of the files of the dataset is stored in ", filename)) + } + tmp <- aperm(tmp, dim_matches) + } + dim(tmp) <- final_dims + # If we are exploring the file we don't need to process and arrange + # the retrieved data. We only need to keep the dimension sizes. + if (is_2d_var && lonlat_subsetting_requested && remap_needed) { + final_lons <- sub_lon + final_lats <- sub_lat + } else { + final_lons <- lon + final_lats <- lat + } + if (explore_dims) { + if (work_piece[['is_file_per_member']]) { + ## TODO: When the exp_full_path contains asterisks and is file_per_member + ## members from different datasets may be accounted. + ## Also if one file member is missing the accounting will be wrong. + ## Should parse the file name and extract number of members. + if (is_url) { + nmemb <- NULL + } else { + nmemb <- length(files) + } + } + dims <- list(member = nmemb, ftime = nltime, lon = final_lons, lat = final_lats) + } else { + # If we are not exploring, then we have to process the retrieved data + if (is_2d_var) { + tmp <- apply(tmp, c(3, 4), function(x) { + # Disable of large values. 
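+          # Values outside work_piece[['var_limits']] are set to NA below;
+          # e.g. with hypothetical limits c(190, 340) for a temperature in
+          # Kelvin, a spurious fill value such as 1e30 would be discarded.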
+          if (!is.na(work_piece[['var_limits']][2])) {
+            x[which(x > work_piece[['var_limits']][2])] <- NA
+          }
+          if (!is.na(work_piece[['var_limits']][1])) {
+            x[which(x < work_piece[['var_limits']][1])] <- NA
+          }
+          if (!is.null(mask)) {
+            x[which(mask < 0.5)] <- NA
+          }
+
+          if (output == 'areave' || output == 'lon') {
+            weights <- InsertDim(cos(final_lats * pi / 180), 1,
+                                 length(final_lons), name = 'lon')
+            weights[which(is.na(x))] <- NA
+            if (output == 'areave') {
+              weights <- weights / mean(weights, na.rm = TRUE)
+              mean(x * weights, na.rm = TRUE)
+            } else {
+              weights <- weights / InsertDim(MeanDims(weights, 2, na.rm = TRUE), 2,
+                                             length(final_lats), name = 'lat')
+              MeanDims(x * weights, 2, na.rm = TRUE)
+            }
+          } else if (output == 'lat') {
+            MeanDims(x, 1, na.rm = TRUE)
+          } else if (output == 'lonlat') {
+            signif(x, 5)
+          }
+        })
+        if (output == 'areave') {
+          dim(tmp) <- c(1, 1, final_dims[3:4])
+        } else if (output == 'lon') {
+          dim(tmp) <- c(final_dims[1], 1, final_dims[3:4])
+        } else if (output == 'lat') {
+          dim(tmp) <- c(1, final_dims[c(2, 3, 4)])
+        } else if (output == 'lonlat') {
+          dim(tmp) <- final_dims
+        }
+      }
+      var_data <- attach.big.matrix(work_piece[['out_pointer']])
+      if (work_piece[['dims']][['member']] > 1 && nmemb > 1 &&
+          work_piece[['dims']][['ftime']] > 1 &&
+          nltime < work_piece[['dims']][['ftime']]) {
+        work_piece[['indices']][2] <- work_piece[['indices']][2] - 1
+        for (jmemb in members) {
+          work_piece[['indices']][2] <- work_piece[['indices']][2] + 1
+          out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+          out_indices <- out_position:(out_position + length(tmp[, , jmemb, ]) - 1)
+          var_data[out_indices] <- as.vector(tmp[, , jmemb, ])
+        }
+        work_piece[['indices']][2] <- work_piece[['indices']][2] - tail(members, 1) + 1
+      } else {
+        out_position <- arrayIndex2VectorIndex(work_piece[['indices']], work_piece[['dims']])
+        out_indices <- out_position:(out_position + length(tmp) - 1)
+        var_data[out_indices] <- as.vector(aperm(tmp, c(1, 2, 4, 3)))
+      }
+      work_piece[['indices']][3] <- work_piece[['indices']][3] + 1
+    }
+   }
+  }
+  nc_close(fnc)
+  if (is_2d_var) {
+    if (remap_needed) {
+      array_across_gw <- FALSE
+      file.remove(filein)
+      ###if (!is.null(mask) && lonlat_subsetting_requested) {
+      ###  file.remove(mask_file)
+      ###}
+    } else {
+      if (first_lon_in_original_file < 0) {
+        array_across_gw <- data_across_gw
+      } else {
+        array_across_gw <- FALSE
+      }
+    }
+   }
+  }
+  if (explore_dims) {
+    list(dims = dims, is_2d_var = is_2d_var, grid = grid_name,
+         units = units, var_long_name = var_long_name,
+         data_across_gw = data_across_gw, array_across_gw = array_across_gw)
+  } else {
+    ###if (!silent && !is.null(progress_connection) && !is.null(work_piece[['progress_amount']])) {
+    ###  foobar <- writeBin(work_piece[['progress_amount']], progress_connection)
+    ###}
+    if (!silent && !is.null(work_piece[['progress_amount']])) {
+      message(work_piece[['progress_amount']], appendLF = FALSE)
+    }
+    found_file
+  }
+}
+
+.LoadSampleData <- function(var, exp = NULL, obs = NULL, sdates,
+                            nmember = NULL, nmemberobs = NULL,
+                            nleadtime = NULL, leadtimemin = 1,
+                            leadtimemax = NULL, storefreq = 'monthly',
+                            sampleperiod = 1, lonmin = 0, lonmax = 360,
+                            latmin = -90, latmax = 90, output = 'areave',
+                            method = 'conservative', grid = NULL,
+                            maskmod = vector("list", 15),
+                            maskobs = vector("list", 15),
+                            configfile = NULL, suffixexp = NULL,
+                            suffixobs = NULL, varmin = NULL, varmax = NULL,
+                            silent = FALSE, nprocs = NULL) {
+  ## This 
function loads and selects sample data stored in sampleMap and + ## sampleTimeSeries and is used in the examples instead of Load() so as + ## to avoid nco and cdo system calls and computation time in the stage + ## of running examples in the CHECK process on CRAN. + selected_start_dates <- match(sdates, c('19851101', '19901101', '19951101', + '20001101', '20051101')) + start_dates_position <- 3 + lead_times_position <- 4 + + if (output == 'lonlat') { + sampleData <- s2dv::sampleMap + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times, , ] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times, , ] + } else if (output == 'areave') { + sampleData <- s2dv::sampleTimeSeries + if (is.null(leadtimemax)) { + leadtimemax <- dim(sampleData$mod)[lead_times_position] + } + selected_lead_times <- leadtimemin:leadtimemax + + dataOut <- sampleData + dataOut$mod <- sampleData$mod[, , selected_start_dates, selected_lead_times] + dataOut$obs <- sampleData$obs[, , selected_start_dates, selected_lead_times] + } + + dims_out <- dim(sampleData$mod) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$mod) <- dims_out + + dims_out <- dim(sampleData$obs) + dims_out[start_dates_position] <- length(selected_start_dates) + dims_out[lead_times_position] <- length(selected_lead_times) + dim(dataOut$obs) <- dims_out + + invisible(list(mod = dataOut$mod, obs = dataOut$obs, + lat = dataOut$lat, lon = dataOut$lon)) +} + +.ConfigGetDatasetInfo <- function(matching_entries, table_name) { + # This function obtains the information of a dataset and variable pair, + # applying all the entries that match in the configuration file. + if (table_name == 'experiments') { + id <- 'EXP' + } else { + id <- 'OBS' + } + defaults <- c(paste0('$DEFAULT_', id, '_MAIN_PATH$'), + paste0('$DEFAULT_', id, '_FILE_PATH$'), + '$DEFAULT_NC_VAR_NAME$', '$DEFAULT_SUFFIX$', + '$DEFAULT_VAR_MIN$', '$DEFAULT_VAR_MAX$') + info <- NULL + + for (entry in matching_entries) { + if (is.null(info)) { + info <- entry[-1:-2] + info[which(info == '*')] <- defaults[which(info == '*')] + } else { + info[which(entry[-1:-2] != '*')] <- entry[-1:-2][which(entry[-1:-2] != '*')] + } + } + + info <- as.list(info) + names(info) <- c('main_path', 'file_path', 'nc_var_name', 'suffix', 'var_min', 'var_max') + info +} + +.ReplaceGlobExpressions <- function(path_with_globs, actual_path, + replace_values, tags_to_keep, + dataset_name, permissive) { + # The goal of this function is to replace the shell globbing expressions in + # a path pattern (that may contain shell globbing expressions and Load() + # tags) by the corresponding part of the real existing path. + # What is done actually is to replace all the values of the tags in the + # actual path by the corresponding $TAG$ + # + # It takes mainly two inputs. 
# The path with expressions and tags, e.g.:
+  # /data/experiments/*/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_*$START_DATE$*.nc
+  # and a complete known path to one of the matching files, e.g.:
+  # /data/experiments/ecearth/i00k/tos/tos_fc0-1_19901101_199011-199110.nc
+  # and it returns the path pattern but without shell globbing expressions:
+  # /data/experiments/ecearth/$EXP_NAME$/$VAR_NAME$/$VAR_NAME$_fc0-1_$START_DATE$_199011-199110.nc
+  #
+  # To do that, it also needs as input the list of replace values (the
+  # association of each tag with its value).
+  #
+  # All the tags not present in the parameter tags_to_keep will be replaced.
+  #
+  # Not all cases can be resolved with the implemented algorithm. In an
+  # unsolvable case a warning is given and one possible guess is returned.
+  #
+  # In some cases it is useful to replace only the expressions in the
+  # path to the file, but not the ones in the file name itself. To keep the
+  # expressions in the file name, the parameter permissive can be set to
+  # TRUE. To replace all the expressions it can be set to FALSE.
+  clean <- function(x) {
+    if (nchar(x) > 0) {
+      x <- gsub('\\\\', '', x)
+      x <- gsub('\\^', '', x)
+      x <- gsub('\\$', '', x)
+      x <- unname(sapply(strsplit(x, '[', fixed = TRUE)[[1]], function(y) gsub('.*]', '.', y)))
+      do.call(paste0, as.list(x))
+    } else {
+      x
+    }
+  }
+
+  strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse = "")
+
+  if (permissive) {
+    actual_path_chunks <- strsplit(actual_path, '/')[[1]]
+    actual_path <- paste(actual_path_chunks[-length(actual_path_chunks)], collapse = '/')
+    file_name <- tail(actual_path_chunks, 1)
+    if (length(actual_path_chunks) > 1) {
+      file_name <- paste0('/', file_name)
+    }
+    path_with_globs_chunks <- strsplit(path_with_globs, '/')[[1]]
+    path_with_globs <- paste(path_with_globs_chunks[-length(path_with_globs_chunks)],
+                             collapse = '/')
+    path_with_globs <- .ConfigReplaceVariablesInString(path_with_globs, replace_values)
+    file_name_with_globs <- tail(path_with_globs_chunks, 1)
+    if (length(path_with_globs_chunks) > 1) {
+      file_name_with_globs <- paste0('/', file_name_with_globs)
+    }
+    right_known <- head(strsplit(file_name_with_globs, '*', fixed = TRUE)[[1]], 1)
+    right_known_no_tags <- .ConfigReplaceVariablesInString(right_known, replace_values)
+    path_with_globs_rx <- utils::glob2rx(paste0(path_with_globs, right_known_no_tags))
+    match <- regexpr(gsub('$', '', path_with_globs_rx, fixed = TRUE),
+                     paste0(actual_path, file_name))
+    if (match != 1) {
+      stop("Incorrect parameters to replace glob expressions. 
", + "The path with expressions does not match the actual path.") + } + if (attr(match, 'match.length') - nchar(right_known_no_tags) < nchar(actual_path)) { + path_with_globs <- paste0(path_with_globs, right_known_no_tags, '*') + file_name_with_globs <- sub(right_known, '/*', file_name_with_globs) + } + } + path_with_globs_rx <- utils::glob2rx(path_with_globs) + values_to_replace <- NULL + tags_to_replace_starts <- NULL + tags_to_replace_ends <- NULL + give_warning <- FALSE + for (tag in tags_to_keep) { + matches <- gregexpr(paste0('$', tag, '$'), path_with_globs_rx, fixed = TRUE)[[1]] + lengths <- attr(matches, 'match.length') + if (!(length(matches) == 1 && matches[1] == -1)) { + for (i in seq_along(matches)) { + left <- NULL + if (matches[i] > 1) { + left <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, 1, + matches[i] - 1), replace_values) + left_known <- + strReverse(head(strsplit(strReverse(left), + strReverse('.*'), fixed = TRUE)[[1]], 1)) + } + right <- NULL + if ((matches[i] + lengths[i] - 1) < nchar(path_with_globs_rx)) { + right <- + .ConfigReplaceVariablesInString(substr(path_with_globs_rx, + matches[i] + lengths[i], + nchar(path_with_globs_rx)), + replace_values) + right_known <- head(strsplit(right, '.*', fixed = TRUE)[[1]], 1) + } + match_limits <- NULL + if (!is.null(left)) { + left_match <- regexpr(paste0(left, replace_values[[tag]], right_known), actual_path) + match_len <- attr(left_match, 'match.length') + left_match_limits <- + c(left_match + match_len - 1 - nchar(clean(right_known)) - + nchar(replace_values[[tag]]) + 1, + left_match + match_len - 1 - nchar(clean(right_known))) + if (!(left_match < 1)) { + match_limits <- left_match_limits + } + } + right_match <- NULL + if (!is.null(right)) { + right_match <- regexpr(paste0(left_known, replace_values[[tag]], right), actual_path) + match_len <- attr(right_match, 'match.length') + right_match_limits <- + c(right_match + nchar(clean(left_known)), + right_match + nchar(clean(left_known)) + + nchar(replace_values[[tag]]) - 1) + if (is.null(match_limits) && !(right_match < 1)) { + match_limits <- right_match_limits + } + } + if (!is.null(right_match) && !is.null(left_match)) { + if (!identical(right_match_limits, left_match_limits)) { + give_warning <- TRUE + } + } + if (is.null(match_limits)) { + stop("Too complex path pattern specified for ", dataset_name, + ". 
Specify a simpler path pattern for this dataset.") + } + values_to_replace <- c(values_to_replace, tag) + tags_to_replace_starts <- c(tags_to_replace_starts, match_limits[1]) + tags_to_replace_ends <- c(tags_to_replace_ends, match_limits[2]) + } + } + } + + if (length(tags_to_replace_starts) > 0) { + reorder <- sort(tags_to_replace_starts, index.return = TRUE) + tags_to_replace_starts <- reorder$x + values_to_replace <- values_to_replace[reorder$ix] + tags_to_replace_ends <- tags_to_replace_ends[reorder$ix] + while (length(values_to_replace) > 0) { + actual_path <- paste0(substr(actual_path, 1, head(tags_to_replace_starts, 1) - 1), + '$', head(values_to_replace, 1), '$', + substr(actual_path, head(tags_to_replace_ends, 1) + 1, + nchar(actual_path))) + extra_chars <- nchar(head(values_to_replace, 1)) + 2 - + (head(tags_to_replace_ends, 1) - + head(tags_to_replace_starts, 1) + 1) + values_to_replace <- values_to_replace[-1] + tags_to_replace_starts <- tags_to_replace_starts[-1] + tags_to_replace_ends <- tags_to_replace_ends[-1] + tags_to_replace_starts <- tags_to_replace_starts + extra_chars + tags_to_replace_ends <- tags_to_replace_ends + extra_chars + } + } + + if (give_warning) { + .warning(paste0("Too complex path pattern specified for ", dataset_name, + ". Double check carefully the '$Files' fetched for this dataset ", + "or specify a simpler path pattern.")) + } + + if (permissive) { + paste0(actual_path, file_name_with_globs) + } else { + actual_path + } +} + +.FindTagValue <- function(path_with_globs_and_tag, actual_path, tag) { + tag <- paste0('\\$', tag, '\\$') + path_with_globs_and_tag <- paste0('^', path_with_globs_and_tag, '$') + parts <- strsplit(path_with_globs_and_tag, '*', fixed = TRUE)[[1]] + parts <- as.list(grep(tag, parts, value = TRUE)) + longest_couples <- NULL + pos_longest_couples <- NULL + found_value <- NULL + for (i in seq_along(parts)) { + parts[[i]] <- strsplit(parts[[i]], tag)[[1]] + if (length(parts[[i]]) == 1) { + parts[[i]] <- c(parts[[i]], '') + } + len_parts <- sapply(parts[[i]], nchar) + len_couples <- len_parts[-length(len_parts)] + len_parts[2:length(len_parts)] + pos_longest_couples <- c(pos_longest_couples, which.max(len_couples)) + longest_couples <- c(longest_couples, max(len_couples)) + } + chosen_part <- which.max(longest_couples) + parts[[chosen_part]] <- + parts[[chosen_part]][pos_longest_couples[chosen_part]:(pos_longest_couples[chosen_part] + 1)] + if (nchar(parts[[chosen_part]][1]) >= nchar(parts[[chosen_part]][2])) { + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + actual_path <- + substr(actual_path, match_left + attr(match_left, 'match.length'), nchar(actual_path)) + } + } + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + found_value <- substr(actual_path, 0, match_right - 1) + } + } + } else { + if (nchar(parts[[chosen_part]][2]) > 0) { + matches <- gregexpr(parts[[chosen_part]][2], actual_path)[[1]] + if (length(matches) == 1) { + match_right <- matches + actual_path <- substr(actual_path, 0, match_right - 1) + } + } + if (nchar(parts[[chosen_part]][1]) > 0) { + matches <- gregexpr(parts[[chosen_part]][1], actual_path)[[1]] + if (length(matches) == 1) { + match_left <- matches + found_value <- + substr(actual_path, match_left + attr(match_left, 'match.length'), + nchar(actual_path)) + } + } + } + found_value 
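+  # For example (hypothetical paths):
+  #   .FindTagValue('/a/$VAR$/*_$SDATE$.nc', '/a/tas/tas_19931101.nc', 'SDATE')
+  # returns "19931101": the value is recovered from the longest literal
+  # context around the tag ('_' on the left, '.nc' on the right), provided
+  # that context matches the actual path exactly once.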
+}
+
+.FilterUserGraphicArgs <- function(excludedArgs, ...) {
+  # This function filters the extra graphical parameters passed by the user to
+  # a plot function, excluding the ones that the plot function uses by default.
+  # Each plot function has a different set of arguments that are not allowed to
+  # be modified.
+  args <- list(...)
+  userArgs <- list()
+  for (name in names(args)) {
+    if ((name != "") & !is.element(name, excludedArgs)) {
+      # If the argument has a name and it is not in the list of excluded
+      # arguments, then it is added to the list that will be used
+      userArgs[[name]] <- args[[name]]
+    } else {
+      .warning(paste0("the argument '", name, "' cannot be modified ",
+                      "and the new value will be ignored"))
+    }
+  }
+  userArgs
+}
+
+.SelectDevice <- function(fileout, width, height, units, res) {
+  # This function is used in the plot functions to check the extension of the
+  # files where the graphics will be stored and select the right R device to
+  # save them.
+  # If the vector of filenames ('fileout') has files with different
+  # extensions, then it will only accept the first one, changing all the rest
+  # of the filenames to use that extension.
+
+  # We extract the extension of the filenames: '.png', '.pdf', ...
+  ext <- regmatches(fileout, regexpr("\\.[a-zA-Z0-9]*$", fileout))
+
+  if (length(ext) != 0) {
+    # If there is an extension specified, select the correct device
+    ## units of width and height set to accept inches
+    if (ext[1] == ".png") {
+      saveToFile <- function(fileout) {
+        png(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".jpeg") {
+      saveToFile <- function(fileout) {
+        jpeg(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] %in% c(".eps", ".ps")) {
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".pdf") {
+      saveToFile <- function(fileout) {
+        pdf(file = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".svg") {
+      saveToFile <- function(fileout) {
+        svg(filename = fileout, width = width, height = height)
+      }
+    } else if (ext[1] == ".bmp") {
+      saveToFile <- function(fileout) {
+        bmp(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else if (ext[1] == ".tiff") {
+      saveToFile <- function(fileout) {
+        tiff(filename = fileout, width = width, height = height, res = res, units = units)
+      }
+    } else {
+      .warning("file extension not supported; '.eps' will be used by default.")
+      ## In case there is only one filename
+      fileout[1] <- sub("\\.[a-zA-Z0-9]*$", ".eps", fileout[1])
+      ext[1] <- ".eps"
+      saveToFile <- function(fileout) {
+        postscript(file = fileout, width = width, height = height)
+      }
+    }
+    # Change filenames when necessary
+    if (any(ext != ext[1])) {
+      .warning(paste0("some extensions of the filenames provided in 'fileout' ",
+                      "are not ", ext[1],
+                      ". The extensions are being converted to ", ext[1], "."))
+      fileout <- sub("\\.[a-zA-Z0-9]*$", ext[1], fileout)
+    }
+  } else {
+    # Default filenames when there is no specification
+    .warning("no extension specified in the filenames; defaulting to '.eps'")
+    fileout <- paste0(fileout, ".eps")
+    saveToFile <- postscript
+  }
+
+  # Return the correct function with the graphical device, and the correct
+  # filenames
+  list(fun = saveToFile, files = fileout)
+}
+
+.message <- function(...) 
{ + # Function to use the 'message' R function with our custom settings + # Default: new line at end of message, indent to 0, exdent to 3, + # collapse to \n* + args <- list(...) + + ## In case we need to specify message arguments + if (!is.null(args[["appendLF"]])) { + appendLF <- args[["appendLF"]] + } else { + ## Default value in message function + appendLF <- TRUE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value in message function + domain <- NULL + } + args[["appendLF"]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n*" + } + args[["collapse"]] <- NULL + + ## Message tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "* " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + message(tmp, appendLF = appendLF, domain = domain) +} + +.warning <- function(...) { + # Function to use the 'warning' R function with our custom settings + # Default: no call information, indent to 0, exdent to 3, + # collapse to \n + args <- list(...) + + ## In case we need to specify warning arguments + if (!is.null(args[["call."]])) { + call <- args[["call."]] + } else { + ## Default: don't show info about the call where the warning came up + call <- FALSE + } + if (!is.null(args[["immediate."]])) { + immediate <- args[["immediate."]] + } else { + ## Default value in warning function + immediate <- FALSE + } + if (!is.null(args[["noBreaks."]])) { + noBreaks <- args[["noBreaks."]] + } else { + ## Default value warning function + noBreaks <- FALSE + } + if (!is.null(args[["domain"]])) { + domain <- args[["domain"]] + } else { + ## Default value warning function + domain <- NULL + } + args[["call."]] <- NULL + args[["immediate."]] <- NULL + args[["noBreaks."]] <- NULL + args[["domain"]] <- NULL + + ## To modify strwrap indent and exdent arguments + if (!is.null(args[["indent"]])) { + indent <- args[["indent"]] + } else { + indent <- 0 + } + if (!is.null(args[["exdent"]])) { + exdent <- args[["exdent"]] + } else { + exdent <- 3 + } + args[["indent"]] <- NULL + args[["exdent"]] <- NULL + + ## To modify paste collapse argument + if (!is.null(args[["collapse"]])) { + collapse <- args[["collapse"]] + } else { + collapse <- "\n!" + } + args[["collapse"]] <- NULL + + ## Warning tag + if (!is.null(args[["tag"]])) { + tag <- args[["tag"]] + } else { + tag <- "! Warning: " + } + args[["tag"]] <- NULL + + tmp <- paste0(tag, + paste(strwrap(args, indent = indent, exdent = exdent), collapse = collapse)) + warning(tmp, call. = call, immediate. = immediate, + noBreaks. = noBreaks, domain = domain) +} + +.IsColor <- function(x) { + res <- try(col2rgb(x), silent = TRUE) + return(!is(res, "try-error")) +} + +# This function switches to a specified figure at position (row, col) in a layout. +# This overcomes the bug in par(mfg = ...). However the mode par(new = TRUE) is +# activated, i.e., all drawn elements will be superimposed. 
Additionally, after +# using this function, the automatical pointing to the next figure in the layout +# will be spoiled: once the last figure in the layout is drawn, the pointer won't +# move to the first figure in the layout. +# Only figures with numbers other than 0 (when creating the layout) will be +# accessible. +# Inputs: either row and col, or n and mat +.SwitchToFigure <- function(row = NULL, col = NULL, n = NULL, mat = NULL) { + if (!is.null(n) && !is.null(mat)) { + if (!is.numeric(n) || length(n) != 1) { + stop("Parameter 'n' must be a single numeric value.") + } + n <- round(n) + if (!is.array(mat)) { + stop("Parameter 'mat' must be an array.") + } + target <- which(mat == n, arr.ind = TRUE)[1, ] + row <- target[1] + col <- target[2] + } else if (!is.null(row) && !is.null(col)) { + if (!is.numeric(row) || length(row) != 1) { + stop("Parameter 'row' must be a single numeric value.") + } + row <- round(row) + if (!is.numeric(col) || length(col) != 1) { + stop("Parameter 'col' must be a single numeric value.") + } + col <- round(col) + } else { + stop("Either 'row' and 'col' or 'n' and 'mat' must be provided.") + } + next_attempt <- c(row, col) + par(mfg = next_attempt) + i <- 1 + layout_size <- par('mfrow') + layout_cells <- matrix(1:prod(layout_size), layout_size[1], layout_size[2], + byrow = TRUE) + while (any((par('mfg')[1:2] != c(row, col)))) { + next_attempt <- which(layout_cells == i, arr.ind = TRUE)[1, ] + par(mfg = next_attempt) + i <- i + 1 + if (i > prod(layout_size)) { + stop("Figure not accessible.") + } + } + plot(0, type = 'n', axes = FALSE, ann = FALSE) + par(mfg = next_attempt) +} + +# Function to permute arrays of non-atomic elements (e.g. POSIXct) +.aperm2 <- function(x, new_order) { + old_dims <- dim(x) + attr_bk <- attributes(x) + if ('dim' %in% names(attr_bk)) { + attr_bk[['dim']] <- NULL + } + if (is.numeric(x)) { + x <- aperm(x, new_order) + } else { + y <- array(seq_along(x), dim = dim(x)) + y <- aperm(y, new_order) + x <- x[as.vector(y)] + } + dim(x) <- old_dims[new_order] + attributes(x) <- c(attributes(x), attr_bk) + x +} + +# This function is a helper for the function .MergeArrays. +# It expects as inputs two named numeric vectors, and it extends them +# with dimensions of length 1 until an ordered common dimension +# format is reached. +# The first output is dims1 extended with 1s. +# The second output is dims2 extended with 1s. +# The third output is a merged dimension vector. If dimensions with +# the same name are found in the two inputs, and they have a different +# length, the maximum is taken. +.MergeArrayDims <- function(dims1, dims2) { + new_dims1 <- NULL + new_dims2 <- NULL + while (length(dims1) > 0) { + if (names(dims1)[1] %in% names(dims2)) { + pos <- which(names(dims2) == names(dims1)[1]) + dims_to_add <- rep(1, pos - 1) + if (length(dims_to_add) > 0) { + names(dims_to_add) <- names(dims2[1:(pos - 1)]) + } + new_dims1 <- c(new_dims1, dims_to_add, dims1[1]) + new_dims2 <- c(new_dims2, dims2[1:pos]) + dims1 <- dims1[-1] + dims2 <- dims2[-(1:pos)] + } else { + new_dims1 <- c(new_dims1, dims1[1]) + new_dims2 <- c(new_dims2, 1) + names(new_dims2)[length(new_dims2)] <- names(dims1)[1] + dims1 <- dims1[-1] + } + } + if (length(dims2) > 0) { + dims_to_add <- rep(1, length(dims2)) + names(dims_to_add) <- names(dims2) + new_dims1 <- c(new_dims1, dims_to_add) + new_dims2 <- c(new_dims2, dims2) + } + list(new_dims1, new_dims2, pmax(new_dims1, new_dims2)) +} + +# This function takes two named arrays and merges them, filling with +# NA where needed. 
+# dim(array1) +# 'b' 'c' 'e' 'f' +# 1 3 7 9 +# dim(array2) +# 'a' 'b' 'd' 'f' 'g' +# 2 3 5 9 11 +# dim(.MergeArrays(array1, array2, 'b')) +# 'a' 'b' 'c' 'e' 'd' 'f' 'g' +# 2 4 3 7 5 9 11 +.MergeArrays <- function(array1, array2, along) { + if (!(is.null(array1) || is.null(array2))) { + if (!(identical(names(dim(array1)), names(dim(array2))) && + identical(dim(array1)[-which(names(dim(array1)) == along)], + dim(array2)[-which(names(dim(array2)) == along)]))) { + new_dims <- .MergeArrayDims(dim(array1), dim(array2)) + dim(array1) <- new_dims[[1]] + dim(array2) <- new_dims[[2]] + for (j in seq_along(dim(array1))) { + if (names(dim(array1))[j] != along) { + if (dim(array1)[j] != dim(array2)[j]) { + if (which.max(c(dim(array1)[j], dim(array2)[j])) == 1) { + na_array_dims <- dim(array2) + na_array_dims[j] <- dim(array1)[j] - dim(array2)[j] + na_array <- array(dim = na_array_dims) + array2 <- abind(array2, na_array, along = j) + names(dim(array2)) <- names(na_array_dims) + } else { + na_array_dims <- dim(array1) + na_array_dims[j] <- dim(array2)[j] - dim(array1)[j] + na_array <- array(dim = na_array_dims) + array1 <- abind(array1, na_array, along = j) + names(dim(array1)) <- names(na_array_dims) + } + } + } + } + } + if (!(along %in% names(dim(array2)))) { + stop("The dimension specified in 'along' is not present in the ", + "provided arrays.") + } + array1 <- abind(array1, array2, along = which(names(dim(array1)) == along)) + names(dim(array1)) <- names(dim(array2)) + } else if (is.null(array1)) { + array1 <- array2 + } + array1 +} + +# only can be used in Trend(). Needs generalization or be replaced by other function. +.reorder <- function(output, time_dim, dim_names) { + # Add dim name back + if (is.null(dim(output))) { + dim(output) <- c(stats = length(output)) + } else { #is an array + if (length(dim(output)) == 1) { + if (!is.null(names(dim(output)))) { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { + names(dim(output)) <- time_dim + } + } else { # more than one dim + if (names(dim(output))[1] != "") { + dim(output) <- c(1, dim(output)) + names(dim(output))[1] <- time_dim + } else { #regular case + names(dim(output))[1] <- time_dim + } + } + } + # reorder + pos <- match(dim_names, names(dim(output))) + output <- aperm(output, pos) + names(dim(output)) <- dim_names + names(dim(output))[names(dim(output)) == time_dim] <- 'stats' + return(output) +} + +# to be used in AMV.R, TPI.R, SPOD.R, GSAT.R and GMST.R +.Indices <- function(data, type, monini, indices_for_clim, + fmonth_dim, sdate_dim, year_dim, month_dim, na.rm) { + + if (type == 'dcpp') { + + fyear_dim <- 'fyear' + data <- Season(data = data, time_dim = fmonth_dim, + monini = monini, moninf = 1, monsup = 12, + method = mean, na.rm = na.rm) + names(dim(data))[which(names(dim(data)) == fmonth_dim)] <- fyear_dim + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + + anom <- data + + } else { ## Different indices_for_clim for each forecast year (to use the same calendar years) + + n_fyears <- as.numeric(dim(data)[fyear_dim]) + n_sdates <- as.numeric(dim(data)[sdate_dim]) + + if (is.null(indices_for_clim)) { ## climatology over the whole (common) period + first_years_for_clim <- n_fyears : 1 + last_years_for_clim <- n_sdates : (n_sdates - n_fyears + 1) + } else { ## indices_for_clim specified as a numeric vector + first_years_for_clim <- seq(from = indices_for_clim[1], by = -1, length.out = n_fyears) + last_years_for_clim <- + seq(from = indices_for_clim[length(indices_for_clim)], + 
by = -1, length.out = n_fyears) + } + + data <- s2dv::Reorder(data = data, order = c(fyear_dim, sdate_dim)) + anom <- array(data = NA, dim = dim(data)) + for (i in 1:n_fyears) { + clim <- mean(data[i, first_years_for_clim[i]:last_years_for_clim[i]], na.rm = na.rm) + anom[i, ] <- data[i, ] - clim + } + } + + } else if (type %in% c('obs', 'hist')) { + + data <- multiApply::Apply(data = data, target_dims = month_dim, + fun = mean, na.rm = na.rm)$output1 + + if (identical(indices_for_clim, FALSE)) { ## data is already anomalies + clim <- 0 + } else if (is.null(indices_for_clim)) { + ## climatology over the whole period + clim <- multiApply::Apply(data = data, target_dims = year_dim, fun = mean, + na.rm = na.rm)$output1 + } else { + ## indices_for_clim specified as a numeric vector + clim <- multiApply::Apply(data = ClimProjDiags::Subset(x = data, along = year_dim, + indices = indices_for_clim), + target_dims = year_dim, fun = mean, na.rm = na.rm)$output1 + } + + anom <- data - clim + + } else { + stop('type must be dcpp, hist or obs') + } + + return(anom) +} + +#TODO: Remove from s2dv when PlotLayout can get colorbar info from plotting function directly. +# The function is temporarily here because PlotLayout() needs to draw the colorbars of +# PlotMostLikelyQuantileMap(). +#Draws Color Bars for Categories +#A wrapper of s2dv::ColorBar to generate multiple color bars for different +#categories, and each category has different color set. +GradientCatsColorBar <- function(nmap, brks = NULL, cols = NULL, vertical = TRUE, subsampleg = NULL, + bar_limits, var_limits = NULL, + triangle_ends = NULL, plot = TRUE, + draw_separators = FALSE, + bar_titles = NULL, title_scale = 1, + label_scale = 1, extra_margin = rep(0, 4), + ...) { + # bar_limits + if (!is.numeric(bar_limits) || length(bar_limits) != 2) { + stop("Parameter 'bar_limits' must be a numeric vector of length 2.") + } + + # Check brks + if (is.null(brks) || (is.numeric(brks) && length(brks) == 1)) { + num_brks <- 5 + if (is.numeric(brks)) { + num_brks <- brks + } + brks <- seq(from = bar_limits[1], to = bar_limits[2], length.out = num_brks) + } + if (!is.numeric(brks)) { + stop("Parameter 'brks' must be a numeric vector.") + } + # Check cols + col_sets <- list(c("#A1D99B", "#74C476", "#41AB5D", "#238B45"), + c("#6BAED6FF", "#4292C6FF", "#2171B5FF", "#08519CFF"), + c("#FFEDA0FF", "#FED976FF", "#FEB24CFF", "#FD8D3CFF"), + c("#FC4E2AFF", "#E31A1CFF", "#BD0026FF", "#800026FF"), + c("#FCC5C0", "#FA9FB5", "#F768A1", "#DD3497")) + if (is.null(cols)) { + if (length(col_sets) >= nmap) { + chosen_sets <- 1:nmap + chosen_sets <- chosen_sets + floor((length(col_sets) - length(chosen_sets)) / 2) + } else { + chosen_sets <- array(seq_along(col_sets), nmap) + } + cols <- col_sets[chosen_sets] + } else { + if (!is.list(cols)) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (!all(sapply(cols, is.character))) { + stop("Parameter 'cols' must be a list of character vectors.") + } + if (length(cols) != nmap) { + stop("Parameter 'cols' must be a list of the same length as the number of ", + "maps in 'maps'.") + } + } + for (i in seq_along(cols)) { + if (length(cols[[i]]) != (length(brks) - 1)) { + cols[[i]] <- grDevices::colorRampPalette(cols[[i]])(length(brks) - 1) + } + } + + # Check bar_titles + if (is.null(bar_titles)) { + if (nmap == 3) { + bar_titles <- c("Below normal (%)", "Normal (%)", "Above normal (%)") + } else if (nmap == 5) { + bar_titles <- c("Low (%)", "Below normal (%)", + "Normal (%)", "Above normal (%)", "High (%)") + } 
else {
+      bar_titles <- paste0("Cat. ", 1:nmap, " (%)")
+    }
+  }
+
+  if (plot) {
+    for (k in 1:nmap) {
+      s2dv::ColorBar(brks = brks, cols = cols[[k]], vertical = FALSE, subsampleg = subsampleg,
+#                     bar_limits = bar_limits, var_limits = var_limits,
+                     triangle_ends = triangle_ends, plot = TRUE,
+                     draw_separators = draw_separators,
+                     title = bar_titles[[k]], title_scale = title_scale,
+                     label_scale = label_scale, extra_margin = extra_margin)
+    }
+  } else {
+    #TODO: col_inf and col_sup
+    return(list(brks = brks, cols = cols))
+  }
+
+}
+
diff --git a/modules/Crossval/recipe_crossval_ecvs.yml b/modules/Crossval/recipe_crossval_ecvs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e23495ecfb51976af8a20958de0c07c25576293c
--- /dev/null
+++ b/modules/Crossval/recipe_crossval_ecvs.yml
@@ -0,0 +1,184 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N. Pérez-Zanón
+  Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single model and multimodel.
+Analysis:
+  Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'monthly_mean', units: 'K'}
+    # To request more variables to be divided into atomic recipes, add them this way:
+    # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'ECMWF-SEAS5.1', member: 'all'}
+      - {name: 'Meteo-France-System7'}
+      # To request more Systems to be divided into atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' is currently the only option (str)
+      createFrom: Anomalies # Which module the anomalies should be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate:
+      - '0101'
+      - '0201'
+      - '0301'
+      - '0401'
+      - '0501'
+      - '0601'
+      - '0701'
+      - '0801'
+      - '0901'
+      - '1001'
+      - '1101'
+      - '1201' # Start date, 'mmdd' (Mandatory, int)
+      # To request more startdates to be divided into atomic recipes, add them this way:
+      # - '0101'
+      # - '0201'
+      # ...
+    fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '1996' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int)
+    ftime_max: 6 # Last forecast time step in months. Starts at “1”. 
(Mandatory, int)
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: minimum longitude (Mandatory, int)
+    # lonmax: maximum longitude (Mandatory, int)
+    - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+    # To request more regions to be divided into atomic recipes, add them this way:
+    # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+  Regrid:
+    method: conservative # Interpolation method (Mandatory, str)
+    type: to_system # Interpolate to: 'to_system', 'to_reference', 'none',
+    # or a CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Calibration:
+      method: raw # Calibration method. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Anomalies:
+      compute: yes # Either yes/true or no/false (Mandatory, bool)
+      cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool)
+      save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Downscaling:
+      # Assumption 1: leave-one-out cross-validation is always applied
+      # Assumption 2: for analogs, we select the best analog (minimum distance)
+      type: intbc # Mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'.
+      int_method: conservative # Regridding method accepted by CDO. (Mandatory, str)
+      bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'.
+      lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn'
+      log_reg_method: # If type=logreg. Options: 'ens_mean', 'ens_mean_sd', 'sorted_members'
+      target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO
+      nanalogs: # If type=analogs. Number of analogs to be searched
+      save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str)
+    Time_aggregation:
+      execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool)
+      method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string)
+      # ini and end: lists of pairs of initial and final time steps to aggregate.
+      # In this example, aggregate from 1 to 2, from 2 to 3 and from 1 to 3
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
+      # user_def: List of lists. Custom user-defined forecast times to aggregate.
+      # Elements should be named; names can be chosen by the user.
+      # An R expression can be entered using '!expr'; it will be evaluated by the code.
+      # If both ini/end and user_def are defined, ini/end takes preference.
+      user_def:
+        DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times
+        DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 12), seq(3, 120, 12))) # aggregate 1,2,3,13,14,15,...
+    Indices:
+      ## Indices available: - NAO (for psl and/or z500);
+      #                     - Nino1+2, Nino3, Nino3.4, Nino4 (for tos)
+      ## Each index can only be computed if its area is within the selected region.
+      # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool)
+      # save: What to save. Options: 'all'/'none'. Default is 'all'.
+      # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool)
+      # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool)
+      # alpha: Significance threshold. 
Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. 
+ # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/modules/Crossval/recipe_crossval_ecvs_global.yml b/modules/Crossval/recipe_crossval_ecvs_global.yml new file mode 100644 index 0000000000000000000000000000000000000000..830225af29c54fd1623ae5435547a4cf2f8c54ff --- /dev/null +++ b/modules/Crossval/recipe_crossval_ecvs_global.yml @@ -0,0 +1,184 @@ +# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N. Pérez-Zanón + Info: This recipe can be used to test Crossval_anomalies.R and Crossval_skill.R for single and multimodel. +Analysis: + Horizon: seasonal # Mandatory, str: 'seasonal' or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables.
+ - {name: 'tas', freq: 'monthly_mean', units: 'K'} + # To request more variables to be divided in atomic recipes, add them this way: + # - {name: 'prlr', freq: 'monthly_mean', units: 'mm'} + # To request multiple variables *in the same* atomic recipe, add them this way: + # - {name: 'tas, prlr', freq: 'monthly_mean', units: {tas: 'C', prlr: 'mm'}} + Datasets: + System: + # name: System name (Mandatory, str) + # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str) + - {name: 'ECMWF-SEAS5.1', member: 'all'} + - {name: 'Meteo-France-System7'} + # To request more Systems to be divided in atomic recipes, add them this way: + # - {name: 'Meteo-France-System7'} + Multimodel: + execute: no # Either yes/true or no/false (Mandatory, bool) + approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str) + createFrom: Anomalies # Which module should the anomalies be created from (str) + Reference: + - {name: 'ERA5'} # Reference name (Mandatory, str) + # To request more References to be divided into atomic recipes, add them this way: + # - {name: 'ERA5Land'} + Time: + sdate: + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' # Start date, 'mmdd' (Mandatory, int) + # To request more startdates to be divided into atomic recipes, add them this way: + # - '0101' + # - '0201' + # ... + fcst_year: '2020' # Forecast initialization year 'YYYY' (Optional, int) + hcst_start: '1993' # Hindcast initialization start year 'YYYY' (Mandatory, int) + hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int) + ftime_min: 1 # First forecast time step in months. Starts at “1”. (Mandatory, int) + ftime_max: 6 # Last forecast time step in months. Starts at “1”. (Mandatory, int) + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + Regrid: + method: conservative # Interpolation method (Mandatory, str) + type: to_system # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Calibration: + method: raw # Calibration method. (Mandatory, str) + save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Anomalies: + compute: yes # Either yes/true or no/false (Mandatory, bool) + cross_validation: no # Either yes/true or no/false (Mandatory if 'compute: yes', bool) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Downscaling: + # Assumption 1: leave-one-out cross-validation is always applied + # Assumption 2: for analogs, we select the best analog (minimum distance) + type: intbc # mandatory, 'none', 'int', 'intbc', 'intlr', 'analogs', 'logreg'. + int_method: conservative # regridding method accepted by CDO. (Mandatory, str) + bc_method: bias # If type=intbc. Options: 'bias', 'calibration', 'quantile_mapping', 'qm', 'evmos', 'mse_min', 'crps_min', 'rpc-based'. + lr_method: # If type=intlr. Options: 'basic', 'large_scale', '9nn' + log_reg_method: # If type=logreg. 
Options: 'ens_mean', 'ens_mean_sd', 'sorted_members' + target_grid: /esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h/tas_200002.nc # nc file or grid accepted by CDO + nanalogs: # If type=analogs. Number of analogs to be searched + save: 'all' # Options: 'all'/'none'/'exp_only' (Mandatory, str) + Time_aggregation: + execute: no # Either yes/true or no/false. Defaults to false. (Mandatory, bool) + method: average # Aggregation method. Available methods: 'average', 'accumulated'. (Mandatory, string) + # ini and end: list, pairs of initial and final time steps to aggregate. + # In this example, aggregate from 1 to 2; from 2 to 3 and from 1 to 3 + ini: [1, 2, 1] + end: [2, 3, 3] + # user_def: List of lists. Custom user-defined forecast times to aggregate. + # Elements should be named; names can be chosen by the user. + # An R expression can be entered using '!expr'; it will be evaluated by the code. + # If both ini/end and user_def are defined, ini/end takes preference. + user_def: + DJF_Y1: [1, 3] # aggregate from 1 to 3 forecast times + DJF: !expr sort(c(seq(1, 120, 12), seq(2, 120, 12), seq(3, 120, 12))) # aggregate 1,2,3,13,14,15,... + Indices: + ## Indices available: - NAO (for psl and/or z500); + # - Nino1+2, Nino3, Nino3.4, Nino4 (for tos) + ## Each index can only be computed if its area is within the selected region. + # obsproj: NAO computation method (see s2dv::NAO()). Default is yes/true. (Optional, bool) + # save: What to save. Options: 'all'/'none'. Default is 'all'. + # plot_ts: Generate time series plot? Default is yes/true. (Optional, bool) + # plot_sp: Generate spatial pattern plot? Default is yes/true. (Optional, bool) + # alpha: Significance threshold. Default value is 0.05 (Optional, numeric) + #Nino1+2: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino3.4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + #Nino4: {save: 'all', plot_ts: yes, plot_sp: yes, alpha: 0.05} + # Also available if variable is psl and/or z500: + # NAO: {obsproj: yes, save: 'all', plot_ts: yes, plot_sp: yes} + Skill: + metric: mean_bias enscorr rpss crpss enssprerr rps crps rms rmss # List of skill metrics separated by spaces or commas. (Mandatory, str) + alpha: 0.05 + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + percentiles: [[1/3, 2/3]] # Thresholds + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'robinson' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false.
(Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: yes # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + cross.method: 'leave-one-out' # Default 'leave-one-out' + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: esarchive # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /esarchive/scratch/nperez/git4/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /esarchive/scratch/nperez/git4/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + auto_conf: + script: ./example_scripts/multimodel_seasonal.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032339 # replace with your hpc username + wallclock: 01:00 # wallclock for single-model jobs, hh:mm + wallclock_multimodel: 02:00 # wallclock for multi-model jobs, hh:mm. If empty, 'wallclock' will be used. + processors_per_job: 4 # processors to request for each single-model job. + processors_multimodel: 16 # processors to request for each multi-model job. If empty, 'processors_per_job' will be used. + custom_directives: ['#SBATCH --exclusive'] # custom scheduler directives for single-model jobs. + custom_directives_multimodel: ['#SBATCH --exclusive', '#SBATCH --constraint=highmem'] # custom scheduler directives for multi-model jobs. 
If empty, 'custom_directives' will be used. + platform: nord3v2 # platform (for now, only nord3v2 is available) + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails diff --git a/modules/Loading/R/dates2load.R b/modules/Loading/R/dates2load.R index 7cf80b8f080a097eca647c6d349cdfb15cedf50d..08a15bb8187cf13dd1e9b5e4a701d09017b669cf 100644 --- a/modules/Loading/R/dates2load.R +++ b/modules/Loading/R/dates2load.R @@ -83,6 +83,7 @@ dates2load <- function(recipe, logger) { } return(list(hcst = file_dates, fcst = file_dates.fcst)) ## TODO: document header of fun + } # adds the correspondent dims to each sdate array @@ -93,3 +94,4 @@ dates2load <- function(recipe, logger) { dim(data) <- default_dims return(data) } + diff --git a/modules/Loading/R/load_decadal.R b/modules/Loading/R/load_decadal.R index 40c1932995222135cf8f14bf7e8b8cbf8f7fdb04..3844e51d18ad2dd2f69e414c95cd6010ae1308f8 100644 --- a/modules/Loading/R/load_decadal.R +++ b/modules/Loading/R/load_decadal.R @@ -82,7 +82,6 @@ load_decadal <- function(recipe) { if (identical(member, 'all')) { member <- strsplit(archive$System[[exp.name]]$member, ',')[[1]] } - #------------------------- # derived from above: #------------------------- @@ -125,7 +124,8 @@ load_decadal <- function(recipe) { transform = regrid_params$fcst.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(longitude = c('lon', 'longitude'), latitude = c('lat', 'latitude')), @@ -269,7 +269,8 @@ load_decadal <- function(recipe) { transform = regrid_params$obs.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -296,7 +297,8 @@ load_decadal <- function(recipe) { transform = regrid_params$obs.transform, transform_extra_cells = 2, transform_params = list(grid = regrid_params$obs.gridtype, #nc file - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -432,6 +434,6 @@ load_decadal <- function(recipe) { info(recipe$Run$logger, "##### DATA LOADING COMPLETED SUCCESSFULLY #####") - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_seasonal.R b/modules/Loading/R/load_seasonal.R index bbee8d4d6a8615f0e0eac79576d8a1a5fdfd6fa4..8e65ec03334306a7a7ad90e844deecafce77a495 100644 --- a/modules/Loading/R/load_seasonal.R +++ b/modules/Loading/R/load_seasonal.R @@ -18,7 +18,6 @@ load_seasonal <- function(recipe) { lons.max <- recipe$Analysis$Region$lonmax ref.name <- recipe$Analysis$Datasets$Reference$name exp.name <- recipe$Analysis$Datasets$System$name - variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] 
store.freq <- recipe$Analysis$Variables$freq @@ -31,7 +30,6 @@ load_seasonal <- function(recipe) { recipe$Analysis$Time$ftime_min, recipe$Analysis$Time$ftime_max, time_freq=store.freq) - if (!(is.null(sdates$fcst))) { idxs$fcst <- get_timeidx(sdates$fcst, recipe$Analysis$Time$ftime_min, @@ -83,7 +81,6 @@ load_seasonal <- function(recipe) { fcst.path <- paste0(archive$src_sys, hcst.dir, "$var_dir$", "$var$_$file_date$.nc") - # Define regrid parameters: #------------------------------------------------------------------- regrid_params <- get_regrid_params(recipe, archive) @@ -100,7 +97,6 @@ load_seasonal <- function(recipe) { # Load hindcast #------------------------------------------------------------------- - hcst <- Start(dat = hcst.path, var = variable, var_dir = var_dir_exp, @@ -113,7 +109,8 @@ load_seasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), @@ -177,7 +174,8 @@ load_seasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), @@ -249,7 +247,8 @@ load_seasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -287,7 +286,8 @@ load_seasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -387,7 +387,7 @@ load_seasonal <- function(recipe) { ############################################################################ ############################################################################ - .log_memory_usage(recipe$Run$logger, when = "After loading") +# .log_memory_usage(recipe$Run$logger, when = "After loading") return(list(hcst = hcst, fcst = fcst, obs = obs)) } diff --git a/modules/Loading/R/load_subseasonal.R b/modules/Loading/R/load_subseasonal.R index 9176ad7e70da79ac8917a55c507c7313a75e8170..cb1fa7f3df42448b0453895f235b48eb3b4971df 100644 --- a/modules/Loading/R/load_subseasonal.R +++ b/modules/Loading/R/load_subseasonal.R @@ -104,7 +104,8 @@ load_subseasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 
'latitude'), longitude = c('lon', 'longitude'), @@ -169,7 +170,8 @@ load_subseasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$fcst.transform, transform_params = list(grid = regrid_params$fcst.gridtype, - method = regrid_params$fcst.gridmethod), + method = regrid_params$fcst.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat', 'latitude'), longitude = c('lon', 'longitude'), @@ -242,7 +244,8 @@ load_subseasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -280,7 +283,8 @@ load_subseasonal <- function(recipe) { longitude_reorder = circularsort, transform = regrid_params$obs.transform, transform_params = list(grid = regrid_params$obs.gridtype, - method = regrid_params$obs.gridmethod), + method = regrid_params$obs.gridmethod, + print_sys_msg = TRUE), transform_vars = c('latitude', 'longitude'), synonims = list(latitude = c('lat','latitude'), longitude = c('lon','longitude')), @@ -384,3 +388,4 @@ load_subseasonal <- function(recipe) { } + diff --git a/modules/Loading/R/load_tas_tos.R b/modules/Loading/R/load_tas_tos.R index 4710920ba902507291d350ebebc7c809064609c2..8a6047690b0bed87a8e03fed433160f16ed97423 100644 --- a/modules/Loading/R/load_tas_tos.R +++ b/modules/Loading/R/load_tas_tos.R @@ -385,7 +385,8 @@ load_tas_tos <- function(recipe) { method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = c(lons.min, lons.max,lats.min, lats.max), - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) obs$data <- aux$data_array obs$coords$longitude <- aux$lons @@ -402,7 +403,8 @@ load_tas_tos <- function(recipe) { lons = hcst$coords$longitude, lats = hcst$coords$latitude, grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = TRUE, - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) hcst$data <- aux$data_array hcst$coords$longitude <- aux$lons @@ -418,7 +420,8 @@ load_tas_tos <- function(recipe) { grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = TRUE, - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) fcst$data <- aux$data_array fcst$coords$longitude <- aux$lons @@ -436,7 +439,8 @@ load_tas_tos <- function(recipe) { lons = hcst$coords$longitude, lats = hcst$coords$latitude, grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = TRUE, - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) hcst$data <- aux$data_array hcst$coords$longitude <- aux$lons @@ -452,7 +456,8 @@ load_tas_tos <- function(recipe) { grid = regrid_params$fcst.gridtype, method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = TRUE, - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) fcst$data <- aux$data_array fcst$coords$longitude <- aux$lons @@ -468,7 +473,8 @@ load_tas_tos <- function(recipe) { grid = regrid_params$obs.gridtype, method = recipe$Analysis$Regrid$method, avoid_writes = TRUE, crop = TRUE, - force_remap = TRUE) + force_remap = TRUE, + print_sys_msg = TRUE) obs$data <- aux$data_array obs$coords$longitude <- aux$lons diff --git 
a/modules/Multimodel/Multimodel.R b/modules/Multimodel/Multimodel.R index 192e92928974906e6698d2e708cd4c0193559c66..4aa48924e00405456b388201c1e4616be5608f1b 100644 --- a/modules/Multimodel/Multimodel.R +++ b/modules/Multimodel/Multimodel.R @@ -9,41 +9,57 @@ source("modules/Loading/R/dates2load.R") source("modules/Loading/R/get_timeidx.R") source("modules/Loading/R/check_latlon.R") source('modules/Multimodel/load_multimodel.R') -source('modules/Multimodel/load_multimodel_splitted.R') -source('modules/Multimodel/dims_multimodel.R') source('modules/Multimodel/build_multimodel.R') source('modules/Multimodel/clean_multimodel.R') +library(startR) +library(CSTools) +library(yaml) +recipe <- read_yaml("recipe_tas.yml") +source("modules/Saving/R/get_dir.R") +source("modules/Loading/Loading.R") +recipe$Run$output_dir <- paste0(recipe$Run$output_dir, "recipe_tas_20240605141443/") +# DON'T KNOW HOW TO SOLVE LOGGER: +recipe$Run$Loglevel <- 'INFO' +recipe$Run$logfile <- "/esarchive/scratch/nperez/git4//recipe_tas_20240531165840/logs/main.log" +recipe$Run$logger <- logger <- log4r::logger(threshold = recipe$Run$Loglevel, + appenders = list(console_appender(layout = default_log_layout()), + file_appender(recipe$Run$logfile, append = TRUE, + layout = default_log_layout()))) Multimodel <- function(recipe) { - # recipe: auto-s2s recipe as provided by read_yaml - - # Loading data saved in the jobs for individual models - if (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')){ - # Load data splitting by system - data_aux <- load_multimodel_splitted(recipe) - - data <- data_aux$data - - files_hcst <- data_aux$files_hcst - files_fcst <- data_aux$files_fcst - files_obs <- data_aux$files_obs - - rm(data_aux) - - } else { - # Load data without splitting - data <- load_multimodel(recipe) - files_hcst <- data$hcst$attrs$source_files - files_fcst <- data$fcst$attrs$source_files - files_obs <- data$obs$attrs$source_files + # Load the data saved by the individual-model jobs, split by system and member: + if (recipe$Analysis$Datasets$Multimodel$approach == 'pooled') { + datos <- load_multimodel(recipe) + } else if (recipe$Analysis$Datasets$Multimodel$approach == 'mean') { + # In this case the ensemble dim is 1 for each model + # The functions in skill can still work + source("modules/Multimodel/load_multimodel_mean.R") + datos <- load_multimodel_mean(recipe) + source("modules/Multimodel/load_multimodel_probs.R") + probs <- load_multimodel_probs(recipe) + } else { #(recipe$Analysis$Datasets$Multimodel$approach == "weighted") + stop("Implement the weighting method here") + # I imagine the probs and the ensmean are modified and returned.
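+ # A possible sketch of the weighted combination (hedged; this is NOT the
+ # module's implemented API). The per-system weights 'w' and the reuse of
+ # 'datos' as loaded by the 'mean' approach are assumptions for illustration:
+ # w <- c(0.6, 0.4)  # hypothetical skill-based weights, one per system
+ # w <- w / sum(w)   # normalise so the weights sum to 1
+ # ensmean_list <- lapply(datos$hcst, function(x) MeanDims(x$data, 'ensemble'))
+ # ensmean <- Reduce(`+`, Map(function(em, wi) em * wi, ensmean_list, w))
+ # The category probabilities could be combined analogously: a weighted sum
+ # of the per-system probabilities, renormalised so the categories sum to 1.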
+ multimodel_aux <- build_multimodel(data, recipe) + ensmean <- multimodel_aux$ensmean + probs <- multimodel_aux$prob + rm(multimodel_aux) } - # Building the multi-model - multimodel_aux <- build_multimodel(data, recipe) - data <- multimodel_aux$data - prob <- multimodel_aux$prob - rm(multimodel_aux) + # Assessment: + # If Fair is TRUE, the number of members (nmemb) used to compute the probabilities + # needs to be passed + + source("modules/Multimodel/Multimodel_skill.R") + skill_metrics <- multimodel_metrics(recipe = recipe, + datos = datos, + probs = probs, + Fair = FALSE) + + + + # Saving multimodel if (recipe$Analysis$Workflow[[recipe$Analysis$Datasets$Multimodel$createFrom]]$save != 'none') { diff --git a/modules/Multimodel/Multimodel_skill.R b/modules/Multimodel/Multimodel_skill.R new file mode 100644 index 0000000000000000000000000000000000000000..242398dcd0a286b2bd7f5a65f2e2dfc842c1c147 --- /dev/null +++ b/modules/Multimodel/Multimodel_skill.R @@ -0,0 +1,377 @@ +# One function to compute all the possible metrics or statistics +# datos is a list returned by load_multimodel or load_multimodel_mean + ## this reduces memory and it can be used to compute several metrics + ## in the 'mean' approach the ensemble dim has length 1 for each model +# probs is a list returned by load_multimodel_probs + ## this is needed to compute rps/rpss +source("modules/Saving/Saving.R") +source("modules/Crossval/R/tmp/RPS.R") +source("modules/Crossval/R/RPS_clim.R") +source("modules/Crossval/R/CRPS_clim.R") +source("modules/Crossval/R/tmp/RPSS.R") +source("modules/Crossval/R/tmp/RandomWalkTest.R") +source("modules/Crossval/R/tmp/Corr.R") +source("modules/Crossval/R/tmp/Bias.R") +source("modules/Crossval/R/tmp/SprErr.R") +source("modules/Crossval/R/tmp/Eno.R") +source("modules/Crossval/R/tmp/GetProbs.R") +source("modules/Visualization/Visualization.R") # to load .warning from utils +multimodel_metrics <- function(recipe, + datos = NULL, + probs = NULL, + Fair = FALSE) { + ncores <- recipe$Analysis$ncores + na.rm <- recipe$Analysis$remove_NAs + cross.method <- recipe$Analysis$cross.method + if (is.null(cross.method)) { + cross.method <- 'leave-one-out' + } + # Prepare indices for crossval + sdate_dim <- dim(datos$hcst[[1]]$data)['syear'] + cross <- CSTools:::.make.eval.train.dexes(eval.method = cross.method, + amt.points = sdate_dim, + amt.points_cor = NULL) + tmp <- array(1:length(cross), c(syear = length(cross))) + alpha <- recipe$Analysis$alpha + if (is.null(alpha)) { + alpha <- 0.05 + } + + requested_metrics <- strsplit(recipe$Analysis$Workflow$Skill$metric, + ", | |,")[[1]] + skill_metrics <- list() + if (!is.null(datos)) { + # convert $data elements to a list to use multiApply: + datos <- append(list(obs = datos$obs$data), + lapply(datos$hcst, function(x) x$data)) + ## CRPS metrics only make sense in the pooled method: + if (any(c('crps', 'crpss') %in% requested_metrics)) { + crps <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...)
{ + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = res, obs = obs, dat_dim = NULL, + time_dim = 'syear', + memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps <- crps + # Build the reference forecast: + ref_clim <- Apply(list(datos$obs, tmp), + target_dims = list(c('ensemble', 'syear'), NULL), + function(x, y) { + Subset(x, along = 'syear', + indices = cross[[y]]$train.dexes, drop = T)}, + ncores = ncores, output_dims = 'ensemble')$output1 + datos <- append(list(ref = ref_clim), datos) + crps_clim <- Apply(datos[1:2], target_dims = c('syear', 'ensemble'), + fun = function(ref, obs) { + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + s2dv:::.CRPS(exp = ref, obs = obs, dat_dim = NULL, + time_dim = 'syear', memb_dim = 'ensemble')}, + ncores = ncores)$output1 + skill_metrics$crps_clim <- crps_clim + + + crpss <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(ref, obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- names(dim(obs)) + obs <- Subset(obs, along = 'ensemble', + indices = 1, drop = 'selected') + CRPSS(exp = res, obs = obs, ref = ref, + memb_dim = 'ensemble', Fair = FALSE, + time_dim = 'syear', clim.cross.val = FALSE)}, + ncores = ncores) + skill_metrics$crpss <- crpss$crpss + skill_metrics$crpss_significance <- crpss$sign + datos <- datos[-1] + } + ## Deterministic metrics using ensemble mean: + if ('enscorr' %in% requested_metrics) { + enscorr <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Corr(exp = res, + obs = obs, + dat_dim = NULL, + time_dim = 'syear', + method = 'pearson', + memb_dim = 'ensemble', + memb = F, + conf = F, + pval = F, + sign = T, + alpha = alpha)}, ncores = ncores) + skill_metrics$enscorr <- enscorr$corr + skill_metrics$enscorr_significance <- enscorr$sign + } + if ('mean_bias' %in% requested_metrics) { + ## Mean Bias can make sense for non-anomalies (e.g. calibrated data) + mean_bias <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + Bias(exp = res, + obs = obs, + time_dim = 'syear', + memb_dim = 'ensemble', + alpha = alpha)}, + ncores = ncores) + skill_metrics$mean_bias <- mean_bias$bias + skill_metrics$mean_bias_significance <- mean_bias$sig + } + if ('rms' %in% requested_metrics) { + rms <- Apply(datos, target_dims = c('syear', 'ensemble'), + function(obs, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMS(exp = res, + obs = obs, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha)}, + ncores = ncores) + skill_metrics$rms <- rms$rms + } + ####### Do we need reference fcst to compute rmss?? + if ('rmss' %in% requested_metrics) { + # if (is.null(res$ref_obs_tr)) { + # } + rmss <- Apply(datos, target_dims = c('syear','ensemble'), + function(obs, ...) 
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + RMSSS(exp = res, + obs = obs, + #ref = res$ref_obs_tr, + memb_dim = 'ensemble', dat_dim = NULL, + time_dim = 'syear', alpha = alpha, sign = TRUE)}, + ncores = ncores) + skill_metrics$rmss <- rmss$rmss + skill_metrics$rmss_significance <- rmss$sign + } + if (any(c('std', 'standard_deviation') %in% requested_metrics)) { + # These are used to compute the correlation for the scorecards + # It's the temporal standard deviation of the ensmean + std_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + sd(as.vector(res))}, + ncores = ncores)$output1 + + std_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = 'sd')$output1 + skill_metrics[['std_hcst']] <- std_hcst + skill_metrics[['std_obs']] <- std_obs + } + + if (any(c('var', 'variance') %in% requested_metrics)) { + ## Calculate variance + var_hcst <- Apply(datos[-1], target_dims = c('syear', 'ensemble'), + fun = function(...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + variance <- var(as.vector(res))}, + ncores = ncores)$output1 + + var_obs <- Apply(datos[1], target_dims = c('syear', 'ensemble'), + fun = function(x) {var(as.vector(x))}, + ncores = ncores)$output1 + skill_metrics[['var_hcst']] <- var_hcst + skill_metrics[['var_obs']] <- var_obs + } + if ('n_eff' %in% requested_metrics) { + ## Calculate the effective degrees of freedom + n_eff <- s2dv::Eno(datos[[1]], time_dim = 'syear', + na.action = na.pass, + ncores = ncores) + skill_metrics[['n_eff']] <- n_eff + } + if (any(c('cov', 'covariance') %in% requested_metrics)) { + # The covariance of the ensemble mean with the observations + covariance <- Apply(datos, target_dims = c('syear', 'ensemble'), + fun = function(obs, ...)
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- MeanDims(res, 'ensemble') + cov(as.vector(obs), as.vector(res), + use = "everything", + method = "pearson")}, + ncores = ncores)$output1 + skill_metrics$covariance <- covariance + } + } + # Probabilistic metrics for categories + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + eval(parse(text = y))})}) + # For the pooled method: + if (is.null(probs)) { + # Calculate probs from the ensemble distribution + # First compute the category limits in cross-validation + lims <- list() + syears_ind <- list(t = tmp) + lims$hcst <- Apply(append(syears_ind, datos[-1]), + target_dims = append(list(NULL), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(t, ..., crossin, prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + res <- Subset(res, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + lims$obs <- Apply(append(syears_ind, datos[1]), + target_dims = list(NULL, c('syear', 'ensemble')), + fun = function(t, obs, crossin, prob_lims) { + res <- Subset(obs, along = 'syear', + indices = crossin[[t]]$train.dexes, drop =T) + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + crossin = cross, ncores = ncores) + probs <- list() + probs$hcst <- list() + probs$fcst <- list() + probs$obs <- list() + for (ps in 1:length(categories)) { + lim <- lims$hcst[ps] + probs$hcst <- append(probs$hcst, + list(Apply(append(lim, datos[-1]), + target_dims = append(list(c('syear', 'cat')), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + lim <- lims$obs[ps] + probs$obs <- append(probs$obs, + list(Apply(append(lim, datos[1]), + target_dims = list(c('syear', 'cat'), + c('syear', 'ensemble')), + fun = function(lim, obs) { + GetProbs(data = obs, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + + if (!is.null(datos$fcst)) { + ## NEEDS TO COMPUTE LIMS FOR THE FULL HINDCAST PERIOD + lims$fcst[ps] <- Apply(datos[-1], + target_dims = c('syear', 'ensemble'), + fun = function(..., prob_lims) { + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + lapply(prob_lims, function(ps) { + limits <- quantile(as.vector(res), + ps, na.rm = na.rm) + limits <- array(limits, c(cat = length(limits))) + }) + }, prob_lims = categories, + ncores = ncores) + + probs$fcst <- append(probs$fcst, + list(Apply(append(lims$fcst[ps], datos[-1]), + target_dims = append(list('cat'), + lapply(1:length(datos[-1]), + function(x){c('syear', 'ensemble')})), + fun = function(lim, ...)
{ + res <- abind(..., along = 2) + names(dim(res)) <- c('syear', 'ensemble') + GetProbs(data = res, time_dim = 'syear', + memb_dim = 'ensemble', + indices_for_quantiles = NULL, + prob_thresholds = NULL, + abs_thresholds = lim, + bin_dim_abs = 'cat', + weights = NULL, cross.val = FALSE)}, + ncores = ncores)$output1)) + } + } + # At this point, all methods have probs calculated + } + # TODO: distinguish between rpss and bss + # if 1 percentile -> bss + # if more than 1 -> rpss + exe_rps <- unlist(lapply(categories, function(x) { + if (length(x) > 1) { + x <- x[1] * 100 + } + return(x)})) + if (!is.null(datos)) { + if (Fair) { + nmemb <- sum(unlist(lapply(datos[-1], function(x) dim(x)['ensemble']))) + } else { + nmemb <- NULL + } + } else { + nmemb <- NULL + } + + # Compute rps + for (ps in 1:length(exe_rps)) { + if ('rps' %in% requested_metrics) { + rps <- RPS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], memb_dim = NULL, + cat_dim = 'cat', cross.val = FALSE, time_dim = 'syear', + Fair = Fair, nmemb = nmemb, + ncores = ncores) + rps_clim <- Apply(list(probs$obs[[ps]]), + target_dims = c('cat', 'syear'), + RPS_clim, bin_dim_abs = 'cat', Fair = Fair, + cross.val = FALSE, ncores = ncores)$output1 + skill_metrics$rps <- rps + skill_metrics$rps_clim <- rps_clim + # names based on the categories: + # To be used when the visualization supports more than one rps set + #skill_metrics[[paste0('rps', exe_rps[ps])]] <- rps + #skill_metrics[[paste0('rps_clim', + # exe_rps[ps])]] <- rps_clim + } + if ('rpss' %in% requested_metrics) { + rpss <- RPSS(exp = probs$hcst[[ps]], + obs = probs$obs[[ps]], + ref = NULL, # ref is 1/3 by default if terciles + time_dim = 'syear', memb_dim = NULL, + cat_dim = 'cat', nmemb = nmemb, + dat_dim = NULL, + prob_thresholds = categories[[ps]], # unused param when probs are provided + indices_for_clim = NULL, + Fair = Fair, weights_exp = NULL, weights_ref = NULL, + cross.val = FALSE, na.rm = na.rm, + sig_method.type = 'two.sided.approx', alpha = alpha, + ncores = ncores) + skill_metrics$rpss <- rpss$rpss + skill_metrics$rpss_significance <- rpss$sign + } + } + return(skill_metrics) +} diff --git a/modules/Multimodel/load_multimodel.R b/modules/Multimodel/load_multimodel.R index 778aae6e6d08a517444e367139b1134033696128..389d3d693e24eeed390ef931b8ea9a8c3c65cf67 --- a/modules/Multimodel/load_multimodel.R +++ b/modules/Multimodel/load_multimodel.R @@ -275,3 +275,4 @@ load_multimodel <- function(recipe) { return(list(hcst = hcst, fcst = fcst, obs = obs)) } + diff --git a/modules/Multimodel/load_multimodel_mean.R b/modules/Multimodel/load_multimodel_mean.R new file mode 100644 index 0000000000000000000000000000000000000000..d23f98794d5a733fe888a21709ea57dc4c5ad726 --- /dev/null +++ b/modules/Multimodel/load_multimodel_mean.R @@ -0,0 +1,276 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_mean <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax +
lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", + variable, "_hcst_EM_", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- obs.path <- hcst.path + fcst.path <- gsub("hcst", "fcst", fcst.path, fixed = T) + obs.path <- file.path(recipe$Run$output_dir, + gsub('\\.','',exp.name[1]), ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, + paste0("scorecards_", + gsub('\\.','',exp.name[1]), "_", ref.name, + "_$var$_-obs_$file_date$__", hcst_start, "-", + hcst_end, "_s", shortdate, ".nc")) + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + for (sys in exp.name) { + hcst.path_aux <- gsub("$model$", gsub('\\.','', sys), hcst.path, fixed = T) + aux <- Start(dat = hcst.path_aux, + var = 'hcst_EM', + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube + hcst <- append(hcst, list(as.s2dv_cube(aux))) + names(hcst)[length(hcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + for (sys in exp.name) { + fcst.path_aux <- gsub("$model$", 
gsub('\\.','', sys), + fcst.path, fixed = T) + aux <- Start(dat = fcst.path_aux, + var = 'fcst_EM', + sday = 'all', + sweek = 'all', + syear = 'all', + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'dat'), + retrieve = TRUE) + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(aux))] <- dim(aux) + dim(aux) <- default_dims + # Convert obs to s2dv_cube + fcst <- append(fcst, list(as.s2dv_cube(aux))) + names(fcst)[length(fcst)] <- sys + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + + if (store.freq == "monthly_mean") { + + obs <- Start(dat = obs.path, + var = variable, + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
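+ # A minimal illustration of the date-to-file mapping used below (dates are
+ # hypothetical; it mirrors the sapply(dates, format, '%Y%m') call in the code):
+ # d <- as.POSIXct(c('1993-06-15', '1993-07-15'), tz = 'UTC')
+ # sapply(d, format, '%Y%m')  # "199306" "199307"
+ # Sorting the unique strings gives one 'file_date' entry per monthly file.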
+ dates <- hcst$attrs$Dates + dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")] + + # Get year and month for file_date + dates_file <- sapply(dates, format, '%Y%m') + dim(dates_file) <- dim(dates) + # Set hour to 12:00 to ensure correct date retrieval for daily data + lubridate::hour(dates) <- 12 + lubridate::minute(dates) <- 00 + # Restore correct dimensions + dim(dates) <- dim(dates_file) + + obs <- Start(dat = obs.path, + var = variable, + file_date = sort(unique(dates_file)), + time = dates, + time_var = 'time', + time_across = 'file_date', + merge_across_dims = TRUE, + merge_across_dims_narm = TRUE, + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + metadata_dims = 'var', + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + ############################# + + } + + # Adds ensemble dim to obs (for consistency with hcst/fcst) + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(obs))] <- dim(obs) + dim(obs) <- default_dims + + # Convert obs to s2dv_cube + obs <- as.s2dv_cube(obs) + + # Check for consistency between hcst and obs grid + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$latitude), as.vector(obs$coords$latitude)))) { + lat_error_msg <- paste("Latitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lat_error_msg) + hcst_lat_msg <- paste0("First hcst lat: ", hcst$lat[1], + "; Last hcst lat: ", hcst$lat[length(hcst$lat)]) + info(recipe$Run$logger, hcst_lat_msg) + obs_lat_msg <- paste0("First obs lat: ", obs$lat[1], + "; Last obs lat: ", obs$lat[length(obs$lat)]) + info(recipe$Run$logger, obs_lat_msg) + stop("hcst and obs don't share the same latitudes.") + } + if (!isTRUE(all.equal(as.vector(hcst[[1]]$coords$longitude), as.vector(obs$coords$longitude)))) { + lon_error_msg <- paste("Longitude mismatch between hcst and obs.", + "Please check the original grids and the", + "regrid parameters in your recipe.") + error(recipe$Run$logger, lon_error_msg) + hcst_lon_msg <- paste0("First hcst lon: ", hcst$lon[1], + "; Last hcst lon: ", hcst$lon[length(hcst$lon)]) + info(recipe$Run$logger, hcst_lon_msg) + obs_lon_msg <- paste0("First obs lon: ", obs$lon[1], + "; Last obs lon: ", obs$lon[length(obs$lon)]) + info(recipe$Run$logger, obs_lon_msg) + stop("hcst and obs don't share the same longitudes.") + } + + +# info(recipe$Run$logger, +# "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####") + + return(list(hcst = hcst, fcst = fcst, obs = obs)) +} diff --git a/modules/Multimodel/load_multimodel_probs.R b/modules/Multimodel/load_multimodel_probs.R new file mode 100644 index 0000000000000000000000000000000000000000..4b0eb358d6c8f9692ecfad2bb4d27a4935a6f734 --- /dev/null +++ b/modules/Multimodel/load_multimodel_probs.R @@ -0,0 +1,294 @@ + +source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source("modules/Saving/Saving.R") +library(abind) +load_multimodel_probs <- function(recipe) { + + archive <- read_yaml("conf/archive.yml")$esarchive + ref.name <- recipe$Analysis$Datasets$Reference$name + exp.name <- sapply(recipe$Analysis$Datasets$System, '[[', 'name') + store.freq <- 
recipe$Analysis$Variables$freq + variable <- strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + categories <- recipe$Analysis$Workflow$Probabilities$percentiles + categories <- lapply(categories, function (x) { + sapply(x, function(y) { + round(eval(parse(text = y)),2)})}) + + if (recipe$Analysis$Variables$freq == "monthly_mean") { + split_multiselected_dims = TRUE + } else { + split_multiselected_dims = FALSE + } + + # Find the saved data directory + recipe$Run$output_dir <- file.path(recipe$Run$output_dir, "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + if (tolower(recipe$Analysis$Output_format) == "scorecards") { + hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + } else { + filename <- "$var$_$file_date$.nc" + } + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, filename) + + fcst.path <- hcst.path + obs.path <- file.path(recipe$Run$output_dir, gsub('\\.','', exp.name), + ref.name, + recipe$Analysis$Workflow$Calibration$method, + variable, paste0("scorecards__", ref.name, "_", variable, + "_-probs_$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc")) + + ## Generate probs names inner files variables: + prob_names <- NULL + all_names <- NULL + for (ps in 1:length(categories)) { + for (perc in 1:(length(categories[[ps]]) + 1)) { + if (perc == 1) { + name_elem <- paste0("below_", categories[[ps]][perc]) + } else if (perc == length(categories[[ps]]) + 1) { + name_elem <- paste0("above_", categories[[ps]][perc-1]) + } else { + name_elem <- paste0("from_", categories[[ps]][perc-1], + "_to_", categories[[ps]][perc]) + } + all_names <- c(all_names, name_elem) + } + prob_names <- append(prob_names, list(all_names)) + } + + # Load hindcast + #------------------------------------------------------------------- + hcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = hcst.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[2] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + hcst <- append(hcst, list(probs)) + } + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(hcst))[which(names(dim(hcst)) == 'file_date')] <- "syear" + 
default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(hcst))] <- dim(hcst) + dim(hcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(hcst, "Variables")$common$time))[which(names( + dim(attr(hcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(hcst, "Variables")$common$time))] <- + dim(attr(hcst, "Variables")$common$time) + dim(attr(hcst, "Variables")$common$time) <- default_time_dims + } + ############################### + + + + # Load forecast + #------------------------------------------------------------------- + if (!is.null(recipe$Analysis$Time$fcst_year)) { + fcst <- list() + probs <- list() + for (ps in 1:length(prob_names)) { + for (sys in exp.name) { + aux <- Start(dat = fcst.path, + var = prob_names[[ps]], + file_date = sdates$fcst, + model = gsub('\\.','', sys), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude')), + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + probs <- append(probs, list(as.s2dv_cube(aux))) + names(probs)[length(probs)] <- sys + } + fcst <- append(fcst, list(probs)) + } + + ############################# + #NOTE: NOT TESTED YET + if (store.freq %in% c("daily_mean", "daily")) { + # Adjusts dims for daily case, could be removed if startR allows + # multidim split + names(dim(fcst))[which(names(dim(fcst)) == 'file_date')] <- "syear" + default_dims <- c(dat = 1, var = 1, sday = 1, + sweek = 1, syear = 1, time = 1, + latitude = 1, longitude = 1, ensemble = 1) + default_dims[names(dim(fcst))] <- dim(fcst) + dim(fcst) <- default_dims + # Change time attribute dimensions + default_time_dims <- c(sday = 1, sweek = 1, syear = 1, time = 1) + names(dim(attr(fcst, "Variables")$common$time))[which(names( + dim(attr(fcst, "Variables")$common$time)) == 'file_date')] <- "syear" + default_time_dims[names(dim(attr(fcst, "Variables")$common$time))] <- + dim(attr(fcst, "Variables")$common$time) + dim(attr(fcst, "Variables")$common$time) <- default_time_dims + } + ############################# + } else { + fcst <- NULL + } + + # Load reference + #------------------------------------------------------------------- + obs <- list() + + if (store.freq == "monthly_mean") { + for (ps in 1:length(prob_names)) { + aux <- Start(dat = obs.path, + var = prob_names[[ps]], + file_date = sdates$hcst, + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat','latitude'), + longitude = c('lon','longitude')), + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = TRUE, + retrieve = TRUE) + names(dim(aux))[[2]] <- 'cat' + obs <- append(obs, list(as.s2dv_cube(aux))) + } + } else if (store.freq %in% c("daily_mean", "daily")) { + + ############################# + #NOTE: NOT TESTED YET + + # Obtain dates and date dimensions from the loaded hcst data to make sure + # the corresponding observations are loaded correctly. 
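+ # Hedged sketch of the midday fix applied below (value is hypothetical):
+ # timestamps at 00:00 sit on the boundary between days, so pinning them to
+ # 12:00 makes the time matching pick the intended daily record:
+ # d <- as.POSIXct('1993-06-15 00:00:00', tz = 'UTC')
+ # lubridate::hour(d) <- 12
+ # d  # "1993-06-15 12:00:00 UTC"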
+    dates <- hcst$attrs$Dates
+    dim(dates) <- hcst$dims[c("sday", "sweek", "syear", "time")]
+
+    # Get year and month for file_date
+    dates_file <- sapply(dates, format, '%Y%m')
+    dim(dates_file) <- dim(dates)
+    # Set hour to 12:00 to ensure correct date retrieval for daily data
+    lubridate::hour(dates) <- 12
+    lubridate::minute(dates) <- 00
+    # Restore correct dimensions
+    dim(dates) <- dim(dates_file)
+
+    obs <- Start(dat = obs.path,
+                 var = variable,
+                 file_date = sort(unique(dates_file)),
+                 time = dates,
+                 time_var = 'time',
+                 time_across = 'file_date',
+                 merge_across_dims = TRUE,
+                 merge_across_dims_narm = TRUE,
+                 latitude = 'all',
+                 latitude_reorder = Sort(),
+                 longitude = 'all',
+                 longitude_reorder = circularsort,
+                 synonims = list(latitude = c('lat','latitude'),
+                                 longitude = c('lon','longitude')),
+                 metadata_dims = 'var',
+                 return_vars = list(latitude = 'dat',
+                                    longitude = 'dat',
+                                    time = 'file_date'),
+                 split_multiselected_dims = TRUE,
+                 retrieve = TRUE)
+    #############################
+
+  }
+  # Adds ensemble dim to obs (for consistency with hcst/fcst)
+  #default_dims <- c(dat = 1, var = 1, sday = 1,
+  #                  sweek = 1, syear = 1, time = 1,
+  #                  latitude = 1, longitude = 1, ensemble = 1)
+  #default_dims[names(dim(obs))] <- dim(obs)
+  #dim(obs) <- default_dims
+
+  # Convert obs to s2dv_cube
+  #obs <- as.s2dv_cube(obs)
+
+  # Check for consistency between hcst and obs grid
+  # NOTE: assumes the monthly structure built above, where hcst is a list of
+  # per-category lists of s2dv_cubes and obs is a list of s2dv_cubes, so the
+  # coordinates of the first cubes are compared:
+  hcst_coords <- hcst[[1]][[1]]$coords
+  obs_coords <- obs[[1]]$coords
+  if (!isTRUE(all.equal(as.vector(hcst_coords$latitude),
+                        as.vector(obs_coords$latitude)))) {
+    lat_error_msg <- paste("Latitude mismatch between hcst and obs.",
+                           "Please check the original grids and the",
+                           "regrid parameters in your recipe.")
+    error(recipe$Run$logger, lat_error_msg)
+    hcst_lat_msg <- paste0("First hcst lat: ", hcst_coords$latitude[1],
+                           "; Last hcst lat: ",
+                           hcst_coords$latitude[length(hcst_coords$latitude)])
+    info(recipe$Run$logger, hcst_lat_msg)
+    obs_lat_msg <- paste0("First obs lat: ", obs_coords$latitude[1],
+                          "; Last obs lat: ",
+                          obs_coords$latitude[length(obs_coords$latitude)])
+    info(recipe$Run$logger, obs_lat_msg)
+    stop("hcst and obs don't share the same latitudes.")
+  }
+  if (!isTRUE(all.equal(as.vector(hcst_coords$longitude),
+                        as.vector(obs_coords$longitude)))) {
+    lon_error_msg <- paste("Longitude mismatch between hcst and obs.",
+                           "Please check the original grids and the",
+                           "regrid parameters in your recipe.")
+    error(recipe$Run$logger, lon_error_msg)
+    hcst_lon_msg <- paste0("First hcst lon: ", hcst_coords$longitude[1],
+                           "; Last hcst lon: ",
+                           hcst_coords$longitude[length(hcst_coords$longitude)])
+    info(recipe$Run$logger, hcst_lon_msg)
+    obs_lon_msg <- paste0("First obs lon: ", obs_coords$longitude[1],
+                          "; Last obs lon: ",
+                          obs_coords$longitude[length(obs_coords$longitude)])
+    info(recipe$Run$logger, obs_lon_msg)
+    stop("hcst and obs don't share the same longitudes.")
+  }
+
+
+# info(recipe$Run$logger,
+#      "##### DATA LOADING INDIVIDUAL ENS COMPLETED SUCCESSFULLY #####")
+
+  return(list(hcst = hcst, fcst = fcst, obs = obs))
+}
diff --git a/modules/Multimodel/load_multimodel_splitted.R b/modules/Multimodel/load_multimodel_splitted.R
deleted file mode 100644
index 5b9a642f5599602a2318f71b4766dffcd13ef930..0000000000000000000000000000000000000000
--- a/modules/Multimodel/load_multimodel_splitted.R
+++ /dev/null
@@ -1,58 +0,0 @@
-
-load_multimodel_splitted <- function(recipe){
-
-  # Retrieve data dimension only without loading data
-  dims <- dims_multimodel(recipe)
-  data_order <- names(dims$dim.hcst)
-
-  # Create empty array with desired dimensions
-  data <- list(hcst = NULL, fcst = NULL, obs = NULL)
-  data$hcst$data <- array(data = NA, dim = dims$dim.hcst)
-  if (!is.null(recipe$Analysis$Time$fcst_year)) {
-    data$fcst$data <-
array(data = NA, dim = dims$dim.fcst) - } - files_hcst <- list() - files_fcst <- list() - files_obs <- list() - - - # Loop over system to load hindcast and forecast data - for (sys in 1:length(recipe$Analysis$Datasets$System$models)){ - - recipe_aux <- recipe - system_load <- recipe$Analysis$Datasets$System$models[sys] - recipe_aux$Analysis$Datasets$System$models <- system_load - - data_aux <- load_multimodel(recipe_aux) - - data$hcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$hcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$hcst$data, order = data_order) - - if(!is.null(recipe$Analysis$Time$fcst_year)){ - data$fcst$data[,,,,,which(recipe$Analysis$Datasets$System$models == system_load),,,,1:dim(data_aux$fcst$data)['ensemble']] <- s2dv::Reorder(data = data_aux$fcst$data, order = data_order) - } - - files_hcst <- append(files_hcst, data_aux$hcst$attrs$source_files, after = length(files_hcst)) - files_fcst <- append(files_fcst, data_aux$fcst$attrs$source_files, after = length(files_fcst)) - files_obs <- append(files_obs, data_aux$obs$attrs$source_files, after = length(files_obs)) - - } # close loop on sys - - # Define obs data - data$obs$data <- data_aux$obs$data - - # Include data attributes - data$hcst$attrs <- data_aux$hcst$attrs - data$fcst$attrs <- data_aux$fcst$attrs - data$obs$attrs <- data_aux$obs$attrs - data$hcst$coords <- data_aux$hcst$coords - data$fcst$coords <- data_aux$fcst$coords - data$obs$coords <- data_aux$obs$coords - - # Remove temporary data_aux - rm(data_aux) - - return(list(data = data, - files_hcst = files_hcst, - files_fcst = files_fcst, - files_obs = files_obs)) -} diff --git a/modules/Saving/R/Utils.R b/modules/Saving/R/Utils.R index a5bd5d0c9d6d299f2b4fa8340a529a9eecb05459..18442583b9141c38961af269c79c214c6ce41faf 100644 --- a/modules/Saving/R/Utils.R +++ b/modules/Saving/R/Utils.R @@ -64,6 +64,6 @@ attr(latitude, 'variables') <- metadata names(dim(latitude)) <- 'latitude' - return(list(lat=latitude, lon=longitude)) + return(list(lon = longitude, lat = latitude)) } diff --git a/modules/Saving/R/get_filename_old.R b/modules/Saving/R/get_filename_old.R deleted file mode 100644 index b2345691c8a5ffdc4839a0ca331761aa4548201b..0000000000000000000000000000000000000000 --- a/modules/Saving/R/get_filename_old.R +++ /dev/null @@ -1,81 +0,0 @@ -## TODO: Separate by time aggregation -## TODO: Build a default path that accounts for: -## variable, system, reference, start date and region name - -get_filename <- function(dir, recipe, var, date, agg, file.type) { - # This function builds the path of the output file based on directory, - # variable, forecast date, startdate, aggregation, forecast horizon and - # type of metric/forecast/probability. 
- - if (tolower(recipe$Analysis$Horizon) == "subseasonal") { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%V") - dd <- "week" - } else { - shortdate <- format(as.Date(as.character(date), "%Y%m%d"), "%m") - dd <- "month" - } - - if (tolower(recipe$Analysis$Horizon) == "decadal") { - # to not save the month and day in the filename (needed for the multimodel) - date <- substr(date,1,4) - # for the models initialised in January - it may be better to do this in save_* functions - archive <- read_yaml(paste0("conf/archive_decadal.yml"))$esarchive - exp.name <- recipe$Analysis$Datasets$System$name - if (exp.name != 'Multimodel' && archive$System[[exp.name]]$initial_month == 1){ - date <- as.character(as.numeric(date)-1) - } - } - - switch(tolower(agg), - "region" = {gg <- "-region"}, - "global" = {gg <- ""}) - - system <- gsub('.','', recipe$Analysis$Datasets$System$name, fixed = T) - reference <- gsub('.','', recipe$Analysis$Datasets$Reference$name, fixed = T) - - if (tolower(recipe$Analysis$Output_format) == 'scorecards') { - # Define output dir name accordint to Scorecards format - dict <- read_yaml("conf/output_dictionaries/scorecards.yml") - # Get necessary names - hcst_start <- recipe$Analysis$Time$hcst_start - hcst_end <- recipe$Analysis$Time$hcst_end - - switch(tolower(file.type), - "skill" = {type_info <- "-skill_"}, - "corr" = {type_info <- "-corr_"}, - "exp" = {type_info <- paste0("_", date, "_")}, - "obs" = {type_info <- paste0("-obs_", date, "_")}, - "percentiles" = {type_info <- "-percentiles_"}, - "probs" = {type_info <- paste0("-probs_", date, "_")}, - "bias" = {type_info <- paste0("-bias_", date, "_")}, - "rps_syear" = {type_info <- "rps_syear"}, - "rps_clim_syear" = {type_info <- "rps_clim_syear"}, - "crps_syear" = {type_info <- "crps_syear"}, - "crps_clim_syear" = {type_info <- "crps_clim_syear"}, - "crps" = {type_info <- "crps"}, - "mean_bias" = {type_info <- "mean_bias"}, - {type_info <- paste0(file.type)}) - - # Build file name - filename <- paste0("scorecards_", system, "_", reference, "_", var, - "_", type_info, "_", hcst_start, "-", hcst_end, - "_s", shortdate) - } else { - if (tolower(recipe$Analysis$Horizon) == "decadal") { - shortdate_aux <- '' - } else { - shortdate_aux <- paste0("_", dd, shortdate) - } - switch(tolower(file.type), - {filename <- paste0(var, gg, "-", tolower(file.type), shortdate_aux)}, - "skill" = {filename <- paste0(var, gg, "-skill", shortdate_aux)}, - "corr" = {filename <- paste0(var, gg, "-corr", shortdate_aux)}, - "exp" = {filename <- paste0(var, gg, "_", date)}, - "obs" = {filename <- paste0(var, gg, "-obs_", date)}, - "percentiles" = {filename <- paste0(var, gg, "-percentiles", shortdate_aux)}, - "probs" = {filename <- paste0(var, gg, "-probs_", date)}, - "bias" = {filename <- paste0(var, gg, "-bias_", date)}) - } - return(paste0(dir, filename, ".nc")) -} - diff --git a/modules/Saving/R/get_times.R b/modules/Saving/R/get_times.R index 155b634b085761a02d340369d9deca6be94678bc..613dbea67dd8c5aa22e58dbec853aaef20d81dae 100644 --- a/modules/Saving/R/get_times.R +++ b/modules/Saving/R/get_times.R @@ -27,7 +27,8 @@ switch(fcst.horizon, "seasonal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {time <- leadtimes; ref <- 'hours since '; + "subseasonal" = {time <- seq(7, 7*dim(data_cube$attrs$Dates)[['time']], by = 7); + ref <- 'days since '; stdname <- ''}, "decadal" = {time <- leadtimes; ref <- 'hours since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}) 
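For reference, a minimal sketch of the leadtime values the new subseasonal branch above writes out, assuming a cube with four weekly leadtimes (n_time stands in for dim(data_cube$attrs$Dates)[['time']]):

    n_time <- 4                        # hypothetical number of weekly leadtimes
    time <- seq(7, 7 * n_time, by = 7) # 7 14 21 28
    # encoded with units "days since <sdate>" instead of "hours since <sdate>"
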
diff --git a/modules/Saving/R/get_times_old.R b/modules/Saving/R/get_times_old.R deleted file mode 100644 index ec36b1028889e54f43cd47932f591173677e69dc..0000000000000000000000000000000000000000 --- a/modules/Saving/R/get_times_old.R +++ /dev/null @@ -1,56 +0,0 @@ -# leadtimes: list 2 arrays, 'start' and 'end', containing the time bounds for -# each leadtime -# sdate: start date in POSIXt/POSIXct format -# calendar: name of the calendar in string format - -.get_times <- function(recipe, data_cube, sdate, calendar, init_date) { - - # Define list to contain metadata for time and time_bnds - times <- list() - # Compute initial date - fcst.horizon <- tolower(recipe$Analysis$Horizon) - # Generate time dimensions and the corresponding metadata. - dates <- as.PCICt(ClimProjDiags::Subset(data_cube$attrs$Dates, 'syear', 1), - cal = calendar) - leadtimes <- as.numeric(dates - init_date)/3600 - - switch(fcst.horizon, - "seasonal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}, - "subseasonal" = {len <- leadtimes; ref <- 'hours since '; - stdname <- ''}, - "decadal" = {time <- leadtimes; ref <- 'hours since '; - stdname <- paste(strtoi(leadtimes), collapse=", ")}) - dim(time) <- length(time) - sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting - metadata <- list(time = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar)) - attr(time, 'variables') <- metadata - names(dim(time)) <- 'time' - times$time <- time - # Generate time_bnds dimensions and the corresponding metadata. - if (!is.null(data_cube$attrs$time_bounds)) { - time_bounds <- lapply(data_cube$attrs$time_bounds, - function(x) { - y <- as.PCICt(ClimProjDiags::Subset(x, - along = 'syear', - indices = 1, - drop = 'non-selected'), - cal = calendar) - y <- as.numeric(y - init_date)/3600 - return(y) - }) - # Generates time_bnds dimensions and the corresponding metadata. 
- time_bnds <- abind(time_bounds, along = 0) - names(dim(time_bnds)) <- c("bnds", "time") - sdate <- as.Date(sdate, format = '%Y%m%d') # reformatting - metadata <- list(time_bnds = list(units = paste0(ref, sdate, 'T00:00:00'), - calendar = calendar, - long_name = "time bounds")) - attr(time_bnds, 'variables') <- metadata - attr(time, 'variables')$bounds <- "time_bounds" - times$time_bnds <- time_bnds - } - return(times) -} - diff --git a/modules/Saving/R/save_corr.R b/modules/Saving/R/save_corr.R index 47310bf0e5b80d3aa67c064a654192bfe603f0ec..9de533bf0099d5cfce5e5ef447945f82fbba7879 100644 --- a/modules/Saving/R/save_corr.R +++ b/modules/Saving/R/save_corr.R @@ -9,7 +9,7 @@ save_corr <- function(recipe, archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") # Define grid dimensions and names - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -114,7 +114,7 @@ save_corr <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, subset_skill) + vars <- c(times, latlon, subset_skill) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_forecast.R b/modules/Saving/R/save_forecast.R index b2d74fc9936dfafc4fa4962f5b2d9948d4d7c676..1367b4b4ac8753cba3c9815b0e202edbbc2f1729 100644 --- a/modules/Saving/R/save_forecast.R +++ b/modules/Saving/R/save_forecast.R @@ -10,7 +10,7 @@ save_forecast <- function(recipe, # outdir: directory where the files should be saved # agg: aggregation, "global" or "country" - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) @@ -132,7 +132,7 @@ save_forecast <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, fcst) + vars <- c(times, latlon, fcst) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_metrics.R b/modules/Saving/R/save_metrics.R index 74c3de01e16ac219e3397c1fbd951e5825df6152..7a8173b7b039157408d51e451a1efd127b68dd62 100644 --- a/modules/Saving/R/save_metrics.R +++ b/modules/Saving/R/save_metrics.R @@ -14,7 +14,7 @@ save_metrics <- function(recipe, stop("'metrics' should be a named list.") } # Define grid dimensions and names - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") @@ -118,8 +118,8 @@ save_metrics <- function(recipe, fcst.sdate, agg, names(subset_metric)[[i]]) SaveExp(data = subset_metric[[i]], destination = outdir, Dates = dates, - coords = c(data_cube$coords['longitude'], - data_cube$coords['latitude']), + coords = list(latitude = data_cube$coords[['latitude']], + longitude = data_cube$coords[['longitude']]), time_bounds = time_bounds, varname = names(subset_metric)[[i]], metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, @@ -177,7 +177,7 @@ save_metrics <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export 
to netCDF - vars <- c(latlon, times, subset_metric) + vars <- c(times, latlon, subset_metric) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_metrics_old.R b/modules/Saving/R/save_metrics_old.R deleted file mode 100644 index db7ceecacd0f6e8af750fae45147e6dcbf07f506..0000000000000000000000000000000000000000 --- a/modules/Saving/R/save_metrics_old.R +++ /dev/null @@ -1,171 +0,0 @@ -save_metrics <- function(recipe, - metrics, - dictionary = NULL, - data_cube, - agg = "global", - outdir = NULL, - module = "skill") { - # This function adds metadata to the skill metrics in 'skill' - # and exports them to a netCDF file inside 'outdir'. - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - dictionary <- read_yaml("conf/variable-dictionary.yml") - - global_attributes <- .get_global_attributes(recipe, archive) - time_bounds <- NULL - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel') { - if (fcst.horizon == 'decadal'){ - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar - } else { - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar - } - } else { - calendar <- archive$System[[global_attributes$system]]$calendar - } - if (fcst.horizon == 'decadal') { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, - '-01-01'), cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - - times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) - # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { - # Subset skill arrays - subset_metric <- lapply(metrics, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - # Generate name of output file - variable <- data_cube$attrs$Variable$varName[[var]] - outdir <- get_dir(recipe = recipe, variable = variable) - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - if (tolower(recipe$Analysis$Output_format) == "scorecards") { - for (i in 1:length(subset_metric)) { - if (any('syear' %in% names(dim(subset_metric[[i]])))) { - sdate_dim_save = 'syear' - dates <- data_cube$attrs$Dates - time_bounds <- data_cube$attrs$time_bounds - } else { - sdate_dim_save = NULL - dates <- Subset(data_cube$attrs$Dates, along = 'syear', indices = 1) - if (!is.null(data_cube$attrs$time_bounds)) { - time_bounds <- lapply(data_cube$attrs$time_bounds, - FUN = function (x) { - Subset(x, along = 'syear', indices = 1) - }) - } - } - ## TODO: Maybe 'scorecards' condition could go here to further simplify - ## the code - extra_string <- get_filename(NULL, recipe, variable, - fcst.sdate, agg, names(subset_metric)[[i]]) - SaveExp(data = subset_metric[[i]], destination = outdir, - Dates = dates, - coords = c(data_cube$coords['longitude'], - data_cube$coords['latitude']), - time_bounds = time_bounds, - varname = names(subset_metric)[[i]], - metadata = data_cube$attrs$Variable$metadata, Datasets = NULL, - startdates = NULL, dat_dim = NULL, sdate_dim = sdate_dim_save, - ftime_dim = 'time', var_dim = NULL, memb_dim = NULL, - drop_dims = NULL, single_file = TRUE, - extra_string = extra_string) - } - } else { - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, module) - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - subset_metric <- lapply(subset_metric, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - attr(subset_metric[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(subset_metric)) { - metric <- names(subset_metric[i]) - long_name <- dictionary$metrics[[metric]]$long_name - missing_val <- -9.e+33 - subset_metric[[i]][is.na(subset_metric[[i]])] <- missing_val - if (tolower(agg) == "country") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('Country', 'time') - } else if (tolower(agg) == "region") { - sdname <- paste0(metric, " region-aggregated metric") - dims <- c('region', 'time') - } else { - sdname <- paste0(metric) - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = metric, - standard_name = sdname, - long_name = long_name, - missing_value = missing_val)) - attr(subset_metric[[i]], 'variables') <- metadata - names(dim(subset_metric[[i]])) <- dims - } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, times$time, subset_metric), outfile) - } else if (tolower(agg) 
== "region") { - region <- list(region = array(1:dim(metrics[[1]])['region'], - c(dim(metrics[[1]])['region']))) - ## TODO: check metadata when more than 1 region is store in the data array - attr(region, 'variables') <- data_cube$attrs$Variable$metadata['region'] - vars <- c(region, times) - vars <- c(vars, subset_metric) - ArrayToNc(vars, outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- c(latlon, times, subset_metric) - ArrayToNc(vars, outfile) - } - } - } - info(recipe$Run$logger, - paste("#####", toupper(module), "METRICS SAVED TO NETCDF FILE #####")) -} - diff --git a/modules/Saving/R/save_observations.R b/modules/Saving/R/save_observations.R index 794697190903fdd584867e6a5695a522f5a3bdc1..bb98162df9ca4f404fc775463d8eb7ef3afc1f82 100644 --- a/modules/Saving/R/save_observations.R +++ b/modules/Saving/R/save_observations.R @@ -9,7 +9,7 @@ save_observations <- function(recipe, # outdir: directory where the files should be saved # agg: aggregation, "global" or "country" - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") archive <- get_archive(recipe) dictionary <- read_yaml("conf/variable-dictionary.yml") global_attributes <- .get_global_attributes(recipe, archive) @@ -130,7 +130,7 @@ save_observations <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, fcst) + vars <- c(times, latlon, fcst) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_percentiles.R b/modules/Saving/R/save_percentiles.R index 4572289648e328fc69a327f5e13f57ec9c4c657b..279b1e352db3b636216bc85e22de8c60783a6a56 100644 --- a/modules/Saving/R/save_percentiles.R +++ b/modules/Saving/R/save_percentiles.R @@ -12,7 +12,7 @@ save_percentiles <- function(recipe, # }) archive <- get_archive(recipe) # Define grid dimensions and names - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") # Add global and variable attributes global_attributes <- .get_global_attributes(recipe, archive) ## TODO: Sort out the logic once default behavior is decided @@ -121,7 +121,7 @@ save_percentiles <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, subset_percentiles) + vars <- c(times, latlon, subset_percentiles) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_percentiles_old.R b/modules/Saving/R/save_percentiles_old.R deleted file mode 100644 index d5dfae16f16346242d8bf1b744d536164fb3e30d..0000000000000000000000000000000000000000 --- a/modules/Saving/R/save_percentiles_old.R +++ /dev/null @@ -1,126 +0,0 @@ -save_percentiles <- function(recipe, - percentiles, - data_cube, - agg = "global", - outdir = NULL) { - # This function adds metadata to the percentiles - # and exports them to a netCDF file inside 'outdir'. 
- archive <- get_archive(recipe) - # Define grid dimensions and names - lalo <- c('longitude', 'latitude') - # Add global and variable attributes - global_attributes <- .get_global_attributes(recipe, archive) - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - - # Time indices and metadata - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel') { - if (fcst.horizon == 'decadal') { - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar - } else { - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar - } - } else { - calendar <- archive$System[[global_attributes$system]]$calendar - } - - # Generate vector containing leadtimes - if (fcst.horizon == 'decadal') { - # if (global_attributes$system == 'Multimodel') { - # init_month <- 11 #TODO: put as if init_month is January - # } else { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # } - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, - '-01-01'), cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - # Select start date - # If a fcst is provided, use that as the ref. year. Otherwise use 1970. 
- if (fcst.horizon == 'decadal') { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - #PROBLEM: May be more than one fcst_year - # fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1], - # sprintf('%02d', init_month), '01') - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year[1]) - } else { - fcst.sdate <- paste0("1970", sprintf('%02d', init_month), '01') - } - } else { - if (!is.null(recipe$Analysis$Time$fcst_year)) { - fcst.sdate <- paste0(recipe$Analysis$Time$fcst_year, - recipe$Analysis$Time$sdate) - } else { - fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate) - } - } - times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) - - for (var in 1:data_cube$dims[['var']]) { - # Subset arrays - subset_percentiles <- lapply(percentiles, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - # Generate name of output file - variable <- data_cube$attrs$Variable$varName[[var]] - outdir <- get_dir(recipe = recipe, variable = variable) - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - outfile <- get_filename(outdir, recipe, variable, - fcst.sdate, agg, "percentiles") - - # Remove singleton dimensions and rearrange lon, lat and time dims - if (tolower(agg) == "global") { - subset_percentiles <- lapply(subset_percentiles, function(x) { - Reorder(x, c(lalo, 'time'))}) - } - - attr(subset_percentiles[[1]], 'global_attrs') <- global_attributes - - for (i in 1:length(subset_percentiles)) { - ## TODO: replace with proper standard names - percentile <- names(subset_percentiles[i]) - long_name <- paste0(gsub("^.*_", "", percentile), "th percentile") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = percentile, long_name = long_name)) - attr(subset_percentiles[[i]], 'variables') <- metadata - names(dim(subset_percentiles[[i]])) <- dims - } - - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, time, subset_percentiles), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- c(latlon, times, subset_percentiles) - ArrayToNc(vars, outfile) - } - } - info(recipe$Run$logger, "##### PERCENTILES SAVED TO NETCDF FILE #####") -} diff --git a/modules/Saving/R/save_probabilities.R b/modules/Saving/R/save_probabilities.R index b1806c48e60fe19ee4f3d2ace2e685f026f91288..c2ebaaf671b3d342290714431ea52a5526d89cc5 100644 --- a/modules/Saving/R/save_probabilities.R +++ b/modules/Saving/R/save_probabilities.R @@ -14,7 +14,7 @@ save_probabilities <- function(recipe, # agg: aggregation, "global" or "country" # type: 'hcst' or 'fcst' - lalo <- c('longitude', 'latitude') + lalo <- c("latitude", "longitude") archive <- get_archive(recipe) global_attributes <- .get_global_attributes(recipe, archive) # Add anomaly computation to global attributes @@ -133,7 +133,7 @@ save_probabilities <- function(recipe, longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] latlon <- .get_latlon(latitude, longitude) # Compile variables into a list and export to netCDF - vars <- c(latlon, times, probs_syear) + vars <- c(times, latlon, probs_syear) ArrayToNc(vars, outfile) } } diff --git a/modules/Saving/R/save_probabilities_paloma.R 
b/modules/Saving/R/save_probabilities_paloma.R deleted file mode 100644 index d5f74deb1022080b64c4bedde760777b53d40cc6..0000000000000000000000000000000000000000 --- a/modules/Saving/R/save_probabilities_paloma.R +++ /dev/null @@ -1,146 +0,0 @@ -save_probabilities <- function(recipe, - probs, - data_cube, - agg = "global", - type = "hcst", - outdir = NULL) { - # Loops over the years in the s2dv_cube containing a hindcast or forecast - # and exports the corresponding category probabilities to a netCDF file. - # probs: array containing the probability data - # recipe: the auto-s2s recipe - # data_cube: s2dv_cube containing the data and metadata - # outdir: directory where the files should be saved - # type: 'exp' (hcst and fcst) or 'obs' - # agg: aggregation, "global" or "country" - # type: 'hcst' or 'fcst' - - lalo <- c('longitude', 'latitude') - archive <- get_archive(recipe) - global_attributes <- .get_global_attributes(recipe, archive) - # Add anomaly computation to global attributes - ## TODO: Sort out the logic once default behavior is decided - if ((!is.null(recipe$Analysis$Workflow$Anomalies$compute)) && - (recipe$Analysis$Workflow$Anomalies$compute)) { - global_attributes <- c(list(from_anomalies = "Yes"), - global_attributes) - } else { - global_attributes <- c(list(from_anomalies = "No"), - global_attributes) - } - fcst.horizon <- tolower(recipe$Analysis$Horizon) - store.freq <- recipe$Analysis$Variables$freq - if (global_attributes$system == 'Multimodel'){ - if (fcst.horizon == 'decadal') { - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]$name]]$calendar - } else { - calendar <- archive$System[[recipe$Analysis$Datasets$System$models[[1]]]]$calendar - } - } else { - calendar <- archive$System[[global_attributes$system]]$calendar - } - - if (fcst.horizon == 'decadal') { - # if (global_attributes$system == 'Multimodel') { - # init_month <- 11 #TODO: put as if init_month is January - # } else { - # init_month <- archive$System[[recipe$Analysis$Datasets$System$name]]$initial_month - # } - # init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, '-', - # sprintf('%02d', init_month), '-01'), - # cal = calendar) - init_date <- as.PCICt(paste0(as.numeric(recipe$Analysis$Time$hcst_start)+1, - '-01-01'), cal = calendar) - } else { - init_date <- as.PCICt(paste0(recipe$Analysis$Time$hcst_start, - recipe$Analysis$Time$sdate), - format = '%Y%m%d', cal = calendar) - } - - syears <- seq(1:dim(data_cube$data)['syear'][[1]]) - ## expect dim = [sday = 1, sweek = 1, syear, time] - syears_val <- lubridate::year(data_cube$attrs$Dates[1, 1, , 1]) - - # Loop over variable dimension - for (var in 1:data_cube$dims[['var']]) { - subset_probs <- lapply(probs, function(x) { - ClimProjDiags::Subset(x, along = 'var', - indices = var, - drop = 'selected')}) - # Create output directory - variable <- data_cube$attrs$Variable$varName[[var]] - outdir <- get_dir(recipe = recipe, variable = variable) - if (!dir.exists(outdir)) { - dir.create(outdir, recursive = T) - } - - # Loop over each year in the data and save independently - for (i in syears) { - # Select year from array and rearrange dimensions - probs_syear <- lapply(subset_probs, ClimProjDiags::Subset, 'syear', i, drop = 'selected') - if (tolower(agg) == "global") { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c(lalo, 'time'))}) - } else { - probs_syear <- lapply(probs_syear, function(x) { - Reorder(x, c('country', 'time'))}) - } - - for (bin in 1:length(probs_syear)) { - prob_bin <- names(probs_syear[bin]) - 
long_name <- paste0(prob_bin, " probability category") - if (tolower(agg) == "country") { - dims <- c('Country', 'time') - } else { - dims <- c(lalo, 'time') - } - metadata <- list(metric = list(name = prob_bin, long_name = long_name)) - attr(probs_syear[[bin]], 'variables') <- metadata - names(dim(probs_syear[[bin]])) <- dims # is this necessary? - } - - # Add global attributes - attr(probs_syear[[1]], 'global_attrs') <- global_attributes - - # Select start date - if (fcst.horizon == 'decadal') { - # init_date is like "1990-11-01" - init_date <- as.POSIXct(init_date) - fcst.sdate <- init_date + lubridate::years(syears_val[i] - lubridate::year(init_date)) - fcst.sdate <- format(fcst.sdate, '%Y%m%d') - } else { - fcst.sdate <- data_cube$attrs$load_parameters$dat1$file_date[[1]][i] - } - - # Get time dimension values and metadata - times <- .get_times(recipe, data_cube, fcst.sdate, calendar, init_date) - - # Generate name of output file - outfile <- get_filename(outdir, recipe, variable, fcst.sdate, agg, - "probs") - if (type == 'obs') { - filename <- strsplit(outfile, "/") - filename <- filename[[1]][length(filename[[1]])] - outfile <- file.path(outdir, - sub(gsub('\\.','', - recipe$Analysis$Datasets$System$name), "", - filename)) - } - # Get grid data and metadata and export to netCDF - if (tolower(agg) == "country") { - country <- get_countries(grid) - ArrayToNc(append(country, times, probs_syear), outfile) - } else { - latitude <- data_cube$coords$lat[1:length(data_cube$coords$lat)] - longitude <- data_cube$coords$lon[1:length(data_cube$coords$lon)] - latlon <- .get_latlon(latitude, longitude) - # Compile variables into a list and export to netCDF - vars <- c(latlon, times, probs_syear) - ArrayToNc(vars, outfile) - } - } - } - info(recipe$Run$logger, - paste("#####", toupper(type), - "PROBABILITIES SAVED TO NETCDF FILE #####")) -} - diff --git a/modules/Scorecards/Scorecards_calculations.R b/modules/Scorecards/Scorecards_calculations.R index 093aa4260d7e6f36209b71d6e3579f469f43383e..f9f0f478a2a8b2fda78518b6bc93fdd38ebb7fbd 100644 --- a/modules/Scorecards/Scorecards_calculations.R +++ b/modules/Scorecards/Scorecards_calculations.R @@ -360,7 +360,6 @@ Scorecards_calculations <- function(recipe, data, skill_metrics, } ## Save metric result in array - browser() aggr_metrics[,,,which(metrics == met)] <- Reorder(data = mean_bias, order = c('var', 'time', 'region')) aggr_significance[,,,which(metrics == met)] <- Reorder(data = sign_mean_bias, diff --git a/modules/Scorecards/execute_scorecards.R b/modules/Scorecards/execute_scorecards.R index 2fa27549a20f414ae20d3db188b7918bd5ec26b0..763877b62a2bbf94b98b188bd3cb6207e0e831a2 100644 --- a/modules/Scorecards/execute_scorecards.R +++ b/modules/Scorecards/execute_scorecards.R @@ -1,5 +1,6 @@ source('tools/libs.R') source('modules/Scorecards/Scorecards_plotting.R') +Sys.setenv(OPENSSL_CONF="/dev/null") args = commandArgs(trailingOnly = TRUE) recipe_file <- args[1] diff --git a/modules/Visualization/R/plot_ensemble_mean.R b/modules/Visualization/R/plot_ensemble_mean.R index 125ec9b285262f551ae1c44b717a6fd0b6bbf81d..be1b2d458d3a22318db18d07cd58c843718c4c4e 100644 --- a/modules/Visualization/R/plot_ensemble_mean.R +++ b/modules/Visualization/R/plot_ensemble_mean.R @@ -1,5 +1,4 @@ source("tools/add_logo.R") -library(rgdal) plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, outdir, output_conf, method = 'median', logo = NULL) { @@ -11,6 +10,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, latitude <- 
fcst$coords$lat longitude <- fcst$coords$lon archive <- get_archive(recipe) + if (!is.null(recipe$Analysis$Workflow$Visualization$shapefile)) { library(sp) library(rgdal) @@ -18,7 +18,6 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, s1 <- spTransform(shp, CRS("+proj=longlat")) shapefile <- SpatialPolygons2map(s1) } - if (recipe$Analysis$Datasets$System$name == 'Multimodel') { system_name <- paste0('Multimodel-', recipe$Analysis$Datasets$Multimodel$approach) @@ -177,8 +176,8 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, subtitle_margin_scale = 2, titles = titles, units = units, cols = cols, brks = brks, fileout = paste0(outfile, ".pdf"), - bar_label_digits = 4, extra_margin = rep(1, 4), - shapefile = shapefile, + shapefile = shapefile, + bar_label_digits = 4, extra_margin = rep(1, 4), bar_label_scale = 1.5, axes_label_scale = 1.1) base_args[names(output_configuration)] <- output_configuration do.call(PlotLayout, base_args) @@ -194,7 +193,7 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, bar_label_digits = 4, bar_label_scale = 1.5, axes_label_scale = 1, shapefile = shapefile, - units = units) + units = units) } else { fun <- PlotRobinson common_projections <- c("robinson", "stereographic", "lambert_europe") @@ -307,4 +306,3 @@ plot_ensemble_mean <- function(recipe, fcst, mask = NULL, dots = NULL, "##### FORECAST ENSEMBLE MEAN PLOTS SAVED TO OUTPUT DIRECTORY #####") } - diff --git a/modules/Visualization/R/plot_metrics.R b/modules/Visualization/R/plot_metrics.R index 399d72cbe95fa51d4211cf289c6ba3cfbe4e2857..20fa3904ac6082d2008d9742598ebc475656ddcf 100644 --- a/modules/Visualization/R/plot_metrics.R +++ b/modules/Visualization/R/plot_metrics.R @@ -1,7 +1,4 @@ -library(stringr) -library(lubridate) -library(rgdal) - +source("tools/add_logo.R") plot_metrics <- function(recipe, data_cube, metrics, outdir, significance = F, output_conf, logo = NULL) { diff --git a/modules/Visualization/R/plot_most_likely_terciles_map.R b/modules/Visualization/R/plot_most_likely_terciles_map.R index 68f87011f011d9f1fb2be9aa2bdc5757ace9a149..467af3a1fcd86afc208aa8eb7ea664a70d638f70 100644 --- a/modules/Visualization/R/plot_most_likely_terciles_map.R +++ b/modules/Visualization/R/plot_most_likely_terciles_map.R @@ -9,7 +9,7 @@ source("modules/Visualization/R/tmp/Utils.R") source("modules/Visualization/R/tmp/PlotEquiMap.R") source("modules/Visualization/R/tmp/ColorBar_onebox.R") source("modules/Visualization/R/tmp/GradientCatsColorBar.R") - +source("tools/add_logo.R") ## TODO: Change name plot_most_likely_terciles <- function(recipe, fcst, @@ -256,10 +256,10 @@ plot_most_likely_terciles <- function(recipe, toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), "\n", "Most Likely Tercile / ", - time_labels[i], " ", years[i], - " / Start date: ", + "Start date: ", format(as.Date(i_syear, format = "%Y%m%d"), - "%d-%m-%Y")) + "%d-%m-%Y"), "\n Valid time: ", + time_labels[i], " ", years[i]) } else if (tolower(recipe$Analysis$Horizon) == 'subseasonal') { toptitle <- paste0(system_name, " / ", str_to_title(var_long_name), diff --git a/modules/Visualization/R/tmp/PlotRobinson.R b/modules/Visualization/R/tmp/PlotRobinson.R index 67ca034d6af4b687546ce995c49826955f9bbee1..8b98c3ec15c06a7bb688c01afbb27d657e0ec15a 100644 --- a/modules/Visualization/R/tmp/PlotRobinson.R +++ b/modules/Visualization/R/tmp/PlotRobinson.R @@ -567,3 +567,4 @@ PlotRobinson <- function(data, lon, lat, lon_dim = NULL, lat_dim = NULL, } + diff --git 
a/modules/Visualization/Visualization.R b/modules/Visualization/Visualization.R
index d3446d79d7a3c72a70fbb6c9559480ab6217e349..6adbda671182459d388b9a653e9043c482d56070 100644
--- a/modules/Visualization/Visualization.R
+++ b/modules/Visualization/Visualization.R
@@ -107,14 +107,21 @@ Visualization <- function(recipe,
         plot_metrics(recipe = recipe, data_cube = data$hcst,
                      metrics = skill_metrics, outdir = outdir,
                      significance = significance, output_conf = output_conf,
-                     logo = logo)
+                     logo = logo)
         info(recipe$Run$logger,
              paste("Skill metrics significance set as", significance))
-      } else {
-        if (significance %in% c('both', 'dots')) {
-          plot_metrics(recipe, data$hcst, skill_metrics, outdir,
-                       significance = 'dots', output_conf = output_conf)
-          info(recipe$Run$logger, "Skill metrics significance as dots")
+      } else {
+        if (significance %in% c('both', 'dots')) {
+          plot_metrics(recipe, data$hcst, skill_metrics, outdir,
+                       significance = 'dots', output_conf = output_conf,
+                       logo = logo)
+          info(recipe$Run$logger, "Skill metrics significance as dots")
+        }
+        if (significance %in% c('both', 'mask')) {
+          plot_metrics(recipe, data$hcst, skill_metrics, outdir,
+                       significance = 'mask', output_conf = output_conf,
+                       logo = logo)
+          info(recipe$Run$logger, "Skill metrics significance as mask")
         }
         if (significance %in% c('both', 'mask')) {
           plot_metrics(recipe, data$hcst, skill_metrics, outdir,
diff --git a/plot_NAO.R b/plot_NAO.R
new file mode 100644
index 0000000000000000000000000000000000000000..8f01c45ae91d3de4789b8e1f68d58f3ba5db849b
--- /dev/null
+++ b/plot_NAO.R
@@ -0,0 +1,307 @@
+plot_NAO <- function(recipe, nao, data) {
+
+# Read variable long_name to plot it
+  conf <- yaml::read_yaml("conf/variable-dictionary.yml")
+  var_name <- conf$vars[[which(names(conf$vars) ==
+                               recipe$Analysis$Variables$name)]]$long
+  plot_ts <- TRUE
+  plot_sp <- TRUE
+  alpha <- 0.05
+  lons <- data$hcst$coords$longitude
+  lats <- data$hcst$coords$latitude
+  nao_region <- c(lonmin = -80, lonmax = 40,
+                  latmin = 20, latmax = 80)
+
+  if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+    # Use startdates param from SaveExp to correctly name the files:
+    if (length(data$hcst$attrs$source_files) == dim(data$hcst$data)['syear']) {
+      file_dates <- Apply(data$hcst$attrs$source_files, target_dim = NULL,
+                          fun = function(x) {
+                            pos <- which(strsplit(x, "")[[1]] == ".")
+                            res <- substr(x, pos-8, pos-1)
+                          })$output1
+    }
+  } else if (tolower(recipe$Analysis$Horizon) == "decadal"){
+    file_dates <- paste0('s',
+                         recipe$Analysis$Time$hcst_start : recipe$Analysis$Time$hcst_end)
+  }
+
+  if (plot_ts) {
+    dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"),
+               showWarnings = F, recursive = T)
+    source("modules/old_modules/Indices/R/plot_deterministic_forecast.R")
+    for (tstep in 1:dim(nao$hcst$data)['time']) {
+      mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) +
+             (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1)
+      mes <- ifelse(mes > 12, mes - 12, mes)
+      fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min)
+      obs <- Subset(nao$obs$data, along = 'time', ind = tstep)
+      exp <- Subset(nao$hcst$data, along = 'time', ind = tstep)
+      # fixed = TRUE is needed so gsub strips literal dots; as a regex, "."
+      # matches every character and would empty the whole name
+      if (gsub(".", "", recipe$Analysis$Datasets$System$name, fixed = TRUE) == "") {
+        system <- recipe$Analysis$Datasets$System$name
+      } else {
+        system <- gsub(".", "", recipe$Analysis$Datasets$System$name, fixed = TRUE)
+      }
+      if (tolower(recipe$Analysis$Horizon) == "seasonal") {
+        toptitle <- paste("NAO Index\n",
+                          month.abb[mes],
+                          "/", recipe$Analysis$Time$hcst_start, "-",
+                          recipe$Analysis$Time$hcst_end)
+        plotfile <-
paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n") + xlabs <- as.numeric(substr(file_dates, 1, 4)) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste("NAO Index\n", + "Lead time", fmonth, + " / Start dates", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_", + system, "_", recipe$Analysis$Datasets$Reference$name, + "_ftime", + sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min), ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Lead time: ", fmonth, "\n") + xlabs <- file_dates + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. ", + "The plots will not have a title.") + } + plot_deterministic_forecast(obs, exp, + time_dim = 'syear', + member_dim = 'ensemble', style = 'boxplot', + xlabs = xlabs, + title = toptitle, fileout = plotfile, + caption = caption, caption_line = 6.5, + legend_text = c( + recipe$Analysis$Datasets$Reference$name, + recipe$Analysis$Datasets$System$name)) + } + } + if (plot_sp) { + ## TODO: To be removed when s2dv is released: + source("modules/Visualization/R/tmp/PlotRobinson.R") + source("modules/old_modules/Indices/R/correlation_eno.R") + source("modules/Visualization/R/get_proj_code.R") + dir.create(paste0(recipe$Run$output_dir, "/plots/Indices/"), + showWarnings = F, recursive = T) + # Get correct code for stereographic projection + projection_code <- get_proj_code(proj_name = "stereographic") + correl_obs <- Apply(list(data$obs$data, nao$obs$data), + target_dims = 'syear', fun = .correlation_eno, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE, + ncores = recipe$Analysis$ncores) + correl_hcst <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x, y) { + x <- apply(x, 1, mean, na.rm = TRUE) + y <- apply(y, 1, mean, na.rm = TRUE) + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + correl_hcst_full <- Apply(list(data$hcst$data, nao$hcst$data), + target_dims = c('syear', 'ensemble'), + fun = function(x,y) { + dim(y) <- c(syear = length(y)) + dim(x) <- c(syear = length(x)) + res <- .correlation_eno(x, y, + time_dim = 'syear', method = 'pearson', alpha = alpha, + test.type = 'two-sided', pval = FALSE)}, + ncores = recipe$Analysis$ncores) + + for (tstep in 1:dim(nao$obs$data)['time']) { + fmonth <- sprintf("%02d", tstep - 1 + recipe$Analysis$Time$ftime_min) + ## Observations + map <- drop(Subset(correl_obs$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_obs$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == 
"seasonal") { + mes <- as.numeric(substr(recipe$Analysis$Time$sdate, 1,2)) + + (tstep - 1) + (recipe$Analysis$Time$ftime_min - 1) + mes <- ifelse(mes > 12, mes - 12, mes) + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal") { + toptitle <- paste(recipe$Analysis$Datasets$Reference$name, "\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_", + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Start date: month ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + if (gsub(".", "", recipe$Analysis$Datasets$System$name) == "") { + system <- recipe$Analysis$Datasets$System$name + } else { + system <- gsub(".", "", recipe$Analysis$Datasets$System$name) + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', style = 'polygon', + toptitle = toptitle, crop_coastlines = nao_region, + caption = caption, mask = sig, bar_extra_margin = c(4,0,4,0), + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + ## Ensemble-mean + map <- drop(Subset(correl_hcst$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name, "\n", + "NAO Index -", var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_ensmean_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation ensemble mean\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + + # Full hcst corr + map <- drop(Subset(correl_hcst_full$r, along = 'time', ind = tstep)) + sig <- drop(Subset(correl_hcst_full$sign, along = 'time', ind = tstep)) + if (tolower(recipe$Analysis$Horizon) == "seasonal") { + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation /", month.abb[mes], + "/", recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + "_s", recipe$Analysis$Time$sdate, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Nominal start date: 1st of ", + month.name[as.numeric(substr(recipe$Analysis$Time$sdate, 1,2))], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else if (tolower(recipe$Analysis$Horizon) == "decadal"){ + toptitle <- paste(recipe$Analysis$Datasets$System$name,"\n", + "NAO Index -",var_name, "\n", + " Correlation / Start dates ", + recipe$Analysis$Time$hcst_start, "-", + recipe$Analysis$Time$hcst_end) + plotfile <- paste0(recipe$Run$output_dir, "/plots/Indices/NAO_correlation_", + recipe$Analysis$Variable$name, "_member_", + system, + recipe$Analysis$Datasets$Reference$name, + "_ftime", fmonth, ".pdf") + caption <- paste0("NAO method: ", + ifelse(recipe$Analysis$Workflow$Indices$NAO$obsproj, + "Pobs", "Pmod"), " (Doblas-Reyes et al., 2003)\n", + "Correlation all members\n", + "Start date month: ", + month.name[get_archive(recipe)$System[[recipe$Analysis$Datasets$System$name]]$initial_month], + "\n", + "Forecast month: ", fmonth, "\n", + "Pearson correlation; alpha = ", alpha) + } else { + toptitle <- NULL + warning("The plot title is not defined for this time horizon. 
", + "The plots will not have a title.") + } + PlotRobinson(map, lon = lons, lat = lats, target_proj = projection_code, + lat_dim = 'latitude', lon_dim = 'longitude', + legend = 's2dv', bar_extra_margin = c(4,0,4,0), + toptitle = toptitle, style = 'polygon', + caption = caption, mask = sig, crop_coastline = nao_region, + fileout = plotfile, width = 8, height = 6, + brks = seq(-1, 1, 0.2), cols = brewer.pal(10, 'PuOr')) + } # end tstep loop + } +} diff --git a/recipe_NAO.yml b/recipe_NAO.yml new file mode 100644 index 0000000000000000000000000000000000000000..27687ac816f4f67fc5b48727979acffda83934f2 --- /dev/null +++ b/recipe_NAO.yml @@ -0,0 +1,110 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'UK-MetOffice-Glosea601'} + - {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-CanCM4i'} + #name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_NAO_single.yml b/recipe_NAO_single.yml new file mode 100644 index 0000000000000000000000000000000000000000..aaf166e2566df9587eead5d76311c1101d1319de --- /dev/null +++ b/recipe_NAO_single.yml @@ -0,0 +1,106 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: psl + freq: monthly_mean + units: hPa + flux: no + Datasets: + System: + #- {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + name: ECMWF-SEAS5.1 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0501' + fcst_year: #'2023' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 #6 # Mandatory, int: Last leadtime time step in months + Region: + latmin: 20 + latmax: 80 + lonmin: -80 + lonmax: 40 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
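+    # type accepts "to_system", "to_reference", or a path to a CDO grid
+    # description file, as in the griddes path used here.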
+ type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #"to_system" #"to_reference" /esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: no + Indices: + NAO: {obsproj: yes, save: 'none'} + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: EnsCorr rmsss rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: enscorr rmss rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ #/esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_bigpredidata_oper_subseasonal_tas.yml b/recipe_bigpredidata_oper_subseasonal_tas.yml new file mode 100644 index 0000000000000000000000000000000000000000..322811ff7b0c78454deca1482dddd466113dffa8 --- /dev/null +++ b/recipe_bigpredidata_oper_subseasonal_tas.yml @@ -0,0 +1,83 @@ + +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: subseasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: weekly_mean, units: C} + # - {name: prlr, freq: monthly_mean, units: mm, flux: no} + # - {name: sfcWind, freq: monthly_mean} + Datasets: + System: + - {name: 'NCEP-CFSv2', member: 'all'} + Multimodel: no # Mandatory, bool: Either yes/true or no/false + Reference: + - {name: ERA5} # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: 20250102 + fcst_year: '2025' # Optional, int: Forecast year 'YYYY' + hcst_start: '2011' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 4 # Mandatory, int: Last leadtime time step in months + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + - {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "to_system" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Calibration: + method: bias # Mandatory, str: bias, evmos, mse_min, crps_min, rpc_based + cross_validation: yes + save: all + Skill: + metric: mean_bias rpss + save: none + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3], [1/10, 9/10]] # frac: Quantile thresholds. This avoids saving the probs of 10 to 90 + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean + NA_color: white + multi_panel: no + dots: no + mask_terciles: yes + shapefile: /esarchive/scratch/cdelgado/aspect_outputs/casestudy-wine/shp_spain/recintos_provinciales_inspire_peninbal_etrs89/recintos_provinciales_inspire_peninbal_etrs89.shp + file_format: PNG # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + ncores: 8 # Optional, int: number of cores, defaults to 1 + remove_NAs: TRUE # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ # replace with the directory where you want to save the outputs + code_dir: /esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/ptrascas/R/sunset/bigpredidata_fqa.R # replace with the path to your script + expid: a78g # replace with your EXPID + hpc_user: bsc032413 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 16 + custom_directives: ['#SBATCH --constraint=medmem'] + platform: nord3v2 + email_notifications: no # enable/disable email notifications. Change it if you want to. 
+ email_address: paloma.trascasa@bsc.es # replace with your email address + notify_completed: no # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails diff --git a/recipe_ecvs_ano_mul_seas.yml b/recipe_ecvs_ano_mul_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..5be9fce86e82e3db76354ae28cb30dc98c361601 --- /dev/null +++ b/recipe_ecvs_ano_mul_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + - {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + - {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1101' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: no + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
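+      # The Time_aggregation block above (ini: [1, 2, 3, 4], end: [3, 4, 5, 6])
+      # requests rolling 3-month means over the 6 leadtimes: months 1-3, 2-4,
+      # 3-5 and 4-6. Minimal R sketch of that averaging (hypothetical array 'x';
+      # not the actual Aggregation module code):
+      #   ini <- c(1, 2, 3, 4); end <- c(3, 4, 5, 6)
+      #   x <- array(rnorm(6 * 24), dim = c(time = 6, syear = 24))
+      #   agg <- sapply(seq_along(ini),
+      #                 function(w) colMeans(x[ini[w]:end[w], , drop = FALSE]))
+      #   dim(agg)  # 24 start years x 4 aggregated seasons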
+      save: 'all'
+    Indicators:
+      index: no
+    Visualization:
+      plots: skill_metrics forecast_ensemble_mean most_likely_terciles
+      multi_panel: no
+      mask_terciles: 'both'
+      mask_ens: 'both'
+      projection: Robinson
+      file_format: 'PNG'
+    Scorecards:
+      execute: no # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: NULL
+      metric: mean_bias enscorr rpss crpss EnsSprErr
+      metric_aggregation: 'skill'
+      #inf_to_na: yes
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 30 # Optional, int: number of cores, defaults to 1
+  remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE
+  alpha: 0.05
+  Output_format: scorecards
+  logo: yes
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: gpfs
+  output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs
+  code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is
+  autosubmit: no
+  # fill only if using autosubmit
+  auto_conf:
+    script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script
+    expid: a68v # replace with your EXPID
+    hpc_user: bsc32339 # replace with your hpc username
+    wallclock: 02:00 # hh:mm
+    processors_per_job: 4
+    platform: nord3v2
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+    email_address: nuria.perez@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: yes # notify me by email when a job fails
+
diff --git a/recipe_ecvs_ano_seas.yml b/recipe_ecvs_ano_seas.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ad587c61feca7434e366839b6c4ffc8add2339f
--- /dev/null
+++ b/recipe_ecvs_ano_seas.yml
@@ -0,0 +1,109 @@
+Description:
+  Author: nperez
+  Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, prlr)
+
+Analysis:
+  Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal
+  Variables:
+    - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes}
+  Datasets:
+    System:
+      #- {name: 'Meteo-France-System8'}
+      #- {name: 'CMCC-SPS3.5'}
+      - {name: 'ECMWF-SEAS5.1'}
+      #- {name: 'UK-MetOffice-Glosea603'}
+      ##- {name: 'NCEP-CFSv2'}
+      #- {name: 'DWD-GCFS2.1'}
+      #- {name: 'ECCC-GEM5.2-NEMO'}
+      # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8
+    Multimodel:
+      execute: no # Mandatory, bool: either yes/true or no/false
+      approach: pooled # Mandatory, str: multimodel approach; 'pooled' is currently the only option
+      createFrom: Anomalies
+    Reference:
+      name: ERA5 # Mandatory, str: Reference codename. See docu.
+  Time:
+    sdate: '1201'
+    fcst_year: '2024'
+    hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY'
+    hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY'
+    ftime_min: 1 # Mandatory, int: First leadtime time step in months
+    ftime_max: 6 # Mandatory, int: Last leadtime time step in months
+  Region:
+    - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9}
+    #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10}
+  Regrid:
+    method: conservative # Mandatory, str: Interpolation method. See docu.
+ type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: raw #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: 'all' + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 30 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_mul_seas.yml b/recipe_ecvs_cal_mul_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4576dc1e5ef7b9afa10513f1e83fdbb65c9941b --- /dev/null +++ b/recipe_ecvs_cal_mul_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + #- {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + - {name: 'Meteo-France-System8'} + - {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + - {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + - {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + fcst_year: '2025' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
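+      # Conceptually, the 'evmos' calibration chosen above rescales the hindcast
+      # so that its mean and variance match the reference. Rough sketch of the
+      # idea in R (an assumption for illustration; the CSTools implementation
+      # works member by member and in cross-validation):
+      #   calib <- function(hcst, obs) {
+      #     (hcst - mean(hcst)) * sd(obs) / sd(hcst) + mean(obs)
+      #   }
+      #   hcst <- rnorm(100, mean = 1, sd = 2); obs <- rnorm(100)
+      #   c(mean(calib(hcst, obs)), sd(calib(hcst, obs)))  # equals obs mean and sd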
+ save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 20 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_seas.yml b/recipe_ecvs_cal_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..3417d2d9387644db3b2d9fb63e7f3b6aa65e0b22 --- /dev/null +++ b/recipe_ecvs_cal_seas.yml @@ -0,0 +1,109 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: 'tas', freq: 'monthly_mean', units: 'C'} + - {name: 'prlr', freq: 'monthly_mean', units: 'mm', flux: yes} + Datasets: + System: + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'UK-MetOffice-Glosea603'} + ##- {name: 'NCEP-CFSv2'} + #- {name: 'DWD-GCFS2.1'} + #- {name: 'ECCC-GEM5.2-NEMO'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '0101' + fcst_year: '2025' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + #- {name: 'Global', latmin: 0, latmax: 10, lonmin: 0, lonmax: 10} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. 
+ type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: none + Time_aggregation: + execute: yes + method: average + ini: [1, 2, 3, 4] + end: [3, 4, 5, 6] + Calibration: + method: evmos #crps_min #evmos # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 16 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_ecvs_cal_subseas.yml b/recipe_ecvs_cal_subseas.yml new file mode 100644 index 0000000000000000000000000000000000000000..01b60087f70d838fa7e3e89bed203baf40b6bb0c --- /dev/null +++ b/recipe_ecvs_cal_subseas.yml @@ -0,0 +1,128 @@ +# IMPORTANT: This is recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options. +Description: + Author: N.Pérez-Zanón + Info: # Complete recipe containing all possible fields. +Analysis: + Horizon: subseasonal # Mandatory, str: 'seasonal', or 'decadal'. Subseasonal is in development + Variables: + # name: variable name(s) in the /esarchive (Mandatory, str) + # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # units: desired data units for each variable. Only available for temperature, + # precipitation, and pressure variables. 
+    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+#    name: 'tas'
+#    freq: 'weekly_mean'
+#    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+#    - {name: 'prlr', freq: 'weekly_mean', units: 'mm'}
+#    - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'}
+#    - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 20250109 #%Y%m%d # Change to 2023
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: 20250109 # Forecast initialization date 'YYYYMMDD' for subseasonal, year 'YYYY' otherwise (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in months (weekly steps for subseasonal). Starts at '1'. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step, in months (weekly steps for subseasonal). Starts at '1'. (Mandatory, int)
+    week_day: Thursday
+    sweek_window: 9
+    sday_window: 3
+  Region:
+    # latmin: minimum latitude (Mandatory, int)
+    # latmax: maximum latitude (Mandatory, int)
+    # lonmin: minimum longitude (Mandatory, int)
+    # lonmax: maximum longitude (Mandatory, int)
+    # To request more regions to be divided in atomic recipes, add them this way:
+    # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
+    #- {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49}
+    #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5}
+    # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
+    - {name: 'Global', latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9}
+  Regrid:
+    method: bilinear # Interpolation method (Mandatory, str)
+    type: "to_system" #"conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none',
+    # or CDO-accepted grid. (Mandatory, str)
+  Workflow:
+    # This is the section of the recipe where the parameters for each module are specified
+    Time_aggregation:
+      execute: no
+    Calibration:
+      method: evmos # Calibration method. (Mandatory, str)
+      save: 'none' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str)
+    Skill:
+      metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str)
+      save: 'all' # Options: 'all', 'none' (Mandatory, str)
+    Statistics:
+      metric: cov std var n_eff # List of statistics separated by spaces or commas.
(Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics forecast_ensemble_mean most_likely_terciles + multi_panel: no + mask_terciles: 'both' + mask_ens: 'both' + projection: Robinson + file_format: 'PNG' + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + # mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. + metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 16 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. 
Only for non-atomic recipes (not running interactively)
+  # fill only if using autosubmit
+
diff --git a/recipe_subseasonal_ecvs.yml b/recipe_subseasonal_ecvs.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2be485a0f8179561bab8c71f22bec1c23d23ed50
--- /dev/null
+++ b/recipe_subseasonal_ecvs.yml
@@ -0,0 +1,127 @@
+# IMPORTANT: This recipe is not intended to represent a real workflow: it is only a template showcasing ALL available options.
+Description:
+  Author: N.Pérez-Zanón
+  Info: # Complete recipe containing all possible fields.
+Analysis:
+  Horizon: subseasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'. Subseasonal is in development
+  Variables:
+    # name: variable name(s) in the /esarchive (Mandatory, str)
+    # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str)
+    # units: desired data units for each variable. Only available for temperature,
+    # precipitation, and pressure variables.
+    - {name: 'tas', freq: 'weekly_mean', units: 'C'}
+#    name: 'tas'
+#    freq: 'weekly_mean'
+#    units: 'C'
+    # To request more variables to be divided in atomic recipes, add them this way:
+#    - {name: 'prlr', freq: 'weekly_mean', units: 'mm'}
+#    - {name: 'sfcWind', freq: 'weekly_mean', units: 'm s-1'}
+#    - {name: 'rsds', freq: 'weekly_mean', units: 'W m-2'}
+    # To request multiple variables *in the same* atomic recipe, add them this way:
+    # - {name: 'tas, prlr, sfcWind, rsds', freq: 'weekly_mean', units: {tas: 'C', prlr: 'mm', sfcWind: 'm s-1', rsds:'W m-2'}}
+  Datasets:
+    System:
+      # name: System name (Mandatory, str)
+      # member: 'all' or individual members, separated by a comma and in quotes (decadal only, str)
+      - {name: 'NCEP-CFSv2', member: 'all'}
+      # To request more Systems to be divided in atomic recipes, add them this way:
+      # - {name: 'Meteo-France-System7'}
+    Multimodel:
+      execute: no # Either yes/true or no/false (Mandatory, bool)
+      approach: pooled # Multimodel computation approach. 'pooled' currently the only option (str)
+      createFrom: Anomalies # Which module should the anomalies be created from (str)
+    Reference:
+      - {name: 'ERA5'} # Reference name (Mandatory, str)
+      # To request more References to be divided into atomic recipes, add them this way:
+      # - {name: 'ERA5Land'}
+  Time:
+    sdate: 2023 #%Y%m%d # Change to 2023
+    #- '1201' # Start date, 'mmdd' (Mandatory, int)
+    # To request more startdates to be divided into atomic recipes, add them this way:
+    # - '0101'
+    # - '0201'
+    # ...
+    fcst_year: #20231207 # Forecast initialization year 'YYYY' (Optional, int)
+    hcst_start: '1999' # Hindcast initialization start year 'YYYY' (Mandatory, int)
+    hcst_end: '2016' # Hindcast initialization end year 'YYYY' (Mandatory, int)
+    ftime_min: 1 # First forecast time step, in months (weekly steps for subseasonal). Starts at '1'. (Mandatory, int)
+    ftime_max: 4 # Last forecast time step (weekly steps for subseasonal). Starts at '1'.
(Mandatory, int) + week_day: Thursday + sweek_window: 9 + sday_window: 3 + Region: + # latmin: minimum latitude (Mandatory, int) + # latmax: maximum latitude (Mandatory, int) + # lonmin: # minimum longitude (Mandatory, int) + # lonmax: # maximum longitude (Mandatory, int) + #- {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + # To request more regions to be divided in atomic recipes, add them this way: + # {name: "Nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60} + - {name: "Kuwait", latmin: 28, latmax: 31, lonmin: 46, lonmax: 49} + #- {name: "Iberia", latmin: 36, latmax: 44, lonmin: -10, lonmax: 5} + # - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40} + Regrid: + method: bilinear # Interpolation method (Mandatory, str) + type: "conf/grid_description/griddes_system51c3s.txt" # Interpolate to: 'to_system', 'to_reference', 'none', + # or CDO-accepted grid. (Mandatory, str) + Workflow: + # This is the section of the recipe where the parameters for each module are specified + Time_aggregation: + execute: no + Calibration: + method: evmos # Calibration method. (Mandatory, str) + save: 'all' # Options: 'all', 'none', 'exp_only', 'fcst_only' (Mandatory, str) + Skill: + metric: mean_bias rpss enscorr # List of skill metrics separated by spaces or commas. (Mandatory, str) + save: 'none' # Options: 'all', 'none' (Mandatory, str) + Statistics: + metric: cov std var n_eff # List of statistics separated by spaces or commas. (Mandatory, str) + save: 'all' # Options: 'all', 'none' (Mandatory, str) + Probabilities: + # percentiles: [[1/3, 2/3], [1/10, 9/10], [1/4, 2/4, 3/4]] # Thresholds + percentiles: [[1/3, 2/3]] + # for quantiles and probability categories. Each set of thresholds should be + # enclosed within brackets. For now, they are INDEPENDENT from skill metrics. (Optional) + save: 'percentiles_only' # Options: 'all', 'none', 'bins_only', 'percentiles_only' (Mandatory, str) + Visualization: + plots: skill_metrics # Types of plots to generate (Optional, str) + multi_panel: no # Multi-panel plot or single-panel plots. Default is 'no/false'. (Optional, bool) + projection: 'cylindrical_equidistant' # Options: 'cylindrical_equidistant', 'robinson', 'lambert_europe'. Default is cylindrical equidistant. (Optional, str) + mask_terciles: no # Whether to mask the non-significant points by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + dots_terciles: yes # Whether to dot the non-significant by rpss in the most likely tercile plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + mask_ens: no # Whether to mask the non-significant points by rpss in the forecast ensemble mean plot. yes/true, no/false or 'both'. Default is no/false. (Optional, str) + file_format: 'PNG' # Final file format of the plots. Formats available: PNG, JPG, JPEG, EPS. Defaults to PDF. + Scorecards: + execute: no # yes/no + regions: + # Mandatory: Define regions for which the spatial aggregation will be performed. + # The regions must be included within the area defined in the 'Analysis:Region' section. + Iberia: {lon.min: -10, lon.max: 5, lat.min: 36, lat.max: 44} + #Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + #Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + #Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: 1, 2, 3 # Mandatory, int: start months to visualise in scorecard table. Options: 'all' or a sequence of numbers. 
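+      # Spatial aggregation over the regions above is typically an area-weighted
+      # (cosine-of-latitude) mean. Minimal R sketch, assuming a metric array 'm'
+      # with latitude as its first dimension (hypothetical; not the Scorecards code):
+      #   lats <- seq(36, 44, 1); nlon <- 16
+      #   m <- array(rnorm(length(lats) * nlon),
+      #              dim = c(latitude = length(lats), longitude = nlon))
+      #   w <- cos(lats * pi / 180)
+      #   regional_mean <- sum(m * w) / (sum(w) * nlon)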
+ metric: mean_bias enscorr rpss crpss enssprerr # Mandatory: metrics to visualise in scorecard table + metric_aggregation: 'score' # Mandatory, str: level of aggregation for skill scores. Options: 'score' or 'skill' + inf_to_na: True # Optional, bool: set inf values in data to NA, default is no/False + table_label: NULL # Optional, str: extra information to add in scorecard table title + fileout_label: NULL # Optional, str: extra information to add in scorecard output filename + col1_width: NULL # Optional, int: to adjust width of first column in scorecards table + col2_width: NULL # Optional, int: to adjust width of second column in scorecards table + calculate_diff: False # Mandatory, bool: True/False + ncores: 9 # Number of cores to be used in parallel computation. + # If left empty, defaults to 1. (Optional, int) + remove_NAs: yes # Whether to remove NAs. + # If left empty, defaults to no/false. (Optional, bool) + Output_format: 'Scorecards' # 'S2S4E' or 'Scorecards'. Determines the format of the output. Default is 'S2S4E'. +Run: + filesystem: gpfs # Name of the filesystem as defined in the archive configuration file + Loglevel: INFO # Minimum category of log messages to display: 'DEBUG', 'INFO', 'WARN', 'ERROR' or 'FATAL'. + # Default value is 'INFO'. (Optional, str) + Terminal: yes # Optional, bool: Whether to display log messages in the terminal. + # Default is yes/true. + output_dir: /home/bsc/bsc032339/ # Output directory. Must have write permissions. (Mandatory, str) + code_dir: /home/bsc/bsc032339/sunset/ # Directory where the code is stored. Is used when launching jobs (not running interactively) + autosubmit: no # Whether or not to run with Autosubmit. Only for non-atomic recipes (not running interactively) + # fill only if using autosubmit + diff --git a/recipe_tas_decadal.yml b/recipe_tas_decadal.yml new file mode 100644 index 0000000000000000000000000000000000000000..86bcec089021582309eb751c19fd1f216ddf08ac --- /dev/null +++ b/recipe_tas_decadal.yml @@ -0,0 +1,118 @@ +Description: + Author: nperez + Info: Test decadal 25-07-2024 + +Analysis: + Horizon: decadal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + - {name: 'EC-Earth3-i4', member: 'all'} + # - {name: 'HadGEM3-GC31-MM'} + #- {name: 'BCC-CSM2-MR'} + #- {name: 'CanESM5'} + #- {name: 'CMCC-CM2-SR5'} + #- {name: 'FGOALS-f3-L'} + #- {name: 'IPSL-CM6A-LR'} + # name: Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: yes + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + # sdate: '0501' + fcst_year: #'2021' + hcst_start: '1981' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2010' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 12 # Mandatory, int: Last leadtime time step in months + Region: + latmin: -90 + latmax: 90 + lonmin: 0 + lonmax: 359.9 + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. 
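+  # The user_def windows in the Time_aggregation block below are inclusive
+  # monthly leadtime indices. With 12 months per forecast year:
+  #   Y1     = months 1..12
+  #   Y1-Y5  = months 1..60   (5 * 12)
+  #   Y6-Y10 = months 61..120 (5 * 12 + 1 through 10 * 12)
+  # Note that windows beyond month 12 require ftime_max to cover them.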
+ Workflow: + Time_aggregation: + execute: yes + method: average + user_def: + Y1: [1, 12] # aggregate from 1 to 3 forecast times + Y1-Y5: [1, 60] + Y6-Y10: [61, 120] + #Y1-JJA: [6, 8] + #Y2-Y5: [13, 60] + #Y5-Y10: [49, 120] + Anomalies: + compute: yes + cross_validation: no + save: none + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: none + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + #dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 100 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + alpha: 0.05 + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs + output_dir: /home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl_ano_seas.yml b/recipe_tas_singl_ano_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d2f6a9ec470981081bbadbd06943783fa4e57f4 --- /dev/null +++ b/recipe_tas_singl_ano_seas.yml @@ -0,0 +1,106 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + name: tas + freq: monthly_mean + units: C + flux: no + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + #- {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. 
+ Time: + sdate: '0901' + fcst_year: '2024' + hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 6 # Mandatory, int: Last leadtime time step in months + Region: + - {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + #- {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: all + Time_aggregation: + execute: yes + method: average + ini: [1,2,3,4] + end: [3,4,5,6] + Calibration: + method: raw # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. + save: all + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+ email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_tas_singl_cal_seas.yml b/recipe_tas_singl_cal_seas.yml new file mode 100644 index 0000000000000000000000000000000000000000..d649e082dcb75a047874c035487dec1a6287ddc1 --- /dev/null +++ b/recipe_tas_singl_cal_seas.yml @@ -0,0 +1,103 @@ +Description: + Author: nperez + Info: ECVs Oper ESS ECMWF SEAS5 Seasonal Forecast recipe (monthly mean, tas) + +Analysis: + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Variables: + - {name: tas, freq: monthly_mean, units: C, flux: yes} + Datasets: + System: + - {name: 'ECMWF-SEAS5.1'} + # - {name: 'Meteo-France-System8'} + #- {name: 'CMCC-SPS3.5'} + # name: UK-MetOffice-Glosea601 #Meteo-France-System7 #ECMWF-SEAS5.1 #ECMWF-i2o2 #UKMO-System602 #Meteo-France-System8 + Multimodel: + execute: no + approach: pooled # Mandatory, bool: Either yes/true or no/false + createFrom: Anomalies + Reference: + name: ERA5 # Mandatory, str: Reference codename. See docu. + Time: + sdate: '1201' + fcst_year: '2024' + hcst_start: '1996' # Mandatory, int: Hindcast start year 'YYYY' + hcst_end: '2000' # Mandatory, int: Hindcast end year 'YYYY' + ftime_min: 1 # Mandatory, int: First leadtime time step in months + ftime_max: 3 # Mandatory, int: Last leadtime time step in months + Region: + #- {name: test, latmin: -10, latmax: 10, lonmin: 0, lonmax: 40} + - {name: Global, latmin: -90, latmax: 90, lonmin: -180, lonmax: 179.9} + Regrid: + method: conservative # Mandatory, str: Interpolation method. See docu. + type: /home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt #/esarchive/scratch/nperez/git4/sunset/conf/grid_description/griddes_system51c3s.txt # "/home/bsc/bsc032339/sunset/conf/grid_description/griddes_system51c3s.txt" #"to_system" #"to_reference" + #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid. + Workflow: + Anomalies: + compute: no + cross_validation: no + save: all + Time_aggregation: + execute: no #yes + method: average + ini: [1,2,3,4] + end: [3,4,5,6] + Calibration: + method: mse_min # Mandatory, str: Calibration method. See docu. + cross_validation: yes + save: none + Skill: + metric: mean_bias EnsCorr rpss crpss EnsSprErr rps crps rps_syear crps_syear cov std n_eff + save: 'all' + cross_validation: yes + Probabilities: + percentiles: [[1/3, 2/3]] # frac: Quantile thresholds. 
+ save: all + Indicators: + index: no + Visualization: + plots: skill_metrics #forecast_ensemble_mean most_likely_terciles + multi_panel: no + dots: both + projection: Robinson + file_format: 'PNG' + #projection: robinson + Scorecards: + execute: no # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30} + start_months: NULL + metric: mean_bias enscorr rpss crpss EnsSprErr + metric_aggregation: 'skill' + #inf_to_na: yes + table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 4 # Optional, int: number of cores, defaults to 1 + remove_NAs: yes # Optional, bool: Whether NAs are removed, defaults to FALSE + Output_format: scorecards + logo: yes +Run: + Loglevel: INFO + Terminal: yes + filesystem: gpfs #esarchive #gpfs + output_dir: /home/bsc/bsc032339/ #/esarchive/scratch/nperez/git4/ #/home/bsc/bsc032339/ # replace with the directory where you want to save the outputs + code_dir: /home/bsc/bsc032339/sunset/ # /esarchive/scratch/nperez/git4/sunset/ #/home/bsc/bsc032339/sunset/ # replace with the directory where your code is + autosubmit: no + # fill only if using autosubmit + auto_conf: + script: /esarchive/scratch/nperez/git3/sunset/full_ecvs_scorecards.R # replace with the path to your script + expid: a68v # replace with your EXPID + hpc_user: bsc32339 # replace with your hpc username + wallclock: 02:00 # hh:mm + processors_per_job: 4 + platform: nord3v2 + email_notifications: yes # enable/disable email notifications. Change it if you want to. + email_address: nuria.perez@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: yes # notify me by email when a job fails + diff --git a/recipe_template.yml b/recipe_template.yml index a9d383ce994fe905e81e639d40d9b188b11e3c5d..b9cd5300eda36aab5966260156bfcd4afd5dede2 100644 --- a/recipe_template.yml +++ b/recipe_template.yml @@ -5,8 +5,10 @@ Description: Analysis: Horizon: seasonal # Mandatory, str: 'subseasonal', 'seasonal', or 'decadal'. Variables: - # name: variable name(s) in the /esarchive (Mandatory, str) + # name: variable name(s) in the archive (Mandatory, str) # freq: 'monthly_mean', 'daily' or 'daily_mean' (Mandatory, str) + # To check the specific data loaded for each variable and frequency selected, + # see the corresponding archive_*.yml files in the conf/ folder. # units: desired data units for each variable. Only available for temperature, # precipitation, and pressure variables. 
   - {name: 'tos', freq: 'monthly_mean', units: 'C'}
@@ -60,7 +62,7 @@ Analysis:
   # latmax: maximum latitude (Mandatory, int)
   # lonmin: minimum longitude (Mandatory, int)
   # lonmax: maximum longitude (Mandatory, int)
-  - {name: global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+  - {name: Global, latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
   # To request more regions to be divided in atomic recipes, add them this way:
   # - {name: "nino34", latmin: -5, latmax: 5, lonmin: -10, lonmax: 60}
   # - {name: "Iberia", latmin: 34, latmax: 46, lonmin: -10, lonmax: 5}
diff --git a/recipes/examples/scorecards_on_MN5.yml b/recipes/examples/scorecards_on_MN5.yml
new file mode 100644
index 0000000000000000000000000000000000000000..32decb69b7a9ccd4c3d836a5763949aafba5ae83
--- /dev/null
+++ b/recipes/examples/scorecards_on_MN5.yml
@@ -0,0 +1,100 @@
+Description:
+  Author: An-Chi Ho
+  Info: Compute Skills and Plot Scorecards with Autosubmit
+
+Analysis:
+  Horizon: seasonal
+  Variables:
+    - {name: tas, freq: monthly_mean}
+  Datasets:
+    System: # multiple systems for single model, split if Multimodel = F
+      - {name: ECMWF-SEAS5.1}
+    Multimodel:
+      execute: False # single option
+    Reference:
+      - {name: ERA5}
+  Time:
+    sdate: # list, split
+      - '0101'
+      - '0201'
+      - '0301'
+      - '0401'
+      - '0501'
+      - '0601'
+      - '0701'
+      - '0801'
+      - '0901'
+      - '1001'
+      - '1101'
+      - '1201'
+    fcst_year:
+    hcst_start: '1993' # single option
+    hcst_end: '2003' # single option
+    ftime_min: 1 # single option
+    ftime_max: 6 # single option
+  Region: # multiple lists, split? Add region name if length(Region) > 1
+    - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9}
+  Regrid:
+    method: bilinear
+    type: to_system
+  Workflow:
+    Anomalies:
+      compute: yes
+      cross_validation: no
+      save: 'none'
+    Calibration:
+      method: raw
+      save: 'none'
+    Skill:
+      metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split
+      cross_validation: yes
+      save: 'all'
+    Statistics:
+      metric: cov std n_eff spread
+      save: 'all'
+    Probabilities:
+      percentiles: [[1/3, 2/3]]
+      save: 'none'
+    Scorecards:
+      execute: yes # yes/no
+      regions:
+        Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90}
+        Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30}
+        Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -90, lat.max: -30}
+      start_months: 'all'
+      metric: mean_bias enscorr rpss crpss enssprerr
+      metric_aggregation: 'score'
+      inf_to_na: TRUE # Optional, bool: set inf values in data to NA, default is FALSE
+      table_label: NULL
+      fileout_label: NULL
+      col1_width: NULL
+      col2_width: NULL
+      calculate_diff: FALSE
+  ncores: 8
+  remove_NAs: no # bool, don't split
+  Output_format: Scorecards # string, don't split
+
+################################################################################
+## Run CONFIGURATION
+################################################################################
+Run:
+  Loglevel: INFO
+  Terminal: yes
+  filesystem: gpfs
+  output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/
+  code_dir: /home/Earth/vagudets/git/auto-s2s/
+  autosubmit: yes
+  # fill only if using autosubmit
+  auto_conf:
+    script: use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script
+    expid: a7rn # replace with your EXPID
+    hpc_user: bsc032762 # replace with your hpc username
+    wallclock: 03:00 # hh:mm
+    processors_per_job: 16
+    platform: mn5
+    custom_directives: # ['#SBATCH --exclusive']
+    email_notifications: yes # enable/disable email notifications. Change it if you want to.
+ email_address: victoria.agudetse@bsc.es # replace with your email address + notify_completed: yes # notify me by email when a job finishes + notify_failed: no # notify me by email when a job fails + + diff --git a/recipes/examples/scorecards_on_nord3v2.yml b/recipes/examples/scorecards_on_nord3v2.yml new file mode 100644 index 0000000000000000000000000000000000000000..06333966f7934c7b178e860922ddc193e9f25356 --- /dev/null +++ b/recipes/examples/scorecards_on_nord3v2.yml @@ -0,0 +1,100 @@ +Description: + Author: An-Chi Ho + Info: Compute Skills and Plot Scorecards with Autosubmit + +Analysis: + Horizon: seasonal + Variables: + - {name: tas, freq: monthly_mean} + Datasets: + System: # multiple systems for single model, split if Multimodel = F + - {name: ECMWF-SEAS5.1} + Multimodel: + execute: False # single option + Reference: + - {name: ERA5} + Time: + sdate: # list, split + - '0101' + - '0201' + - '0301' + - '0401' + - '0501' + - '0601' + - '0701' + - '0801' + - '0901' + - '1001' + - '1101' + - '1201' + fcst_year: + hcst_start: '1993' # single option + hcst_end: '2003' # single option + ftime_min: 1 # single option + ftime_max: 6 # single option + Region: # multiple lists, split? Add region name if length(Region) > 1 + - {name: "global", latmin: -90, latmax: 90, lonmin: 0, lonmax: 359.9} + Regrid: + method: bilinear + type: to_system + Workflow: + Anomalies: + compute: yes + cross_validation: no + save: 'none' + Calibration: + method: raw + save: 'none' + Skill: + metric: mean_bias EnsCorr rps rpss crps crpss EnsSprErr # list, don't split + cross_validation: yes + save: 'all' + Statistics: + metric: cov std n_eff spread + save: 'all' + Probabilities: + percentiles: [[1/3, 2/3]] + save: 'none' + Scorecards: + execute: yes # yes/no + regions: + Extra-tropical NH: {lon.min: 0, lon.max: 360, lat.min: 30, lat.max: 90} + Tropics: {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: 30} + Extra-tropical SH : {lon.min: 0, lon.max: 360, lat.min: -30, lat.max: -90} + start_months: 'all' + metric: mean_bias enscorr rpss crpss enssprerr + metric_aggregation: 'score' + inf_to_na: TRUE # Optional, bool: set inf values in data to NA, default is FALSE table_label: NULL + fileout_label: NULL + col1_width: NULL + col2_width: NULL + calculate_diff: FALSE + ncores: 8 + remove_NAs: no # bool, don't split + Output_format: Scorecards # string, don't split + +################################################################################ +## Run CONFIGURATION +################################################################################ +Run: + Loglevel: INFO + Terminal: yes + filesystem: esarchive + output_dir: /esarchive/scratch/vagudets/auto-s2s-outputs/ + code_dir: /home/Earth/vagudets/git/auto-s2s/ + autosubmit: yes + # fill only if using autosubmit + auto_conf: + script: use_cases/ex1_2_autosubmit_scorecards/ex1_2-script.R # replace with the path to your script + expid: a6wq # replace with your EXPID + hpc_user: bsc032762 # replace with your hpc username + wallclock: 03:00 # hh:mm + processors_per_job: 16 + platform: nord3v2 + custom_directives: ['#SBATCH --exclusive'] + email_notifications: yes # enable/disable email notifications. Change it if you want to. 
+    email_address: victoria.agudetse@bsc.es # replace with your email address
+    notify_completed: yes # notify me by email when a job finishes
+    notify_failed: no # notify me by email when a job fails
+
+
diff --git a/recipes/recipe_bigpredidata_oper.yml b/recipes/recipe_bigpredidata_oper.yml
index c890f47c8846d8697b8bdcf6a7ca8906429cd8a8..ca90c31316513aa93bad75ec1b88c4198f639810 100644
--- a/recipes/recipe_bigpredidata_oper.yml
+++ b/recipes/recipe_bigpredidata_oper.yml
@@ -18,9 +18,9 @@ Analysis:
     - {name: ERA5} # Mandatory, str: Reference codename. See docu.
   Time:
     sdate:
-#      - '0101'
+      - '0101'
 #      - '0201'
-      - '0301'
+#      - '0301'
 #      - '0401'
 #      - '0501'
 #      - '0601'
@@ -40,7 +40,7 @@ Analysis:
     - {name: "EU", latmin: 20, latmax: 80, lonmin: -20, lonmax: 40}
   Regrid:
     method: bilinear # Mandatory, str: Interpolation method. See docu.
-    type: "to_reference"
+    type: "to_system"
     #type: /esarchive/scratch/nmilders/gitlab/git_clones/auto-s2s/conf/grid_description.txt #'r360x180' # Mandatory, str: to_system, to_reference, or CDO-accepted grid.
   Workflow:
     Anomalies:
@@ -61,7 +61,7 @@ Analysis:
     Indicators:
       index: no
     Visualization:
-      plots: skill_metrics, most_likely_terciles, forecast_ensemble_mean
+      plots: most_likely_terciles
       NA_color: white
       multi_panel: no
       dots: no
diff --git a/rsz_rsz_bsc_logo.png b/rsz_rsz_bsc_logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..59406d7a5f242aa87eb276bea39d6544a802b986
Binary files /dev/null and b/rsz_rsz_bsc_logo.png differ
diff --git a/scripts/script_bigpredidata_oper.R b/scripts/script_bigpredidata_oper.R
index aaaaa07198ce5820f0cd2378af8e511002d90817..482cd576224ed89654004a2d84d8eb7275c9dd82 100644
--- a/scripts/script_bigpredidata_oper.R
+++ b/scripts/script_bigpredidata_oper.R
@@ -4,12 +4,12 @@ source("modules/Units/Units.R")
 source("modules/Visualization/Visualization.R")
 source("modules/Aggregation/Aggregation.R")
 
-args = commandArgs(trailingOnly = TRUE)
-recipe_file <- args[1]
-#recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper_test_sfcWind.yml"
+#args = commandArgs(trailingOnly = TRUE)
+#recipe_file <- args[1]
+recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper.yml"
 #recipe_file <- "/esarchive/scratch/ptrascas/R/dev-test_bigpredidata/sunset/sunset/recipes/recipe_bigpredidata_oper_ecvs_Iberia_seasonal_split.yml"
-recipe <- read_atomic_recipe(recipe_file)
-#recipe <- prepare_outputs(recipe_file)
+#recipe <- read_atomic_recipe(recipe_file)
+recipe <- prepare_outputs(recipe_file)
 # Load datasets
 data <- Loading(recipe)
 data <- Units(recipe, data)
@@ -32,23 +32,15 @@ skill_metrics <- Crossval_metrics(recipe = recipe, data_crossval = res,
 
-if (!is.null(data$fcst)) {
+#if (!is.null(data$fcst)) {
   # Required to plot a forecast:
-  data$fcst <- res$fcst
-  tmp_probs <- list(probs_fcst = res$probs$fcst[[1]])
-  #nlats <- as.numeric(dim(data$hcst$data)['latitude'])
-  #nlons <- as.numeric(dim(data$hcst$data)['longitude'])
-  #ntimes <- as.numeric(dim(data$hcst$data)['time'])
-
-  # TODO: consider more than terciles
-  #dim(tmp_probs$probs_fcst) <- c(bin = 3, syear = 1, var = 1, time = ntimes,
-  #                               latitude = nlats, longitude = nlons)
-
-
-  Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
-                significance = TRUE, probabilities = tmp_probs)
-} else {
-  Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
-                significance = TRUE)
-}
+#  data$fcst <- res$fcst
+#  tmp_probs <- list(probs_fcst = res$probs$fcst[[1]])
+
+#  Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+#                significance = TRUE, probabilities = tmp_probs)
+#} else {
+#  Visualization(recipe = recipe, data = data, skill_metrics = skill_metrics,
+#                significance = TRUE)
+#}
diff --git a/subsunset.sh b/subsunset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6e75546f4ced5bb0634e30f9ceecf4e5c08784f1
--- /dev/null
+++ b/subsunset.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 64
+#SBATCH -N 1
+#SBATCH -t 10:00:00
+#SBATCH -J sunset_sub
+#SBATCH -o sunset_sub-%J.out
+#SBATCH -e sunset_sub-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=lowmem
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_subseasonal_ecvs.yml
+
+
diff --git a/sunset.sh b/sunset.sh
new file mode 100644
index 0000000000000000000000000000000000000000..bd6f1d5594eeccb832c5b024e81a5882943feb6d
--- /dev/null
+++ b/sunset.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 112
+#SBATCH -N 1
+#SBATCH -t 10:00:00
+#SBATCH -J sunset_multimodel
+#SBATCH -o sunset_multimodel-%J.out
+#SBATCH -e sunset_multimodel-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=highmem
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_tas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_mul_seas.yml
+
+
diff --git a/sunsetv2.sh b/sunsetv2.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5ae55755f5f77bf8a49e81bebd76a2f30b665a1d
--- /dev/null
+++ b/sunsetv2.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -n 112
+#SBATCH -N 1
+#SBATCH -t 3:00:00
+#SBATCH -J sunset_ecmwf
+#SBATCH -o sunset_ecmwf-%J.out
+#SBATCH -e sunset_ecmwf-%J.err
+#SBATCH --account=bsc32
+#SBATCH --qos=gp_bsces
+#SBATCH --constraint=lowmem
+
+
+#### --qos=acc_bsces
+
+source /gpfs/projects/bsc32/software/suselinux/11/software/Miniconda3/4.7.10/etc/profile.d/conda.sh
+conda activate /gpfs/projects/bsc32/repository/apps/conda_envs/SUNSET-env_2.0.0
+
+Rscript /home/bsc/bsc032339/sunset/full_ecvs_calibration.R /home/bsc/bsc032339/sunset/recipe_ecvs_cal_seas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_anomalies.R /home/bsc/bsc032339/sunset/recipe_ecvs_ano_seas.yml
+
+#Rscript /home/bsc/bsc032339/sunset/full_ecvs_multimodel_calibrated.R /home/bsc/bsc032339/sunset/recipe_tas.yml #full_ecvs_multimodel_anomalies.R #full_NAO.R #ecvs_multimodel_anomalies.R
+
diff --git a/testing_multimodel.R b/testing_multimodel.R
new file mode 100644
index 0000000000000000000000000000000000000000..b1859cde25b3f7211cc886ef249200268b05d06b
--- /dev/null
+++ b/testing_multimodel.R
@@ -0,0 +1,137 @@
+# From Multimodel
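+#
+# Standalone sketch for debugging the multimodel loading step outside the
+# SUNSET workflow: it re-reads the per-system hcst/fcst/obs NetCDF outputs
+# saved by a previous SUNSET run (the hard-coded output_dir below is specific
+# to that run) and retrieves them with startR::Start() through the
+# $model$ / $var$ / $file_date$ path placeholders.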
+source("modules/Loading/R/dates2load.R") +source("modules/Loading/R/get_timeidx.R") +source("modules/Loading/R/check_latlon.R") +source('modules/Multimodel/load_multimodel.R') +source('modules/Multimodel/load_multimodel_splitted.R') +source('modules/Multimodel/dims_multimodel.R') +source('modules/Multimodel/build_multimodel.R') +source('modules/Multimodel/clean_multimodel.R') + +# recipe <- "recipe_tas.yml" # I have included multimodel yes +library(yaml) +recipe <- read_yaml(recipe) + +# (tolower(recipe$Analysis$Datasets$Multimodel$split_loading) %in% c('true','yes')) ## Fails becuase I don't know where split_loading comes from + +# I guess we need this: +# From load_multmodel + +archive <- read_yaml("conf/archive.yml")$esarchive +ref.name <- recipe$Analysis$Datasets$Reference$name + +# lets try creating a vector +## I don't know where this comes from +## recipe$Analysis$Datasets$System$models +exp.name <- c("ECMWF-SEAS5.1", "Meteo-France-System7") + +library(startR) + store.freq <- recipe$Analysis$Variables$freq + variable <- "tas" #strsplit(recipe$Analysis$Variables$name, ", | |,")[[1]] + exp_descrip <- archive$System[[exp.name[1]]] + reference_descrip <- archive$Reference[[ref.name]] + sdates <- dates2load(recipe, recipe$Run$logger) + lats.min <- recipe$Analysis$Region$latmin + lats.max <- recipe$Analysis$Region$latmax + lons.min <- recipe$Analysis$Region$lonmin + lons.max <- recipe$Analysis$Region$lonmax + circularsort <- check_latlon(lats.min, lats.max, lons.min, lons.max) + + +recipe$Run$output_dir <- file.path("/esarchive/scratch/nperez/git4/recipe_tas_20240531165840", "outputs", + recipe$Analysis$Datasets$Multimodel$createFrom) + +hcst_start <- recipe$Analysis$Time$hcst_start + hcst_end <- recipe$Analysis$Time$hcst_end + shortdate <- substr(recipe$Analysis$Time$sdate, start = 1, stop = 2) + filename <- paste0("scorecards_$model$_", ref.name, "_$var$__$file_date$__", + hcst_start, "-", hcst_end, "_s", shortdate, ".nc") + +source("modules/Saving/Saving.R") + hcst.path <- file.path(recipe$Run$output_dir, "$model$", ref.name, + recipe$Analysis$Workflow$Calibration$method, + "$var$", filename) +# hcst.path <- gsub(variable[1], "$var$", hcst.path) +# hcst.path <- gsub('Multimodel', "$model$", hcst.path) + fcst.path <- obs.path <- hcst.path + obs.path <- gsub("_$file_date$", "-obs_$file_date$", obs.path, fixed = T) + obs.path <- gsub("$model$", gsub('\\.','',exp.name[1]), obs.path, fixed = T) + + hcst <- Start(dat = hcst.path, + var = variable, + file_date = sdates$hcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = Sort(), + longitude = 'all', + longitude_reorder = circularsort, + synonims = list(latitude = c('lat', 'latitude'), + longitude = c('lon', 'longitude'), + ensemble = c('member', 'ensemble')), + ensemble = 'all', + metadata_dims = 'var', + largest_dims_length = TRUE, + return_vars = list(latitude = 'dat', + longitude = 'dat', + time = 'file_date'), + split_multiselected_dims = split_multiselected_dims, + retrieve = TRUE) + + hcst <- as.s2dv_cube(hcst) + # Adjust dates for models where the time stamp goes into the next month + # No need to check the dates if the files are saved by SUNSET: + #if (recipe$Analysis$Variables$freq == "monthly_mean") { + # hcst$attrs$Dates[] <- hcst$attrs$Dates - seconds(exp_descrip$time_stamp_lag) + #} + +fcst <- as.s2dv_cube(fcst) + + fcst <- Start(dat = fcst.path, + var = variable, + file_date = sdates$fcst, + model = gsub('\\.','',exp.name), + time = 'all', + latitude = 'all', + latitude_reorder = 
+fcst <- Start(dat = fcst.path,
+              var = variable,
+              file_date = sdates$fcst,
+              model = gsub('\\.', '', exp.name),
+              time = 'all',
+              latitude = 'all',
+              latitude_reorder = Sort(),
+              longitude = 'all',
+              longitude_reorder = circularsort,
+              synonims = list(latitude = c('lat', 'latitude'),
+                              longitude = c('lon', 'longitude'),
+                              ensemble = c('member', 'ensemble')),
+              ensemble = 'all',
+              metadata_dims = 'var',
+              largest_dims_length = TRUE,
+              return_vars = list(latitude = 'dat',
+                                 longitude = 'dat',
+                                 time = 'file_date'),
+              split_multiselected_dims = split_multiselected_dims,
+              retrieve = TRUE)
+
+fcst <- as.s2dv_cube(fcst)
+
+obs <- Start(dat = obs.path,
+             var = variable,
+             file_date = sdates$hcst,
+             time = 'all',
+             latitude = 'all',
+             latitude_reorder = Sort(),
+             longitude = 'all',
+             longitude_reorder = circularsort,
+             synonims = list(latitude = c('lat', 'latitude'),
+                             longitude = c('lon', 'longitude')),
+             metadata_dims = 'var',
+             return_vars = list(latitude = 'dat',
+                                longitude = 'dat',
+                                time = 'file_date'),
+             split_multiselected_dims = TRUE,
+             retrieve = TRUE)
+
+default_dims <- c(dat = 1, var = 1, sday = 1,
+                  sweek = 1, syear = 1, time = 1,
+                  latitude = 1, longitude = 1, ensemble = 1)
+default_dims[names(dim(obs))] <- dim(obs)
+dim(obs) <- default_dims
+
+# Convert obs to s2dv_cube
+obs <- as.s2dv_cube(obs)
+
+datos <- list(hcst = hcst, fcst = fcst, obs = obs)
diff --git a/tools/add_logo.R b/tools/add_logo.R
index 2bb0a5a68a1942b5d50e11e6e8c3fb4712864803..92259a2de23fbb8e06be40aab26c568cf8a559d2 100644
--- a/tools/add_logo.R
+++ b/tools/add_logo.R
@@ -31,4 +31,3 @@ add_logo <- function(file, logo, logo_resize_percentage = 0.25) {
 #system(paste0("convert ", fig, " -gravity south -background whie -splice 0x", logo_height, " extended_fig.png"))
 #system(paste0("convert extended_fig.png \\( ", logo, " -resize , map_width * 0.25, " \\) -gravity southeast -composite extended_fig.png"))
-
diff --git a/tools/divide_recipe.R b/tools/divide_recipe.R
index 2a6b6c0af21973aa1930051be7fd042255029802..2da7ce1f739990f758e584991d1adce5bd2bf517 100644
--- a/tools/divide_recipe.R
+++ b/tools/divide_recipe.R
@@ -22,6 +22,10 @@ divide_recipe <- function(recipe) {
                Run = recipe$Run[c("Loglevel", "output_dir", "Terminal",
                                   "code_dir", "logfile", "filesystem")])
+  if (!is.null(recipe$Run$tmp_dir)) {
+    beta_recipe$Run$tmp_dir <- recipe$Run$tmp_dir
+  }
+
   # duplicate recipe by independent variables:
   # If a single variable is not given inside a list, rebuild structure
   if (any(c("name", "freq", "units") %in% names(recipe$Analysis$Variables))) {
diff --git a/tools/prepare_outputs.R b/tools/prepare_outputs.R
index 376876b73c819386fb66950179f91b38082d44f3..5bdff99e3dbea7d9af8635f7cea6d2b9bec0e2a9 100644
--- a/tools/prepare_outputs.R
+++ b/tools/prepare_outputs.R
@@ -32,7 +32,7 @@ prepare_outputs <- function(recipe_file,
   recipe$recipe_path <- recipe_file
   recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
 
-  output_dir = recipe$Run$output_dir
+  output_dir <- recipe$Run$output_dir
   # Create output folders
   if (!uniqueID) {
     folder_name <- paste0(gsub(".yml", "", gsub("/", "_", recipe$name)))
@@ -129,9 +129,18 @@ prepare_outputs <- function(recipe_file,
   } else {
     recipe <- check_recipe(recipe)
   }
+
+  # If autosubmit is used, a temporary directory needs to be created if the
+  # filesystem is not accessible from the autosubmit machine.
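+  # Sketch of the substitution performed below: given a (hypothetical)
+  # template tmp_dir "/gpfs/scratch/$USER$/tmp" and hpc_user "bsc032762",
+  # the recipe ends up with tmp_dir "/gpfs/scratch/bsc032762/tmp/<folder_name>".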
+  tmp_dir <- read_yaml("conf/autosubmit.yml")[[recipe$Run$filesystem]]$tmp_dir
+  if (recipe$Run$autosubmit && !(is.null(tmp_dir))) {
+    tmp_dir <- gsub("\\$.+?\\$", recipe$Run$auto_conf$hpc_user, tmp_dir)
+    recipe$Run$tmp_dir <- file.path(tmp_dir, folder_name)
+  }
   # Create a copy of the recipe and remove the logger
   recipe_copy <- recipe
   recipe_copy$Run$logger <- NULL
+  # Copy recipe to output folder
   write_yaml(recipe_copy,
              file = file.path(output_dir, folder_name, 'logs', 'recipes',
diff --git a/tools/read_atomic_recipe.R b/tools/read_atomic_recipe.R
index 3cb11e7ed6b1d3cb91a765debeaeebd2076ddedc..99f001554d030cf2ccee80a185f8cea71f9bc0ae 100644
--- a/tools/read_atomic_recipe.R
+++ b/tools/read_atomic_recipe.R
@@ -26,6 +26,10 @@ read_atomic_recipe <- function(recipe_file) {
   recipe <- read_yaml(recipe_file, eval.exp = TRUE)
   recipe$recipe_path <- recipe_file
   recipe$name <- tools::file_path_sans_ext(basename(recipe_file))
+  # Handle temporary directory for autosubmit
+  if (!is.null(recipe$Run$tmp_dir)) {
+    recipe$Run$output_dir <- recipe$Run$tmp_dir
+  }
   # Create log file for atomic recipe
   logfile <- file.path(recipe$Run$output_dir, 'logs',
                        paste0(recipe$name, '.log'))
@@ -51,6 +55,7 @@ read_atomic_recipe <- function(recipe_file) {
                  appenders = list(file_appender(logfile, append = TRUE,
                                                 layout = default_log_layout())))
   }
+
   recipe$Run$logger <- logger
   recipe$Run$logfile <- logfile
   # Restructure recipe to flatten redundant lists
diff --git a/tools/write_autosubmit_conf.R b/tools/write_autosubmit_conf.R
index 5f3981974f85d74ae880928f918f419a2ea65462..a6bc5aa04690b5a196c31fd694883bfe5ebb7fcd 100644
--- a/tools/write_autosubmit_conf.R
+++ b/tools/write_autosubmit_conf.R
@@ -23,6 +23,10 @@ write_autosubmit_conf <- function(recipe, nchunks,
   # Output directory
   dest_dir <- paste0(auto_specs$experiment_dir, expid, "/conf/")
   proj_dir <- paste0(auto_specs$experiment_dir, expid, "/proj/auto-s2s/")
+  tmp_dir <- auto_specs$tmp_dir
+  # HPC details
+  platform <- recipe$Run$auto_conf$platform
+  hpc_user <- recipe$Run$auto_conf$hpc_user
   # Create project directory if it does not exist yet so that chunk_to_recipe
   # and split_to_recipe files can be created
   if (!dir.exists(proj_dir)) {
@@ -58,28 +62,41 @@ write_autosubmit_conf <- function(recipe, nchunks,
   ## wallclock, notify_on, platform?, processors,
   # Create bash file to associate chunk number to recipe name
   chunk_file <- paste0(proj_dir, "chunk_to_recipe")
+  ## TODO: Specify dir in chunk_to_recipe?
   .create_bash_file(fileout = chunk_file,
                     dictionary = chunk_to_recipe,
                     variable = "CHUNK")
   # Define job parameters
-  conf$JOBS$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock
+  notify_on <- ""
   if (recipe$Run$auto_conf$notify_completed) {
-    conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON,
-                                              "COMPLETED")
+    notify_on <- "COMPLETED"
   }
   if (recipe$Run$auto_conf$notify_failed) {
-    conf$JOBS$verification$NOTIFY_ON <- paste(conf$JOBS$verification$NOTIFY_ON,
-                                              "FAILED")
+    notify_on <- paste(notify_on, "FAILED")
   }
+  # Define last job before file transfer
+  last_job <- "verification"
+  # Verification job
+  conf$JOBS$verification$WALLCLOCK <- recipe$Run$auto_conf$wallclock
+  conf$JOBS$verification$NOTIFY_ON <- notify_on
   conf$JOBS$verification$PROCESSORS <- recipe$Run$auto_conf$processors_per_job # ncores?
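+  # custom_directives are passed to the scheduler verbatim; e.g. a recipe can
+  # set custom_directives: ['#SBATCH --exclusive'] (see the
+  # scorecards_on_nord3v2.yml example above).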
   conf$JOBS$verification$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives
-  # Only include Multimodel job if sections exists in the recipe
+  conf$JOBS$verification$PLATFORM <- platform
+  # Recipe transfer job: only include if a tmp_dir is defined
+  if (!is.null(tmp_dir)) {
+    conf$JOBS$verification$DEPENDENCIES <- "transfer_recipes"
+    conf$JOBS$transfer_recipes$NOTIFY_ON <- notify_on
+  } else {
+    conf$JOBS$transfer_recipes <- NULL
+  }
+  # Multimodel job: Only include if section exists in the recipe
   # is set to execute = 'True' or 'both'
   if (!is.null(recipe$Analysis$Datasets$Multimodel) &&
       tolower(recipe$Analysis$Datasets$Multimodel$execute) == "false") {
     conf$JOBS$multimodel <- NULL
     conf$JOBS$scorecards$DEPENDENCIES <- "verification"
   } else {
+    conf$JOBS$multimodel$PLATFORM <- platform
     conf$JOBS$scorecards$DEPENDENCIES <- "multimodel"
     # Create bash file to associate split number to recipe name
     split_file <- paste0(proj_dir, "split_to_recipe")
@@ -97,45 +114,52 @@ write_autosubmit_conf <- function(recipe, nchunks,
     # 'Splits' parameter should be the number of mulimodel jobs
     conf$JOBS$multimodel$SPLITS <- length(mm_dependencies)
     # Define the rest of the parameters
-    if (recipe$Run$auto_conf$notify_completed) {
-      conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON,
-                                              "COMPLETED")
-    }
-    if (recipe$Run$auto_conf$notify_failed) {
-      conf$JOBS$multimodel$NOTIFY_ON <- paste(conf$JOBS$multimodel$NOTIFY_ON,
-                                              "FAILED")
-    }
-
+    conf$JOBS$multimodel$NOTIFY_ON <- notify_on
     conf$JOBS$multimodel$PROCESSORS <- recipe$Run$auto_conf$processors_multimodel
     conf$JOBS$multimodel$CUSTOM_DIRECTIVES <- recipe$Run$auto_conf$custom_directives_multimodel
     conf$JOBS$multimodel$WALLCLOCK <- recipe$Run$auto_conf$wallclock_multimodel
+    last_job <- "multimodel"
   }
-  # Only include Scorecards job if section exists in the recipe and
+  # Scorecards job: include only if section exists in the recipe and
   # is set to 'execute: True'
   if (!("Scorecards" %in% names(recipe$Analysis$Workflow)) ||
       (!recipe$Analysis$Workflow$Scorecards$execute)) {
     conf$JOBS$scorecards <- NULL
   } else {
-    if (recipe$Run$auto_conf$notify_completed) {
-      conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON,
-                                              "COMPLETED")
-    }
-    if (recipe$Run$auto_conf$notify_failed) {
-      conf$JOBS$scorecards$NOTIFY_ON <- paste(conf$JOBS$scorecards$NOTIFY_ON,
-                                              "FAILED")
-    }
+    conf$JOBS$scorecards$NOTIFY_ON <- notify_on
+    conf$JOBS$scorecards$PLATFORM <- platform
+    last_job <- "scorecards"
+  }
+  # Results transfer job: only include if a tmp_dir is defined
+  if (!is.null(tmp_dir)) {
+    conf$JOBS$transfer_results$DEPENDENCIES <- last_job
+    conf$JOBS$transfer_results$NOTIFY_ON <- notify_on
+  } else {
+    conf$JOBS$transfer_results <- NULL
+  }
 } else if (conf_type == "platforms") {
+  ## TODO: Allow user to choose platform
   # Section 4: platform configuration
   ## nord3v2 configuration... platform name? user, processors_per_node
-  conf$Platforms[[auto_specs$platform]]$USER <-
-    recipe$Run$auto_conf$hpc_user
+  conf$Platforms[[platform]]$USER <- hpc_user
+  unused_machines <- names(conf$Platforms)[!names(conf$Platforms) %in% c("transfer", platform)]
+  conf$Platforms[unused_machines] <- NULL
+  if (!is.null(tmp_dir)) {
+    conf$Platforms$transfer$USER <- hpc_user
+  } else {
+    conf$Platforms$transfer <- NULL
+  }
 } else if (conf_type == "proj") {
   # Section 5: proj
   ## modules? Info that goes on script, e.g. output directory
   conf$common$OUTDIR <- recipe$Run$output_dir
   conf$common$SCRIPT <- recipe$Run$auto_conf$script
   conf$common$RECIPE <- paste0(recipe$name, ".yml")
+  if (!is.null(tmp_dir)) {
+    conf$common$TMPDIR <- gsub("\\$.+?\\$", hpc_user, tmp_dir)
+  } else {
+    conf$common$TMPDIR <- recipe$Run$output_dir
+  }
 }
 # Write config file inside autosubmit dir
 write.config(conf, paste0(dest_dir, dest_file),
diff --git a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
index 753c1036ce678bb870135d889166d73d170c9a9d..569ddad7b41c884f052684f4f28cc551e13aa607 100644
--- a/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
+++ b/use_cases/ex0_1_sample_dataset/ex0_1-recipe.yml
@@ -28,6 +28,14 @@ Analysis:
     method: bilinear
     type: to_system
   Workflow:
+    Time_aggregation:
+      execute: yes
+      user_def:
+        ftimes: [1, 3]
+        JJA: !expr sort(c(seq(1,120, 12), seq(2,120,13), seq(3, 120, 14)))
+      method: average
+      ini: [1, 2, 1]
+      end: [2, 3, 3]
     Anomalies:
       compute: yes
      cross_validation: yes
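
# A quick way to check which forecast times an !expr entry such as the JJA
# index list above selects is to evaluate the expression in an R session
# (minimal sketch; the recipe readers load the YAML with expression
# evaluation enabled, cf. tools/read_atomic_recipe.R above):
idx <- sort(c(seq(1, 120, 12), seq(2, 120, 13), seq(3, 120, 14)))
head(idx, 9)
# [1]  1  2  3 13 15 17 25 28 31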