From 6c3fc40b748edf6eb3afb7339f67b44fec0b7ee4 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Jun 2022 16:29:41 +0200 Subject: [PATCH 01/11] Formatting --- modules/Skill/s2s.metrics.R | 43 +++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index b0069730..987ba54f 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -47,8 +47,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, if (na.rm) { nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][] not_missing_dates <- c() - for (sdate in 1:nsdates){ - if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))){ + for (sdate in 1:nsdates) { + if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))) { not_missing_dates <- c(not_missing_dates, sdate) } } @@ -86,8 +86,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, if (metric %in% SPECSVER_METRICS) { data <- Apply(data=list(exp, obs), - target_dims=list(c(time.dim, 'ensemble'), - c(time.dim, 'ensemble')), + target_dims=list(c(time.dim, 'ensemble'), + c(time.dim, 'ensemble')), fun="veriApply", verifun=FUN[[metric]], prob=PROB[[metric]], @@ -127,38 +127,38 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } else if (metric == "frpss_sign") { terciles_obs <- Compute_probs(obs, c(1/3, 2/3), - quantile_dims=c(time.dim), + quantile_dims=c(time.dim), ncores=ncores, - split_factor=1, - na.rm=na.rm) + split_factor=1, + na.rm=na.rm) terciles_exp <- Compute_probs(exp, c(1/3, 2/3), - quantile_dims=c(time.dim, 'ensemble'), + quantile_dims=c(time.dim, 'ensemble'), ncores=ncores, - split_factor=1, - na.rm=na.rm) + split_factor=1, + na.rm=na.rm) probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) frps <- NULL n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, - probs_obs = terciles_obs$probs), + probs_obs = terciles_obs$probs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, n_members = n_members, Fair = TRUE, - ncores = ncores)$output1 + ncores = ncores)$output1 frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, - probs_obs = terciles_obs$probs), + probs_obs = terciles_obs$probs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, n_members = n_members, Fair = TRUE, - ncores = ncores)$output1 + ncores = ncores)$output1 frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim, fun=mean, ncores=ncores)$output1 @@ -193,7 +193,10 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } -.correlation_eno = function(exp, obs, time_dim = 'syear', alpha = 0.05, ncores = 1){ +.correlation_eno = function(exp, obs, + time_dim = 'syear', + alpha = 0.05, + ncores = 1) { cor = NULL cor$r = cor(exp, obs) # Correlation coefficient @@ -222,7 +225,11 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } -.rps_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members = NULL){ +.rps_from_probs <- function(probs_exp, + probs_obs, + n_categories, + Fair, + n_members = NULL) { ## Checkings if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){ @@ -237,13 +244,13 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, ## RPS (Wilks 2011, pp.348-350) rps <- NULL - for (i in 1:n_categories){ + for (i in 1:n_categories) { rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 } rps <- sum(rps) ## FairRPS - if (Fair == TRUE){ + if (Fair == TRUE) { ## formula of SpecsVerification::EnsRps ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) -- GitLab From d5b466bcbc13ceef6c0dce796c8d05d38b1b042f Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Jun 2022 16:30:11 +0200 Subject: [PATCH 02/11] Change R version to R-4.1.2 --- MODULES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODULES b/MODULES index a5e3edc2..697bdf4e 100644 --- a/MODULES +++ b/MODULES @@ -35,7 +35,7 @@ elif [ $BSC_MACHINE == "nord3" ]; then else module load CDO/1.9.8-foss-2015a - module load R/3.6.1-foss-2015a-bare + module load R/4.1.2-foss-2015a-bare fi -- GitLab From 536cd27dc5f54b3a0cb9cb01b0c27965a287f4c3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Tue, 14 Jun 2022 16:31:10 +0200 Subject: [PATCH 03/11] Get grid and time data and metadata, first steps to save metrics data and metadata --- modules/Saving/Save_data.R | 67 +++++++++++-------- .../data_load/testing_recipes/recipe_4.yml | 2 +- modules/test_victoria.R | 2 +- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index 3ace35b8..89e9e761 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -1,12 +1,11 @@ -## TODO: Change names of functions to make sure they don't clash -## with others in the tool +## TODO: Implement wrapper to get grid and time info? get_times <- function(fcst.horizon, leadtimes, sdate) { - - ## TODO: Adapt to Auto-S2S recipe format + # Generates time dimensions and the corresponding metadata. + switch(fcst.horizon, ## TODO: Remove "sub_obs"? - ## TODO: daily case? + ## TODO: implement daily case "seasonal" = {len <- length(leadtimes); ref <- 'months since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "sub_obs" = {len <- 52; ref <- 'week of the year '; @@ -15,9 +14,9 @@ get_times <- function(fcst.horizon, leadtimes, sdate) { stdname <- ''} ) - time <- 1:len + time <- 1:len ## Is this correct? I think it needs to be changed dim(time) <- length(time) - #metadata <- list(time = list(standard_name = stdname, + # metadata <- list(time = list(standard_name = stdname, metadata <- list(time = list(units = paste0(ref, sdate, ' 00:00:00'))) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' @@ -41,26 +40,20 @@ get_times <- function(fcst.horizon, leadtimes, sdate) { } get_latlon <- function(latitude, longitude) { - ## TODO: Verify if this is needed or if it needs a change now that we are - ## using s2dv_cube objects # Adds dimensions and metadata to lat and lon # latitude: array containing the latitude values # longitude: array containing the longitude values - ## the inputs are simply the arrays with the values? + dim(longitude) <- length(longitude) ## TODO: Extract metadata from s2dv_cube metadata <- list(longitude = list(units = 'degrees_east')) - # Add metadata to attributes attr(longitude, 'variables') <- metadata - # Add longitude dimension names(dim(longitude)) <- 'longitude' dim(latitude) <- length(latitude) ## TODO: Extract metadata from s2dv_cube metadata <- list(latitude = list(units = 'degrees_north')) - # Add metadata to attributes attr(latitude, 'variables') <- metadata - # Add latitude dimension names(dim(latitude)) <- 'latitude' return(list(lat=latitude, lon=longitude)) @@ -78,7 +71,7 @@ get_latlon <- function(latitude, longitude) { save_metrics <- function(skill, recipe, - grid, + data_cube, outfile, leadtimes, agg="global") { @@ -86,42 +79,58 @@ save_metrics <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, recipe$Analysis$Time$sdate$fcst_sday) - + ## TODO: Get latitude and longitude lalo <- c('longitude', 'latitude') - ## TODO: Sort out aggregation if (tolower(agg) == "global") { - skill <- lapply(skill, function(x){ - Reorder(x[[1]], c(lalo, 'time'))}) + # Remove singleton dimensions from each metric array and rearrange the + # longitude, latitude and time dimensions to the correct order. + ## TODO: For loop? + skill <- lapply(skill, function(x) { + Reorder(drop(x[[1]]), c(lalo, 'time'))}) } for (i in 1:length(skill)) { - + ## TODO: create dictionary with proper metadata? + ## TODO: Fix attributes metric <- names(skill[i]) if (tolower(agg) == "country"){ - sdname <- paste0(names(metric), " region-aggregated metric") + sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { - sdname <- paste0(names(metric), " grid point metric") + sdname <- paste0(metric, " grid point metric") # formerly names(metric) dims <- c(lalo, 'time') } metadata <- list(name = metric, standard_name = sdname) - attr(skill[i], 'variables') <- metadata - names(dim(skill[[i]])) <- dims + attr(skill[i], 'variables') <- metadata ## Change this line? + names(dim(skill[[i]])) <- dims ## And this one? } + # Time data and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, + to = recipe$Analysis$Time$leadtimemax) + ## TODO: What if fcst_syear is null? + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) ## is this correct? + times <- get_times(fcst.horizon, leadtimes, fcst.sdate) time <- times$time time_step <- times$time_step - # if (tolower(agg) == "country") { - # country <- get_countries(grid) - # ArrayToNc(append(country, time, skill, time_step), outfile) - # } else { + # Grid data and metadata +# if (tolower(agg) == "country") { +# country <- get_countries(grid) +# ArrayToNc(append(country, time, skill, time_step), outfile) +# } else { + + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) - latlon <- get_latlon(grid$lat, grid$lon) + # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) vars <- c(vars, skill, list(time_step)) ArrayToNc(vars, outfile) diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/data_load/testing_recipes/recipe_4.yml index 33c3a74c..01aea162 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/data_load/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: BSS90 + metric: BSS90_Specs, BSS10 Indicators: index: no Output_format: S2S4E diff --git a/modules/test_victoria.R b/modules/test_victoria.R index b5e026a3..8b148066 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,6 +1,6 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" +recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" source("modules/data_load/load.R") source("modules/Calibration/Calibration.R") -- GitLab From 844bb07bab44a43b4e1a0234416dd4f3c5e0caa9 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 15 Jun 2022 12:02:12 +0200 Subject: [PATCH 04/11] First working version to save metrics, metadata needs work --- modules/Saving/Save_data.R | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index 89e9e761..cf40ae9f 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -13,7 +13,8 @@ get_times <- function(fcst.horizon, leadtimes, sdate) { "subseasonal" = {len <- 4; ref <- 'weeks since '; stdname <- ''} ) - + + ## TODO: Get correct date for each time step time <- 1:len ## Is this correct? I think it needs to be changed dim(time) <- length(time) # metadata <- list(time = list(standard_name = stdname, @@ -76,24 +77,19 @@ save_metrics <- function(skill, leadtimes, agg="global") { - fcst.horizon <- tolower(recipe$Analysis$Horizon) - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) - ## TODO: Get latitude and longitude - + # Define grid dimensions and names lalo <- c('longitude', 'latitude') if (tolower(agg) == "global") { # Remove singleton dimensions from each metric array and rearrange the # longitude, latitude and time dimensions to the correct order. - ## TODO: For loop? + ## TODO: Make it work for elements containing multiple arrays skill <- lapply(skill, function(x) { Reorder(drop(x[[1]]), c(lalo, 'time'))}) } for (i in 1:length(skill)) { - ## TODO: create dictionary with proper metadata? - ## TODO: Fix attributes + ## TODO: create dictionary with proper metadata metric <- names(skill[i]) if (tolower(agg) == "country"){ sdname <- paste0(metric, " region-aggregated metric") @@ -102,10 +98,10 @@ save_metrics <- function(skill, sdname <- paste0(metric, " grid point metric") # formerly names(metric) dims <- c(lalo, 'time') } - metadata <- list(name = metric, standard_name = sdname) + metadata <- list(metric = list(name = metric, standard_name = sdname)) - attr(skill[i], 'variables') <- metadata ## Change this line? - names(dim(skill[[i]])) <- dims ## And this one? + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims } # Time data and metadata @@ -114,7 +110,7 @@ save_metrics <- function(skill, to = recipe$Analysis$Time$leadtimemax) ## TODO: What if fcst_syear is null? fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) ## is this correct? + recipe$Analysis$Time$sdate$fcst_sday) times <- get_times(fcst.horizon, leadtimes, fcst.sdate) time <- times$time -- GitLab From 143640da2cc33d99a3231874eb619ca0ce7514c3 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Wed, 15 Jun 2022 14:40:03 +0200 Subject: [PATCH 05/11] Add save_metrics() to testing script --- modules/test_victoria.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index 8b148066..a2111a57 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -5,6 +5,7 @@ recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" source("modules/data_load/load.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") # Load datasets data <- load_datasets(recipe_file) @@ -15,3 +16,7 @@ calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) + +# Export skill metrics onto outfile +outfile <- "/esarchive/scratch/vagudets/auto-s2s-tests/files/skill/test-metrics.nc" +save_metrics(skill_metrics, recipe, data$hcst, outfile, agg = "global") -- GitLab From 30e62e1f77b8f028026e0c8bb35c3ed5622ed3e7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 16 Jun 2022 14:56:46 +0200 Subject: [PATCH 06/11] Change Loading module folder and main function name, add saving to test script --- modules/{data_load/load.R => Loading/Loading.R} | 8 ++++---- modules/{data_load => Loading}/dates2load.R | 0 .../{data_load => Loading}/testing_recipes/recipe_1.yml | 0 .../{data_load => Loading}/testing_recipes/recipe_2.yml | 0 .../{data_load => Loading}/testing_recipes/recipe_3.yml | 0 .../{data_load => Loading}/testing_recipes/recipe_4.yml | 0 .../{data_load => Loading}/testing_recipes/recipe_5.yml | 0 .../testing_recipes/recipe_circular-sort-test.yml | 0 modules/test_victoria.R | 4 ++-- 9 files changed, 6 insertions(+), 6 deletions(-) rename modules/{data_load/load.R => Loading/Loading.R} (98%) rename modules/{data_load => Loading}/dates2load.R (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_1.yml (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_2.yml (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_3.yml (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_4.yml (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_5.yml (100%) rename modules/{data_load => Loading}/testing_recipes/recipe_circular-sort-test.yml (100%) diff --git a/modules/data_load/load.R b/modules/Loading/Loading.R similarity index 98% rename from modules/data_load/load.R rename to modules/Loading/Loading.R index aaadf553..aba9d962 100755 --- a/modules/data_load/load.R +++ b/modules/Loading/Loading.R @@ -1,14 +1,14 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source("modules/data_load/dates2load.R") +source("modules/Loading/dates2load.R") source("tools/libs.R") # RECIPE FOR TESTING # -------------------------------------------------------------------------------- -# recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" -# recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" -# recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_2.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_1.yml" load_datasets <- function(recipe_file) { diff --git a/modules/data_load/dates2load.R b/modules/Loading/dates2load.R similarity index 100% rename from modules/data_load/dates2load.R rename to modules/Loading/dates2load.R diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_1.yml rename to modules/Loading/testing_recipes/recipe_1.yml diff --git a/modules/data_load/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_2.yml rename to modules/Loading/testing_recipes/recipe_2.yml diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_3.yml rename to modules/Loading/testing_recipes/recipe_3.yml diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_4.yml rename to modules/Loading/testing_recipes/recipe_4.yml diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_5.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_5.yml rename to modules/Loading/testing_recipes/recipe_5.yml diff --git a/modules/data_load/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_circular-sort-test.yml rename to modules/Loading/testing_recipes/recipe_circular-sort-test.yml diff --git a/modules/test_victoria.R b/modules/test_victoria.R index a2111a57..a8125b16 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,8 +1,8 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_4.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" -source("modules/data_load/load.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") source("modules/Saving/Save_data.R") -- GitLab From 681771aaa88e6ea0e4e4144c254aa14b64b87a05 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Thu, 16 Jun 2022 14:58:06 +0200 Subject: [PATCH 07/11] Put individual metrics as lists to enable exporting significances to netCDF as a variable --- modules/Loading/testing_recipes/recipe_4.yml | 2 +- modules/Saving/Save_data.R | 20 +++++++++----- modules/Skill/Skill.R | 29 +++++++++++++++----- modules/Skill/s2s.metrics.R | 4 +-- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/modules/Loading/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml index 01aea162..e787a9fd 100644 --- a/modules/Loading/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: BSS90_Specs, BSS10 + metric: RPS FRPS RPSS FRPSS BSS10 BSS90 Indicators: index: no Output_format: S2S4E diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index cf40ae9f..66f25baa 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -80,10 +80,11 @@ save_metrics <- function(skill, # Define grid dimensions and names lalo <- c('longitude', 'latitude') + # Remove singleton dimensions from each metric array and rearrange the + # longitude, latitude and time dimensions to the correct order. if (tolower(agg) == "global") { - # Remove singleton dimensions from each metric array and rearrange the - # longitude, latitude and time dimensions to the correct order. - ## TODO: Make it work for elements containing multiple arrays + ## TODO: Implement metrics with additional non-singleton dimensions + ## e.g. 'ensemble' in ensemble correlation. skill <- lapply(skill, function(x) { Reorder(drop(x[[1]]), c(lalo, 'time'))}) } @@ -91,7 +92,7 @@ save_metrics <- function(skill, for (i in 1:length(skill)) { ## TODO: create dictionary with proper metadata metric <- names(skill[i]) - if (tolower(agg) == "country"){ + if (tolower(agg) == "country") { sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { @@ -108,9 +109,13 @@ save_metrics <- function(skill, fcst.horizon <- tolower(recipe$Analysis$Horizon) leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, to = recipe$Analysis$Time$leadtimemax) - ## TODO: What if fcst_syear is null? - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) + } times <- get_times(fcst.horizon, leadtimes, fcst.sdate) time <- times$time @@ -130,6 +135,7 @@ save_metrics <- function(skill, vars <- list(latlon$lat, latlon$lon, time) vars <- c(vars, skill, list(time_step)) ArrayToNc(vars, outfile) + print("##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index caf4939d..69152d8d 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -1,7 +1,7 @@ # This module should calculate verification metrics at any time of the workflow # It should implement different verification metrics: # - FRPSS and RPSS -# - FCRPSS abd CRPSS +# - FCRPSS and CRPSS # - enscorr # - bias # - reliability diagram @@ -54,7 +54,7 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml - ## TODO: Adapt time_dims to subseasonal case? + ## TODO: Adapt time_dims to subseasonal case time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) @@ -70,22 +70,29 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill) + # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Ensemble mean correlation } else if (metric == 'enscorr') { ## TODO: Implement option for Kendall and Spearman methods? @@ -95,9 +102,14 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { method = 'pearson', memb_dim = memb_dim, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$corr) + skill_metrics[[ paste0(metric, "_p.value") ]] <- list(skill$p.val) + skill_metrics[[ paste0(metric, "_conf.low") ]] <- list(skill$conf.lower) + skill_metrics[[ paste0(metric, "_conf.up") ]] <- list(skill$conf.upper) + # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics + ## Retain _specs in metric name for clarity? metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name skill <- Compute_verif_metrics(exp$data, obs$data, skill_metrics = metric, @@ -105,8 +117,11 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { na.rm = na.rm, ncores = ncores) skill_metrics[[ metric ]] <- skill + } + } + print("##### SKILL METRIC COMPUTATION COMPLETE #####") return(skill_metrics) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index 987ba54f..7246dd09 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -72,6 +72,7 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, veriUnwrap <- easyVerification:::veriUnwrap ## SPECS VERIFICATION PARAMETERS + # Assign function name and probability bins for each metric SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", @@ -97,7 +98,6 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, tdim=1, na.rm=na.rm)[[1]] #* 100 - ## TODO: Keep 'ensemble' dimension to maintain flexibility? data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN metric <- paste0(metric, "_specs") @@ -163,7 +163,7 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim, fun=mean, ncores=ncores)$output1 frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim, - fun=mean, ncores=ncores)$output1 + fun=mean, ncores=ncores)$output1 frpss_respect2clim <- NULL frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean -- GitLab From 7522a373e5cb287a580de6c55c45920cbbc58904 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Jun 2022 09:49:56 +0200 Subject: [PATCH 08/11] Remove sourced RPS and RPSS funs now that s2dv_v1.2.0 has been released --- modules/Skill/Skill.R | 4 - tools/tmp/RPS.R | 193 -------------------------------- tools/tmp/RPSS.R | 223 ------------------------------------- tools/tmp/RandomWalkTest.R | 82 -------------- 4 files changed, 502 deletions(-) delete mode 100644 tools/tmp/RPS.R delete mode 100644 tools/tmp/RPSS.R delete mode 100644 tools/tmp/RandomWalkTest.R diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index 69152d8d..5fb66b2c 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -8,10 +8,6 @@ # - ask Carlos which decadal metrics he is currently using source("modules/Skill/s2s.metrics.R") -## TODO: Remove once the new version of s2dv is released -source("tools/tmp/RandomWalkTest.R") -source("tools/tmp/RPS.R") -source("tools/tmp/RPSS.R") ## TODO: Implement this in the future ## Which parameter are required? diff --git a/tools/tmp/RPS.R b/tools/tmp/RPS.R deleted file mode 100644 index 84956ac8..00000000 --- a/tools/tmp/RPS.R +++ /dev/null @@ -1,193 +0,0 @@ -#'Compute the Ranked Probability Score -#' -#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the -#'squared differences between the cumulative forecast probabilities (computed -#'from the ensemble members) and the observations (defined as 0% if the category -#'did not happen and 100% if it happened). It can be used to evaluate the skill -#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 -#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of -#'categories. In the case of a forecast divided into two categories (the lowest -#'number of categories that a probabilistic forecast can have), the RPS -#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -#'and 1. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' -#' for computing the thresholds between the probabilistic categories. If NULL, -#' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of RPS with the same dimensions as "exp" except the -#''time_dim' and 'memb_dim' dimensions. -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'res <- RPS(exp = exp, obs = obs) -#' -#'@import multiApply -#'@importFrom easyVerification convert2prob -#'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_clim - if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] - } else { - if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { - stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") - } else if (length(indices_for_clim) > dim(obs)[time_dim] | - max(indices_for_clim) > dim(obs)[time_dim] | - any(indices_for_clim) < 1) { - stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute RPS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- time_dim - } else { - target_dims_obs <- c(time_dim, memb_dim) - } - - rps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs), - output_dims = time_dim, - fun = .RPS, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores)$output1 - - # Return only the mean RPS - rps <- MeanDims(rps, time_dim, na.rm = FALSE) - - return(rps) -} - - -.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate, (memb)] - - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # exp_probs: [bin, sdate] - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # obs_probs: [bin, sdate] - - probs_exp_cumsum <- apply(exp_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) - rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum) - # rps: [sdate] - - if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] - R <- dim(exp)[2] #memb - R_new <- Inf - adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) - adjustment <- apply(adjustment, 2, sum) - rps <- rps + adjustment - } - - return(rps) -} - -.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { - # if exp: [sdate, memb] - # if obs: [sdate, (memb)] - - # Add dim [memb = 1] to obs if it doesn't have memb_dim - if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) - - # Absolute thresholds - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - - # Probabilities - probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] - for (i_time in 1:dim(data)[1]) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) - } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) - } - } - - return(probs) -} - diff --git a/tools/tmp/RPSS.R b/tools/tmp/RPSS.R deleted file mode 100644 index 04c31378..00000000 --- a/tools/tmp/RPSS.R +++ /dev/null @@ -1,223 +0,0 @@ -#'Compute the Ranked Probability Skill Score -#' -#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score -#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to -#'assess whether a forecast presents an improvement or worsening with respect to -#'a reference forecast. The RPSS ranges between minus infinite and 1. If the -#'RPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. -#'Examples of reference forecasts are the climatological forecast (same -#'probabilities for all categories for all time steps), persistence, a previous -#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. -#'The statistical significance is obtained based on a Random Walk test at the -#'95% confidence level (DelSole and Tippett, 2016). -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' -#' for computing the thresholds between the probabilistic categories. If NULL, -#' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$rpss}{ -#' A numerical array of the RPSS with the same dimensions as "exp" except the -#' 'time_dim' and 'memb_dim' dimensions. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the RPSS with the same -#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. -#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast -#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast -#' -#'@import multiApply -#'@export -RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_clim - if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] - } else { - if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { - stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") - } else if (length(indices_for_clim) > dim(obs)[time_dim] | - max(indices_for_clim) > dim(obs)[time_dim] | - any(indices_for_clim) < 1) { - stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute RPSS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- time_dim - } else { - target_dims_obs <- c(time_dim, memb_dim) - } - - if (!is.null(ref)) { # use "ref" as reference forecast - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs, - ref = c(time_dim, memb_dim)) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs) - } - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores) - - return(output) -} - -.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate, (memb)] - # ref: [sdate, memb] or NULL - - # RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) - - # RPS of the reference forecast - if (is.null(ref)) { ## using climatology as reference forecast - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # obs_probs: [bin, sdate] - - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) - clim_probs <- array(clim_probs, dim = dim(obs_probs)) - # clim_probs: [bin, sdate] - - # Calculate RPS for each time step - probs_clim_cumsum <- apply(clim_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) - rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - # rps_ref: [sdate] - -# if (Fair) { # FairRPS -# ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] -# R <- dim(exp)[2] #memb -# R_new <- Inf -# adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) -# adjustment <- apply(adjustment, 2, sum) -# rps_ref <- rps_ref + adjustment -# } - - } else { # use "ref" as reference forecast - rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) - } - - # RPSS - rpss <- 1 - mean(rps_exp) / mean(rps_ref) - - # Significance - sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif - - return(list(rpss = rpss, sign = sign)) -} - diff --git a/tools/tmp/RandomWalkTest.R b/tools/tmp/RandomWalkTest.R deleted file mode 100644 index adeadc1e..00000000 --- a/tools/tmp/RandomWalkTest.R +++ /dev/null @@ -1,82 +0,0 @@ -#'Random walk test for skill differences -#' -#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 95% -#'confidence level, as in DelSole and Tippett (2016). -#' -#'@param skill_A A numerical array of the time series of the skill with the -#' forecaster A's. -#'@param skill_B A numerical array of the time series of the skill with the -#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. -#'@param time_dim A character string indicating the name of the dimension along -#' which the tests are computed. The default value is 'sdate'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A list of 2: -#'\item{$score}{ -#' A numerical array with the same dimensions as the input arrays except -#' 'time_dim'. The number of times that forecaster A has been better than -#' forecaster B minus the number of times that forecaster B has been better -#' than forecaster A (for skill positively oriented). If $score is positive -#' forecaster A is better than forecaster B, and if $score is negative -#' forecaster B is better than forecaster B. -#'} -#'\item{$signif}{ -#' A logical array with the same dimensions as the input arrays except -#' 'time_dim'. Whether the difference is significant or not at the 5% -#' significance level. -#'} -#' -#'@examples -#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) -#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) -#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -#' skill_A <- abs(fcst_A - reference) -#' skill_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) -#' -#'@import multiApply -#'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ - - ## Check inputs - if (is.null(skill_A) | is.null(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") - } - if(!is.numeric(skill_A) | !is.numeric(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") - } - if (!identical(dim(skill_A),dim(skill_B))){ - stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") - } - if(!is.character(time_dim)){ - stop("Parameter 'time_dim' must be a character string.") - } - if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ - stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") - } - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A, skill_B), - target_dims = time_dim, - fun = .RandomWalkTest, - ncores = ncores) - return(res) -} - -.RandomWalkTest <- function(skill_A, skill_B){ - - score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) - - ## TRUE if significant (if last value is above or below 2*sqrt(N)) - signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), - yes = TRUE, no = FALSE) - - return(list("score"=score[length(skill_A)],"signif"=signif)) -} -- GitLab From 9e36e7f12e0ba37b0423b1453f6676a9876932b7 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Fri, 17 Jun 2022 10:23:20 +0200 Subject: [PATCH 09/11] Add some system and reference variables --- conf/archive.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index e869cb9c..248876aa 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -6,7 +6,8 @@ archive: system5c3s: src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/"} + "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/"} @@ -26,6 +27,7 @@ archive: system21_m1: src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", + "g500":"_f12h/", "sfcWind":"_f6h/" "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: fcst: 50 @@ -68,8 +70,11 @@ archive: era5: src: "recon/ecmwf/era5/" daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/"} + "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", "tasmin":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", "tasmin":"_f1h-r1440x721cds/"} reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" -- GitLab From 8a312ac46ed31a2a33509de107efffb40b4e3514 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 20 Jun 2022 10:34:42 +0200 Subject: [PATCH 10/11] Add complete list of leadtime months to data summary --- tools/data_summary.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/data_summary.R b/tools/data_summary.R index 625bbef1..e211e202 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -5,17 +5,20 @@ ## TODO: Add check for missing files/NAs by dimension data_summary <- function(object, frequency) { - # Get name and start dates + # Get name, leadtime months and date range object_name <- deparse(substitute(object)) if (tolower(frequency) == "monthly_mean") { date_format <- '%b %Y' } else if (tolower(frequency) == "daily_mean") { date_format <- '%b %d %Y' } + months <- unique(format(as.Date(object$Dates[[1]]), format = '%B')) + months <- paste(as.character(months), collapse=", ") sdate_min <- format(min(as.Date(object$Dates[[1]])), format = date_format) sdate_max <- format(max(as.Date(object$Dates[[1]])), format = date_format) print("DATA SUMMARY:") + print(paste0(object_name, " months: ", months)) print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) print(paste0(object_name, " dimensions: ")) print(dim(object$data)) -- GitLab From 8d1780092073bb9f8559242fdf3bffff63e79382 Mon Sep 17 00:00:00 2001 From: Victoria Agudetse Roures Date: Mon, 20 Jun 2022 10:36:34 +0200 Subject: [PATCH 11/11] Fix typo and formatting --- conf/archive.yml | 20 +++++++++++------ modules/Loading/testing_recipes/recipe_1.yml | 23 ++++++++++---------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/conf/archive.yml b/conf/archive.yml index 248876aa..184a3398 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -27,7 +27,7 @@ archive: system21_m1: src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", - "g500":"_f12h/", "sfcWind":"_f6h/" + "g500":"_f12h/", "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: fcst: 50 @@ -69,12 +69,18 @@ archive: Reference: era5: src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", "tasmin":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/", - "g500":"_f1h-r1440x721cds/","sfcWind":"_f1h-r1440x721cds/", - "tasmax":"_f1h-r1440x721cds/", "tasmin":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"_f1h-r1440x721cds/", + "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/"} reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" diff --git a/modules/Loading/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml index ebf0299d..041c44af 100644 --- a/modules/Loading/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -1,33 +1,34 @@ -Description: System5test -Author: V. Agudetse +Description: + Author: V. Agudetse + Info: ECMWF System5 Seasonal Forecast Example recipe (daily mean, tas) Analysis: - Horizon: Seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - name: tas # Mandatory, str: tas, + name: tas # Mandatory, str: variable name in /esarchive/ freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean Datasets: System: - name: system5c3s # Mandatory, str: System codename + name: system5c3s # Mandatory, str: System codename. See docu. Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - name: era5 # Mandatory, str: Reference codename + name: era5 # Mandatory, str: Reference codename. See docu. Time: sdate: fcst_syear: '2020' # Optional, int: Forecast year 'YYYY' - fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD' + fcst_sday: '1101' # Mandatory, int: Start date, 'MMDD' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' leadtimemin: 0 # Mandatory, int: First leadtime time step in months - leadtimemax: 0 # Mandatory, int: Last leadtime time step in months + leadtimemax: 1 # Mandatory, int: Last leadtime time step in months Region: latmin: -10 # Mandatory, int: minimum latitude latmax: 10 # Mandatory, int: maximum latitude - lonmin: 0 # Mandatory, int: minimum longitude + lonmin: 0 # Mandatory, int: minimum longitude lonmax: 20 # Mandatory, int: maximum longitude Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. - type: to_system # Mandatory, str: to_system, to_reference, or CDO-compatible grid. + type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Calibration: method: qmap # Mandatory, str: Calibration method. See docu. -- GitLab