diff --git a/MODULES b/MODULES index a5e3edc24a273820a45907c0ccfea8f16a763112..697bdf4eb913e82cfc6b739f9658700fe070a93d 100644 --- a/MODULES +++ b/MODULES @@ -35,7 +35,7 @@ elif [ $BSC_MACHINE == "nord3" ]; then else module load CDO/1.9.8-foss-2015a - module load R/3.6.1-foss-2015a-bare + module load R/4.1.2-foss-2015a-bare fi diff --git a/conf/archive.yml b/conf/archive.yml index e869cb9c0f2bf84a97efe48c75ff288a91b367ba..184a3398d4ae76f11bd7d88d196c6b677ab0dd6d 100644 --- a/conf/archive.yml +++ b/conf/archive.yml @@ -6,7 +6,8 @@ archive: system5c3s: src: "exp/ecmwf/system5c3s/" daily_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", - "prlr":"_s0-24h/", "sfcWind":"_f6h/"} + "prlr":"_s0-24h/", "sfcWind":"_f6h/", + "tasmin":"_f24h/", "tasmax":"_f24h/"} monthly_mean: {"tas":"_f6h/", "rsds":"_s0-24h/", "prlr":"_s0-24h/", "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/"} @@ -26,6 +27,7 @@ archive: system21_m1: src: "exp/dwd/system21_m1/" monthly_mean: {"tas":"_f6h/", "prlr":"_f24h", + "g500":"_f12h/", "sfcWind":"_f6h/", "tasmin":"_f24h/", "tasmax":"_f24h/"} nmember: fcst: 50 @@ -67,9 +69,18 @@ archive: Reference: era5: src: "recon/ecmwf/era5/" - daily_mean: {"tas":"_f1h-r1440x721cds/", "rsds":"_f1h-r1440x721cds/", - "prlr":"_f1h-r1440x721cds/", "sfcWind":"_f1h-r1440x721cds/"} - monthly_mean: {"tas":"_f1h-r1440x721cds/", "prlr":"_f1h-r1440x721cds/"} + daily_mean: {"tas":"_f1h-r1440x721cds/", + "rsds":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/"} + monthly_mean: {"tas":"_f1h-r1440x721cds/", + "prlr":"_f1h-r1440x721cds/", + "g500":"_f1h-r1440x721cds/", + "sfcWind":"_f1h-r1440x721cds/", + "tasmax":"_f1h-r1440x721cds/", + "tasmin":"_f1h-r1440x721cds/"} reference_grid: "/esarchive/recon/ecmwf/era5/monthly_mean/tas_f1h-r1440x721cds/tas_201805.nc" era5land: src: "recon/ecmwf/era5land/" diff --git a/modules/data_load/load.R b/modules/Loading/Loading.R similarity index 98% rename from modules/data_load/load.R rename to modules/Loading/Loading.R index aaadf5532f58dd530e2144745c497891fbedcbdb..aba9d9628b271bf62e732164776b0f6af9910581 100755 --- a/modules/data_load/load.R +++ b/modules/Loading/Loading.R @@ -1,14 +1,14 @@ ## TODO: remove paths to personal scratchs source("/esarchive/scratch/vagudets/repos/csoperational/R/get_regrid_params.R") # Load required libraries/funs -source("modules/data_load/dates2load.R") +source("modules/Loading/dates2load.R") source("tools/libs.R") # RECIPE FOR TESTING # -------------------------------------------------------------------------------- -# recipe_file <- "modules/data_load/testing_recipes/recipe_3.yml" -# recipe_file <- "modules/data_load/testing_recipes/recipe_2.yml" -# recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_3.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_2.yml" +# recipe_file <- "modules/Loading/testing_recipes/recipe_1.yml" load_datasets <- function(recipe_file) { diff --git a/modules/data_load/dates2load.R b/modules/Loading/dates2load.R similarity index 100% rename from modules/data_load/dates2load.R rename to modules/Loading/dates2load.R diff --git a/modules/data_load/testing_recipes/recipe_1.yml b/modules/Loading/testing_recipes/recipe_1.yml similarity index 61% rename from modules/data_load/testing_recipes/recipe_1.yml rename to modules/Loading/testing_recipes/recipe_1.yml index ebf0299d9ff9530e0e23384c39eb407b7b954b9e..041c44af204cf0d5f101c93950808c96393b59d6 100644 --- a/modules/data_load/testing_recipes/recipe_1.yml +++ b/modules/Loading/testing_recipes/recipe_1.yml @@ -1,33 +1,34 @@ -Description: System5test -Author: V. Agudetse +Description: + Author: V. Agudetse + Info: ECMWF System5 Seasonal Forecast Example recipe (daily mean, tas) Analysis: - Horizon: Seasonal # Mandatory, str: either subseasonal, seasonal, or decadal + Horizon: seasonal # Mandatory, str: either subseasonal, seasonal, or decadal Variables: - name: tas # Mandatory, str: tas, + name: tas # Mandatory, str: variable name in /esarchive/ freq: daily_mean # Mandatory, str: either monthly_mean or daily_mean Datasets: System: - name: system5c3s # Mandatory, str: System codename + name: system5c3s # Mandatory, str: System codename. See docu. Multimodel: no # Mandatory, bool: Either yes/true or no/false Reference: - name: era5 # Mandatory, str: Reference codename + name: era5 # Mandatory, str: Reference codename. See docu. Time: sdate: fcst_syear: '2020' # Optional, int: Forecast year 'YYYY' - fcst_sday: '1101' # Mandatory, int: Forecast startdate, 'MMDD' + fcst_sday: '1101' # Mandatory, int: Start date, 'MMDD' hcst_start: '1993' # Mandatory, int: Hindcast start year 'YYYY' - hcst_end: '2016' # Mandatory, int: Hindcast end year 'YYYY' + hcst_end: '1996' # Mandatory, int: Hindcast end year 'YYYY' leadtimemin: 0 # Mandatory, int: First leadtime time step in months - leadtimemax: 0 # Mandatory, int: Last leadtime time step in months + leadtimemax: 1 # Mandatory, int: Last leadtime time step in months Region: latmin: -10 # Mandatory, int: minimum latitude latmax: 10 # Mandatory, int: maximum latitude - lonmin: 0 # Mandatory, int: minimum longitude + lonmin: 0 # Mandatory, int: minimum longitude lonmax: 20 # Mandatory, int: maximum longitude Regrid: method: bilinear # Mandatory, str: Interpolation method. See docu. - type: to_system # Mandatory, str: to_system, to_reference, or CDO-compatible grid. + type: to_reference # Mandatory, str: to_system, to_reference, or CDO-accepted grid. Workflow: Calibration: method: qmap # Mandatory, str: Calibration method. See docu. diff --git a/modules/data_load/testing_recipes/recipe_2.yml b/modules/Loading/testing_recipes/recipe_2.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_2.yml rename to modules/Loading/testing_recipes/recipe_2.yml diff --git a/modules/data_load/testing_recipes/recipe_3.yml b/modules/Loading/testing_recipes/recipe_3.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_3.yml rename to modules/Loading/testing_recipes/recipe_3.yml diff --git a/modules/data_load/testing_recipes/recipe_4.yml b/modules/Loading/testing_recipes/recipe_4.yml similarity index 94% rename from modules/data_load/testing_recipes/recipe_4.yml rename to modules/Loading/testing_recipes/recipe_4.yml index 33c3a74c716da3beee7e927f5596fbbf0f47c2ab..e787a9fd6770fd450fc2f4b0cb5eead47ded05db 100644 --- a/modules/data_load/testing_recipes/recipe_4.yml +++ b/modules/Loading/testing_recipes/recipe_4.yml @@ -32,7 +32,7 @@ Analysis: Calibration: method: mse_min Skill: - metric: BSS90 + metric: RPS FRPS RPSS FRPSS BSS10 BSS90 Indicators: index: no Output_format: S2S4E diff --git a/modules/data_load/testing_recipes/recipe_5.yml b/modules/Loading/testing_recipes/recipe_5.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_5.yml rename to modules/Loading/testing_recipes/recipe_5.yml diff --git a/modules/data_load/testing_recipes/recipe_circular-sort-test.yml b/modules/Loading/testing_recipes/recipe_circular-sort-test.yml similarity index 100% rename from modules/data_load/testing_recipes/recipe_circular-sort-test.yml rename to modules/Loading/testing_recipes/recipe_circular-sort-test.yml diff --git a/modules/Saving/Save_data.R b/modules/Saving/Save_data.R index 3ace35b85eff85e8dc01b3c722e5fb61c3834244..66f25baa3c07ab409ea0cbc94b4ab6bf0bfbab9e 100644 --- a/modules/Saving/Save_data.R +++ b/modules/Saving/Save_data.R @@ -1,12 +1,11 @@ -## TODO: Change names of functions to make sure they don't clash -## with others in the tool +## TODO: Implement wrapper to get grid and time info? get_times <- function(fcst.horizon, leadtimes, sdate) { - - ## TODO: Adapt to Auto-S2S recipe format + # Generates time dimensions and the corresponding metadata. + switch(fcst.horizon, ## TODO: Remove "sub_obs"? - ## TODO: daily case? + ## TODO: implement daily case "seasonal" = {len <- length(leadtimes); ref <- 'months since '; stdname <- paste(strtoi(leadtimes), collapse=", ")}, "sub_obs" = {len <- 52; ref <- 'week of the year '; @@ -14,10 +13,11 @@ get_times <- function(fcst.horizon, leadtimes, sdate) { "subseasonal" = {len <- 4; ref <- 'weeks since '; stdname <- ''} ) - - time <- 1:len + + ## TODO: Get correct date for each time step + time <- 1:len ## Is this correct? I think it needs to be changed dim(time) <- length(time) - #metadata <- list(time = list(standard_name = stdname, + # metadata <- list(time = list(standard_name = stdname, metadata <- list(time = list(units = paste0(ref, sdate, ' 00:00:00'))) attr(time, 'variables') <- metadata names(dim(time)) <- 'time' @@ -41,26 +41,20 @@ get_times <- function(fcst.horizon, leadtimes, sdate) { } get_latlon <- function(latitude, longitude) { - ## TODO: Verify if this is needed or if it needs a change now that we are - ## using s2dv_cube objects # Adds dimensions and metadata to lat and lon # latitude: array containing the latitude values # longitude: array containing the longitude values - ## the inputs are simply the arrays with the values? + dim(longitude) <- length(longitude) ## TODO: Extract metadata from s2dv_cube metadata <- list(longitude = list(units = 'degrees_east')) - # Add metadata to attributes attr(longitude, 'variables') <- metadata - # Add longitude dimension names(dim(longitude)) <- 'longitude' dim(latitude) <- length(latitude) ## TODO: Extract metadata from s2dv_cube metadata <- list(latitude = list(units = 'degrees_north')) - # Add metadata to attributes attr(latitude, 'variables') <- metadata - # Add latitude dimension names(dim(latitude)) <- 'latitude' return(list(lat=latitude, lon=longitude)) @@ -78,53 +72,70 @@ get_latlon <- function(latitude, longitude) { save_metrics <- function(skill, recipe, - grid, + data_cube, outfile, leadtimes, agg="global") { - fcst.horizon <- tolower(recipe$Analysis$Horizon) - fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, - recipe$Analysis$Time$sdate$fcst_sday) - - + # Define grid dimensions and names lalo <- c('longitude', 'latitude') - ## TODO: Sort out aggregation + # Remove singleton dimensions from each metric array and rearrange the + # longitude, latitude and time dimensions to the correct order. if (tolower(agg) == "global") { - skill <- lapply(skill, function(x){ - Reorder(x[[1]], c(lalo, 'time'))}) + ## TODO: Implement metrics with additional non-singleton dimensions + ## e.g. 'ensemble' in ensemble correlation. + skill <- lapply(skill, function(x) { + Reorder(drop(x[[1]]), c(lalo, 'time'))}) } for (i in 1:length(skill)) { - + ## TODO: create dictionary with proper metadata metric <- names(skill[i]) - if (tolower(agg) == "country"){ - sdname <- paste0(names(metric), " region-aggregated metric") + if (tolower(agg) == "country") { + sdname <- paste0(metric, " region-aggregated metric") dims <- c('Country', 'time') } else { - sdname <- paste0(names(metric), " grid point metric") + sdname <- paste0(metric, " grid point metric") # formerly names(metric) dims <- c(lalo, 'time') } - metadata <- list(name = metric, standard_name = sdname) + metadata <- list(metric = list(name = metric, standard_name = sdname)) + + attr(skill[[i]], 'variables') <- metadata + names(dim(skill[[i]])) <- dims + } - attr(skill[i], 'variables') <- metadata - names(dim(skill[[i]])) <- dims + # Time data and metadata + fcst.horizon <- tolower(recipe$Analysis$Horizon) + leadtimes <- seq(from = recipe$Analysis$Time$leadtimemin, + to = recipe$Analysis$Time$leadtimemax) + # If a fcst is provided, use that as the ref. year. Otherwise use 1970. + if (!is.null(recipe$Analysis$Time$sdate$fcst_syear)) { + fcst.sdate <- paste0(recipe$Analysis$Time$sdate$fcst_syear, + recipe$Analysis$Time$sdate$fcst_sday) + } else { + fcst.sdate <- paste0("1970", recipe$Analysis$Time$sdate$fcst_sday) } times <- get_times(fcst.horizon, leadtimes, fcst.sdate) time <- times$time time_step <- times$time_step - # if (tolower(agg) == "country") { - # country <- get_countries(grid) - # ArrayToNc(append(country, time, skill, time_step), outfile) - # } else { + # Grid data and metadata +# if (tolower(agg) == "country") { +# country <- get_countries(grid) +# ArrayToNc(append(country, time, skill, time_step), outfile) +# } else { + + latitude <- data_cube$lat[1:length(data_cube$lat)] + longitude <- data_cube$lon[1:length(data_cube$lon)] + latlon <- get_latlon(latitude, longitude) - latlon <- get_latlon(grid$lat, grid$lon) + # Compile variables into a list and export to netCDF vars <- list(latlon$lat, latlon$lon, time) vars <- c(vars, skill, list(time_step)) ArrayToNc(vars, outfile) + print("##### SKILL METRICS SAVED TO NETCDF FILE #####") } diff --git a/modules/Skill/Skill.R b/modules/Skill/Skill.R index caf4939d117dc89a313deaed2c15b81f96ac2dec..5fb66b2cb8a9edddf4dda99c58ff09b326b7ce16 100644 --- a/modules/Skill/Skill.R +++ b/modules/Skill/Skill.R @@ -1,17 +1,13 @@ # This module should calculate verification metrics at any time of the workflow # It should implement different verification metrics: # - FRPSS and RPSS -# - FCRPSS abd CRPSS +# - FCRPSS and CRPSS # - enscorr # - bias # - reliability diagram # - ask Carlos which decadal metrics he is currently using source("modules/Skill/s2s.metrics.R") -## TODO: Remove once the new version of s2dv is released -source("tools/tmp/RandomWalkTest.R") -source("tools/tmp/RPS.R") -source("tools/tmp/RPSS.R") ## TODO: Implement this in the future ## Which parameter are required? @@ -54,7 +50,7 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { # obs: s2dv_cube containing the observations # recipe: auto-s2s recipe as provided by read_yaml - ## TODO: Adapt time_dims to subseasonal case? + ## TODO: Adapt time_dims to subseasonal case time_dim <- 'syear' memb_dim <- 'ensemble' metrics <- tolower(recipe$Analysis$Workflow$Skill$metric) @@ -70,22 +66,29 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { if (metric %in% c('rps', 'frps')) { skill <- RPS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill) + # Ranked Probability Skill Score and Fair version } else if (metric %in% c('rpss', 'frpss')) { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Brier Skill Score - 10th percentile } else if (metric == 'bss10') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.1, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Brier Skill Score - 90th percentile } else if (metric == 'bss90') { skill <- RPSS(exp$data, obs$data, time_dim = time_dim, memb_dim = memb_dim, prob_thresholds = 0.9, Fair = Fair, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$rpss) + skill_metrics[[ paste0(metric, "_significance") ]] <- list(skill$sign) + # Ensemble mean correlation } else if (metric == 'enscorr') { ## TODO: Implement option for Kendall and Spearman methods? @@ -95,9 +98,14 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { method = 'pearson', memb_dim = memb_dim, ncores = ncores) - skill_metrics[[ metric ]] <- skill + skill_metrics[[ metric ]] <- list(skill$corr) + skill_metrics[[ paste0(metric, "_p.value") ]] <- list(skill$p.val) + skill_metrics[[ paste0(metric, "_conf.low") ]] <- list(skill$conf.lower) + skill_metrics[[ paste0(metric, "_conf.up") ]] <- list(skill$conf.upper) + # SpecsVerification metrics } else if (grepl("specs", metric, fixed = TRUE)) { # Compute SpecsVerification version of the metrics + ## Retain _specs in metric name for clarity? metric <- (strsplit(metric, "_"))[[1]][1] # Get metric name skill <- Compute_verif_metrics(exp$data, obs$data, skill_metrics = metric, @@ -105,8 +113,11 @@ compute_skill_metrics <- function(exp, obs, recipe, na.rm = T, ncores = 1) { na.rm = na.rm, ncores = ncores) skill_metrics[[ metric ]] <- skill + } + } + print("##### SKILL METRIC COMPUTATION COMPLETE #####") return(skill_metrics) diff --git a/modules/Skill/s2s.metrics.R b/modules/Skill/s2s.metrics.R index b00697306790fa454cc8f40ba066bfcf0db114ea..7246dd0929fecfcbb6c1303384cba631b40be0ab 100644 --- a/modules/Skill/s2s.metrics.R +++ b/modules/Skill/s2s.metrics.R @@ -47,8 +47,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, if (na.rm) { nsdates <- dim(exp)[which(names(dim(exp)) == time.dim)][] not_missing_dates <- c() - for (sdate in 1:nsdates){ - if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))){ + for (sdate in 1:nsdates) { + if(!all(is.na(Subset(exp, along=time.dim, list(sdate), drop=F)))) { not_missing_dates <- c(not_missing_dates, sdate) } } @@ -72,6 +72,7 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, veriUnwrap <- easyVerification:::veriUnwrap ## SPECS VERIFICATION PARAMETERS + # Assign function name and probability bins for each metric SPECSVER_METRICS <- c("frpss", "frps", "bss10", "bss90", "enscorr", "rpss") FUN <- list(frps="FairRps", rpss="EnsRpss", frpss="FairRpss", @@ -86,8 +87,8 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, if (metric %in% SPECSVER_METRICS) { data <- Apply(data=list(exp, obs), - target_dims=list(c(time.dim, 'ensemble'), - c(time.dim, 'ensemble')), + target_dims=list(c(time.dim, 'ensemble'), + c(time.dim, 'ensemble')), fun="veriApply", verifun=FUN[[metric]], prob=PROB[[metric]], @@ -97,7 +98,6 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, tdim=1, na.rm=na.rm)[[1]] #* 100 - ## TODO: Keep 'ensemble' dimension to maintain flexibility? data <- Subset(data, c('ensemble'), list(1), drop='selected') data[!is.finite(data)] <- NaN metric <- paste0(metric, "_specs") @@ -127,43 +127,43 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } else if (metric == "frpss_sign") { terciles_obs <- Compute_probs(obs, c(1/3, 2/3), - quantile_dims=c(time.dim), + quantile_dims=c(time.dim), ncores=ncores, - split_factor=1, - na.rm=na.rm) + split_factor=1, + na.rm=na.rm) terciles_exp <- Compute_probs(exp, c(1/3, 2/3), - quantile_dims=c(time.dim, 'ensemble'), + quantile_dims=c(time.dim, 'ensemble'), ncores=ncores, - split_factor=1, - na.rm=na.rm) + split_factor=1, + na.rm=na.rm) probs_clim = array(data = 1/3, dim = dim(terciles_obs$probs)) frps <- NULL n_members <- dim(exp)[which(names(dim(exp)) == 'ensemble')][] frps$clim <- multiApply::Apply(data = list(probs_exp = probs_clim, - probs_obs = terciles_obs$probs), + probs_obs = terciles_obs$probs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, n_members = n_members, Fair = TRUE, - ncores = ncores)$output1 + ncores = ncores)$output1 frps$exp <- multiApply::Apply(data = list(probs_exp = terciles_exp$probs, - probs_obs = terciles_obs$probs), + probs_obs = terciles_obs$probs), target_dims = 'bin', fun = .rps_from_probs, n_categories = 3, n_members = n_members, Fair = TRUE, - ncores = ncores)$output1 + ncores = ncores)$output1 frps$clim_mean <- multiApply::Apply(data = frps$clim, target_dims=time.dim, fun=mean, ncores=ncores)$output1 frps$exp_mean <- multiApply::Apply(data = frps$exp, target_dims=time.dim, - fun=mean, ncores=ncores)$output1 + fun=mean, ncores=ncores)$output1 frpss_respect2clim <- NULL frpss_respect2clim$rpss <- 1 - frps$exp_mean/frps$clim_mean @@ -193,7 +193,10 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } -.correlation_eno = function(exp, obs, time_dim = 'syear', alpha = 0.05, ncores = 1){ +.correlation_eno = function(exp, obs, + time_dim = 'syear', + alpha = 0.05, + ncores = 1) { cor = NULL cor$r = cor(exp, obs) # Correlation coefficient @@ -222,7 +225,11 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, } -.rps_from_probs <- function(probs_exp, probs_obs, n_categories, Fair, n_members = NULL){ +.rps_from_probs <- function(probs_exp, + probs_obs, + n_categories, + Fair, + n_members = NULL) { ## Checkings if (length(probs_exp) != n_categories | length(probs_obs) != n_categories){ @@ -237,13 +244,13 @@ Compute_verif_metrics <- function(exp, obs, skill_metrics, ## RPS (Wilks 2011, pp.348-350) rps <- NULL - for (i in 1:n_categories){ + for (i in 1:n_categories) { rps[i] <- (cumsum(probs_exp)[i]-cumsum(probs_obs)[i])^2 } rps <- sum(rps) ## FairRPS - if (Fair == TRUE){ + if (Fair == TRUE) { ## formula of SpecsVerification::EnsRps ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) diff --git a/modules/test_victoria.R b/modules/test_victoria.R index b5e026a3f36242691e405e8c0f5b54ec30f3d976..a8125b1680161742620200629661569b0349eb8c 100644 --- a/modules/test_victoria.R +++ b/modules/test_victoria.R @@ -1,10 +1,11 @@ -recipe_file <- "modules/data_load/testing_recipes/recipe_1.yml" +recipe_file <- "modules/Loading/testing_recipes/recipe_4.yml" -source("modules/data_load/load.R") +source("modules/Loading/Loading.R") source("modules/Calibration/Calibration.R") source("modules/Skill/Skill.R") +source("modules/Saving/Save_data.R") # Load datasets data <- load_datasets(recipe_file) @@ -15,3 +16,7 @@ calibrated_data <- calibrate_datasets(data, recipe) # Compute skill metrics skill_metrics <- compute_skill_metrics(calibrated_data$hcst, data$obs, recipe, na.rm = T, ncores = 4) + +# Export skill metrics onto outfile +outfile <- "/esarchive/scratch/vagudets/auto-s2s-tests/files/skill/test-metrics.nc" +save_metrics(skill_metrics, recipe, data$hcst, outfile, agg = "global") diff --git a/tools/data_summary.R b/tools/data_summary.R index 625bbef13a77183907857519346db8cd9b10cdd8..e211e202cf63cccc183cc31bd42c3466d0752f92 100644 --- a/tools/data_summary.R +++ b/tools/data_summary.R @@ -5,17 +5,20 @@ ## TODO: Add check for missing files/NAs by dimension data_summary <- function(object, frequency) { - # Get name and start dates + # Get name, leadtime months and date range object_name <- deparse(substitute(object)) if (tolower(frequency) == "monthly_mean") { date_format <- '%b %Y' } else if (tolower(frequency) == "daily_mean") { date_format <- '%b %d %Y' } + months <- unique(format(as.Date(object$Dates[[1]]), format = '%B')) + months <- paste(as.character(months), collapse=", ") sdate_min <- format(min(as.Date(object$Dates[[1]])), format = date_format) sdate_max <- format(max(as.Date(object$Dates[[1]])), format = date_format) print("DATA SUMMARY:") + print(paste0(object_name, " months: ", months)) print(paste0(object_name, " range: ", sdate_min, " to ", sdate_max)) print(paste0(object_name, " dimensions: ")) print(dim(object$data)) diff --git a/tools/tmp/RPS.R b/tools/tmp/RPS.R deleted file mode 100644 index 84956ac85a2ee39250e793ec80dab9f26df75eaa..0000000000000000000000000000000000000000 --- a/tools/tmp/RPS.R +++ /dev/null @@ -1,193 +0,0 @@ -#'Compute the Ranked Probability Score -#' -#'The Ranked Probability Score (RPS; Wilks, 2011) is defined as the sum of the -#'squared differences between the cumulative forecast probabilities (computed -#'from the ensemble members) and the observations (defined as 0% if the category -#'did not happen and 100% if it happened). It can be used to evaluate the skill -#'of multi-categorical probabilistic forecasts. The RPS ranges between 0 -#'(perfect forecast) and n-1 (worst possible forecast), where n is the number of -#'categories. In the case of a forecast divided into two categories (the lowest -#'number of categories that a probabilistic forecast can have), the RPS -#'corresponds to the Brier Score (BS; Wilks, 2011), therefore, ranges between 0 -#'and 1. -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast. The default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' -#' for computing the thresholds between the probabilistic categories. If NULL, -#' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'A numerical array of RPS with the same dimensions as "exp" except the -#''time_dim' and 'memb_dim' dimensions. -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'res <- RPS(exp = exp, obs = obs) -#' -#'@import multiApply -#'@importFrom easyVerification convert2prob -#'@export -RPS <- function(exp, obs, time_dim = 'sdate', memb_dim = 'member', - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { - - # Check inputs - ## exp and obs (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if(any(is.null(names(dim(exp))))| any(nchar(names(dim(exp))) == 0) | - any(is.null(names(dim(obs))))| any(nchar(names(dim(obs))) == 0)) { - stop("Parameter 'exp' and 'obs' must have dimension names.") - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_clim - if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] - } else { - if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { - stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") - } else if (length(indices_for_clim) > dim(obs)[time_dim] | - max(indices_for_clim) > dim(obs)[time_dim] | - any(indices_for_clim) < 1) { - stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute RPS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- time_dim - } else { - target_dims_obs <- c(time_dim, memb_dim) - } - - rps <- Apply(data = list(exp = exp, obs = obs), - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs), - output_dims = time_dim, - fun = .RPS, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores)$output1 - - # Return only the mean RPS - rps <- MeanDims(rps, time_dim, na.rm = FALSE) - - return(rps) -} - - -.RPS <- function(exp, obs, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate, (memb)] - - exp_probs <- .get_probs(data = exp, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # exp_probs: [bin, sdate] - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # obs_probs: [bin, sdate] - - probs_exp_cumsum <- apply(exp_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) - rps <- apply((probs_exp_cumsum - probs_obs_cumsum)^2, 2, sum) - # rps: [sdate] - - if (Fair) { # FairRPS - ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] - R <- dim(exp)[2] #memb - R_new <- Inf - adjustment <- (-1) / (R - 1) * probs_exp_cumsum * (1 - probs_exp_cumsum) - adjustment <- apply(adjustment, 2, sum) - rps <- rps + adjustment - } - - return(rps) -} - -.get_probs <- function(data, indices_for_quantiles, prob_thresholds) { - # if exp: [sdate, memb] - # if obs: [sdate, (memb)] - - # Add dim [memb = 1] to obs if it doesn't have memb_dim - if (length(dim(data)) == 1) dim(data) <- c(dim(data), 1) - - # Absolute thresholds - quantiles <- quantile(x = as.vector(data[indices_for_quantiles, ]), probs = prob_thresholds, type = 8, na.rm = TRUE) - - # Probabilities - probs <- array(dim = c(bin = length(quantiles) + 1, dim(data)[1])) # [bin, sdate] - for (i_time in 1:dim(data)[1]) { - if (anyNA(data[i_time, ])) { - probs[, i_time] <- rep(NA, dim = length(quantiles) + 1) - } else { - probs[, i_time] <- colMeans(easyVerification::convert2prob(data[i_time, ], threshold = quantiles)) - } - } - - return(probs) -} - diff --git a/tools/tmp/RPSS.R b/tools/tmp/RPSS.R deleted file mode 100644 index 04c31378f6e1d733d83a5fb1ea1e5d297612973b..0000000000000000000000000000000000000000 --- a/tools/tmp/RPSS.R +++ /dev/null @@ -1,223 +0,0 @@ -#'Compute the Ranked Probability Skill Score -#' -#'The Ranked Probability Skill Score (RPSS; Wilks, 2011) is the skill score -#'based on the Ranked Probability Score (RPS; Wilks, 2011). It can be used to -#'assess whether a forecast presents an improvement or worsening with respect to -#'a reference forecast. The RPSS ranges between minus infinite and 1. If the -#'RPSS is positive, it indicates that the forecast has higher skill than the -#'reference forecast, while a negative value means that it has a lower skill. -#'Examples of reference forecasts are the climatological forecast (same -#'probabilities for all categories for all time steps), persistence, a previous -#'model version, and another model. It is computed as RPSS = 1 - RPS_exp / RPS_ref. -#'The statistical significance is obtained based on a Random Walk test at the -#'95% confidence level (DelSole and Tippett, 2016). -#' -#'@param exp A named numerical array of the forecast with at least time -#' dimension. -#'@param obs A named numerical array of the observation with at least time -#' dimension. The dimensions must be the same as 'exp' except 'memb_dim'. -#'@param ref A named numerical array of the reference forecast data with at -#' least time dimension. The dimensions must be the same as 'exp' except -#' 'memb_dim'. If it is NULL, the climatological forecast is used as reference -#' forecast. The default value is NULL. -#'@param time_dim A character string indicating the name of the time dimension. -#' The default value is 'sdate'. -#'@param memb_dim A character string indicating the name of the member dimension -#' to compute the probabilities of the forecast and the reference forecast. The -#' default value is 'member'. -#'@param prob_thresholds A numeric vector of the relative thresholds (from 0 to -#' 1) between the categories. The default value is c(1/3, 2/3), which -#' corresponds to tercile equiprobable categories. -#'@param indices_for_clim A vector of the indices to be taken along 'time_dim' -#' for computing the thresholds between the probabilistic categories. If NULL, -#' the whole period is used. The default value is NULL. -#'@param Fair A logical indicating whether to compute the FairRPSS (the -#' potential RPSS that the forecast would have with an infinite ensemble size). -#' The default value is FALSE. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return -#'\item{$rpss}{ -#' A numerical array of the RPSS with the same dimensions as "exp" except the -#' 'time_dim' and 'memb_dim' dimensions. -#'} -#'\item{$sign}{ -#' A logical array of the statistical significance of the RPSS with the same -#' dimensions as 'exp' except the 'time_dim' and 'memb_dim' dimensions. -#'} -#' -#'@references -#'Wilks, 2011; https://doi.org/10.1016/B978-0-12-385022-5.00008-7 -#'DelSole and Tippett, 2016; https://doi.org/10.1175/MWR-D-15-0218.1 -#' -#'@examples -#'exp <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'obs <- array(rnorm(1000), dim = c(lat = 3, lon = 2, sdate = 50)) -#'ref <- array(rnorm(1000), dim = c(lat = 3, lon = 2, member = 10, sdate = 50)) -#'res <- RPSS(exp = exp, obs = obs) ## climatology as reference forecast -#'res <- RPSS(exp = exp, obs = obs, ref = ref) ## ref as reference forecast -#' -#'@import multiApply -#'@export -RPSS <- function(exp, obs, ref = NULL, time_dim = 'sdate', memb_dim = 'member', - prob_thresholds = c(1/3, 2/3), indices_for_clim = NULL, Fair = FALSE, - ncores = NULL) { - - # Check inputs - ## exp, obs, and ref (1) - if (!is.array(exp) | !is.numeric(exp)) - stop('Parameter "exp" must be a numeric array.') - if (!is.array(obs) | !is.numeric(obs)) - stop('Parameter "obs" must be a numeric array.') - if (!is.null(ref)) { - if (!is.array(ref) | !is.numeric(ref)) - stop('Parameter "ref" must be a numeric array.') - } - ## time_dim - if (!is.character(time_dim) | length(time_dim) != 1) - stop('Parameter "time_dim" must be a character string.') - if (!time_dim %in% names(dim(exp)) | !time_dim %in% names(dim(obs))) { - stop("Parameter 'time_dim' is not found in 'exp' or 'obs' dimension.") - } - if (!is.null(ref) & !time_dim %in% names(dim(ref))) { - stop("Parameter 'time_dim' is not found in 'ref' dimension.") - } - ## memb_dim - if (!is.character(memb_dim) | length(memb_dim) > 1) { - stop("Parameter 'memb_dim' must be a character string.") - } - if (!memb_dim %in% names(dim(exp))) { - stop("Parameter 'memb_dim' is not found in 'exp' dimension.") - } - if (!is.null(ref) & !memb_dim %in% names(dim(ref))) { - stop("Parameter 'memb_dim' is not found in 'ref' dimension.") - } - ## exp and obs (2) - name_exp <- sort(names(dim(exp))) - name_obs <- sort(names(dim(obs))) - name_exp <- name_exp[-which(name_exp == memb_dim)] - if (memb_dim %in% name_obs) { - name_obs <- name_obs[-which(name_obs == memb_dim)] - } - if (!identical(length(name_exp), length(name_obs)) | - !identical(dim(exp)[name_exp], dim(obs)[name_obs])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - if (!is.null(ref)) { - name_ref <- sort(names(dim(ref))) - name_ref <- name_ref[-which(name_ref == memb_dim)] - if (!identical(length(name_exp), length(name_ref)) | - !identical(dim(exp)[name_exp], dim(ref)[name_ref])) { - stop(paste0("Parameter 'exp' and 'obs' must have same length of ", - "all dimensions expect 'memb_dim'.")) - } - } - ## prob_thresholds - if (!is.numeric(prob_thresholds) | !is.vector(prob_thresholds) | - any(prob_thresholds <= 0) | any(prob_thresholds >= 1)) { - stop("Parameter 'prob_thresholds' must be a numeric vector between 0 and 1.") - } - ## indices_for_clim - if (is.null(indices_for_clim)) { - indices_for_clim <- 1:dim(obs)[time_dim] - } else { - if (!is.numeric(indices_for_clim) | !is.vector(indices_for_clim)) { - stop("Parameter 'indices_for_clim' must be NULL or a numeric vector.") - } else if (length(indices_for_clim) > dim(obs)[time_dim] | - max(indices_for_clim) > dim(obs)[time_dim] | - any(indices_for_clim) < 1) { - stop("Parameter 'indices_for_clim' should be the indices of 'time_dim'.") - } - } - ## Fair - if (!is.logical(Fair) | length(Fair) > 1) { - stop("Parameter 'Fair' must be either TRUE or FALSE.") - } - ## ncores - if (!is.null(ncores)) { - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | - length(ncores) > 1) { - stop("Parameter 'ncores' must be either NULL or a positive integer.") - } - } - - ############################### - - # Compute RPSS - if (!memb_dim %in% names(dim(obs))) { - target_dims_obs <- time_dim - } else { - target_dims_obs <- c(time_dim, memb_dim) - } - - if (!is.null(ref)) { # use "ref" as reference forecast - data <- list(exp = exp, obs = obs, ref = ref) - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs, - ref = c(time_dim, memb_dim)) - } else { - data <- list(exp = exp, obs = obs) - target_dims = list(exp = c(time_dim, memb_dim), - obs = target_dims_obs) - } - output <- Apply(data, - target_dims = target_dims, - fun = .RPSS, - prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair, - ncores = ncores) - - return(output) -} - -.RPSS <- function(exp, obs, ref = NULL, prob_thresholds = c(1/3, 2/3), - indices_for_clim = NULL, Fair = FALSE) { - # exp: [sdate, memb] - # obs: [sdate, (memb)] - # ref: [sdate, memb] or NULL - - # RPS of the forecast - rps_exp <- .RPS(exp = exp, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) - - # RPS of the reference forecast - if (is.null(ref)) { ## using climatology as reference forecast - obs_probs <- .get_probs(data = obs, indices_for_quantiles = indices_for_clim, - prob_thresholds = prob_thresholds) - # obs_probs: [bin, sdate] - - clim_probs <- c(prob_thresholds[1], diff(prob_thresholds), 1 - prob_thresholds[length(prob_thresholds)]) - clim_probs <- array(clim_probs, dim = dim(obs_probs)) - # clim_probs: [bin, sdate] - - # Calculate RPS for each time step - probs_clim_cumsum <- apply(clim_probs, 2, cumsum) - probs_obs_cumsum <- apply(obs_probs, 2, cumsum) - rps_ref <- apply((probs_clim_cumsum - probs_obs_cumsum)^2, 2, sum) - # rps_ref: [sdate] - -# if (Fair) { # FairRPS -# ## adjustment <- rowSums(-1 * (1/R - 1/R.new) * ens.cum * (R - ens.cum)/R/(R - 1)) [formula taken from SpecsVerification::EnsRps] -# R <- dim(exp)[2] #memb -# R_new <- Inf -# adjustment <- (-1) / (R - 1) * probs_clim_cumsum * (1 - probs_clim_cumsum) -# adjustment <- apply(adjustment, 2, sum) -# rps_ref <- rps_ref + adjustment -# } - - } else { # use "ref" as reference forecast - rps_ref <- .RPS(exp = ref, obs = obs, prob_thresholds = prob_thresholds, - indices_for_clim = indices_for_clim, Fair = Fair) - } - - # RPSS - rpss <- 1 - mean(rps_exp) / mean(rps_ref) - - # Significance - sign <- .RandomWalkTest(skill_A = rps_exp, skill_B = rps_ref)$signif - - return(list(rpss = rpss, sign = sign)) -} - diff --git a/tools/tmp/RandomWalkTest.R b/tools/tmp/RandomWalkTest.R deleted file mode 100644 index adeadc1ec94b62920c885640938f966c91e75ddc..0000000000000000000000000000000000000000 --- a/tools/tmp/RandomWalkTest.R +++ /dev/null @@ -1,82 +0,0 @@ -#'Random walk test for skill differences -#' -#'Forecast comparison of the skill obtained with 2 forecasts (with respect to a -#'common reference) based on Random Walks, with significance estimate at the 95% -#'confidence level, as in DelSole and Tippett (2016). -#' -#'@param skill_A A numerical array of the time series of the skill with the -#' forecaster A's. -#'@param skill_B A numerical array of the time series of the skill with the -#' forecaster B's. The dimensions should be identical as parameter 'skill_A'. -#'@param time_dim A character string indicating the name of the dimension along -#' which the tests are computed. The default value is 'sdate'. -#'@param ncores An integer indicating the number of cores to use for parallel -#' computation. The default value is NULL. -#' -#'@return A list of 2: -#'\item{$score}{ -#' A numerical array with the same dimensions as the input arrays except -#' 'time_dim'. The number of times that forecaster A has been better than -#' forecaster B minus the number of times that forecaster B has been better -#' than forecaster A (for skill positively oriented). If $score is positive -#' forecaster A is better than forecaster B, and if $score is negative -#' forecaster B is better than forecaster B. -#'} -#'\item{$signif}{ -#' A logical array with the same dimensions as the input arrays except -#' 'time_dim'. Whether the difference is significant or not at the 5% -#' significance level. -#'} -#' -#'@examples -#' fcst_A <- array(c(11:50), dim = c(sdate = 10, lat = 2, lon = 2)) -#' fcst_B <- array(c(21:60), dim = c(sdate = 10, lat = 2, lon = 2)) -#' reference <- array(1:40, dim = c(sdate = 10, lat = 2, lon = 2)) -#' skill_A <- abs(fcst_A - reference) -#' skill_B <- abs(fcst_B - reference) -#' RandomWalkTest(skill_A = skill_A, skill_B = skill_B, time_dim = 'sdate', ncores = 1) -#' -#'@import multiApply -#'@export -RandomWalkTest <- function(skill_A, skill_B, time_dim = 'sdate', ncores = NULL){ - - ## Check inputs - if (is.null(skill_A) | is.null(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' cannot be NULL.") - } - if(!is.numeric(skill_A) | !is.numeric(skill_B)){ - stop("Parameters 'skill_A' and 'skill_B' must be a numerical array.") - } - if (!identical(dim(skill_A),dim(skill_B))){ - stop("Parameters 'skill_A' and 'skill_B' must have the same dimensions.") - } - if(!is.character(time_dim)){ - stop("Parameter 'time_dim' must be a character string.") - } - if(!time_dim %in% names(dim(skill_A)) | !time_dim %in% names(dim(skill_B))){ - stop("Parameter 'time_dim' is not found in 'skill_A' or 'skill_B' dimensions.") - } - if (!is.null(ncores)){ - if (!is.numeric(ncores) | ncores %% 1 != 0 | ncores <= 0 | length(ncores) > 1){ - stop("Parameter 'ncores' must be a positive integer.") - } - } - - ## Compute the Random Walk Test - res <- multiApply::Apply(data = list(skill_A, skill_B), - target_dims = time_dim, - fun = .RandomWalkTest, - ncores = ncores) - return(res) -} - -.RandomWalkTest <- function(skill_A, skill_B){ - - score <- cumsum(skill_A > skill_B) - cumsum(skill_A < skill_B) - - ## TRUE if significant (if last value is above or below 2*sqrt(N)) - signif<- ifelse(test = (score[length(skill_A)] < (-2)*sqrt(length(skill_A))) | (score[length(skill_A)] > 2*sqrt(length(skill_A))), - yes = TRUE, no = FALSE) - - return(list("score"=score[length(skill_A)],"signif"=signif)) -}